├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── RcppExports.R ├── assert.R ├── averageposterior.R ├── builders.R ├── imp-brms.R ├── marginalcoef.R ├── margins.R ├── prediction.R ├── rimplementation.R └── utils.R ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── man ├── assertall.Rd ├── brmsmargins.Rd ├── bsummary.Rd ├── builders.Rd ├── dot-averagePosterior.Rd ├── dot-checktab.Rd ├── dot-extractlink.Rd ├── dot-links.Rd ├── dot-percent.Rd ├── integratemvn.Rd ├── integratere.Rd ├── is.random.Rd ├── lmcpp.Rd ├── marginalcoef.Rd ├── prediction.Rd ├── rowBootMeans.Rd └── tab2mat.Rd ├── src ├── .gitignore ├── Makevars ├── Makevars.win ├── RcppExports.cpp ├── integratemvn.cpp ├── integratemvn.h ├── integratere.cpp ├── lmcpp.cpp ├── lmcpp.h ├── rowbootmeans.cpp ├── tab2mat.cpp └── tab2mat.h ├── tests ├── testthat.R └── testthat │ ├── test-bsummary.R │ ├── test-builders.R │ ├── test-dot-checktab.R │ ├── test-dot-links.R │ ├── test-dot-percent.R │ ├── test-fixedeffects-bernoulli-margins.R │ ├── test-fixedeffects-beta-margins.R │ ├── test-fixedeffects-gamma-margins.R │ ├── test-integratemvn.R │ ├── test-marginalcoef-mixedlogit.R │ ├── test-marginalcoef-mixedpoisson.R │ ├── test-predict-mixedlogit.R │ ├── test-predict-mixedlogtrans.R │ ├── test-predict-mixedpoisson.R │ ├── test-rowbootmeans.R │ └── test-tab2mat.R └── vignettes ├── .gitignore ├── fixed-effects-marginaleffects.Rmd ├── fixed-effects-marginaleffects.Rmd.orig ├── location-scale-marginaleffects.Rmd ├── location-scale-marginaleffects.Rmd.orig ├── mixed-effects-marginaleffects.Rmd └── mixed-effects-marginaleffects.Rmd.orig /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^.~ 4 | ^LICENSE\.md$ 5 | LICENSE 6 | ^codecov\.yml$ 7 | ^_pkgdown\.yml$ 8 | ^pkgdown$ 9 | ^appveyor\.yml$ 10 | ^\.travis\.yml$ 11 | ^codecov\.yml$ 12 | ^vignettes/.*\.Rmd\.orig$ 13 | ^vignettes/precompile.R$ 14 | \.Rhistory 15 | \.gitignore 16 | ^\.github$ 17 | ^docs$ 18 | internal.md -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: R-CMD-check.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | R-CMD-check: 14 | if: "!contains(github.event.head_commit.message, '[ci skip]')" 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: windows-latest, r: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | 26 | env: 27 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 28 | R_KEEP_PKG_SOURCE: yes 29 | steps: 30 | - uses: actions/checkout@v4 31 | 32 | - uses: r-lib/actions/setup-r@v2 33 | with: 34 | r-version: ${{ matrix.config.r }} 35 | http-user-agent: ${{ matrix.config.http-user-agent }} 36 | use-public-rspm: true 37 | 38 | - uses: r-lib/actions/setup-r-dependencies@v2 39 | with: 40 | extra-packages: > 41 | any::rcmdcheck 42 | BH 43 | RcppEigen 44 | needs: check 45 | 46 | - uses: r-lib/actions/check-r-package@v2 47 | with: 48 | upload-snapshots: true 49 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | release: 8 | types: [published] 9 | workflow_dispatch: 10 | 11 | name: pkgdown.yaml 12 | 13 | permissions: read-all 14 | 15 | jobs: 16 | pkgdown: 17 | if: "!contains(github.event.head_commit.message, '[ci skip]')" 18 | runs-on: ubuntu-latest 19 | # Only restrict concurrency for non-PR jobs 20 | concurrency: 21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | permissions: 25 | contents: write 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, local::. 38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | if: "!contains(github.event.head_commit.message, '[ci skip]')" 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: > 29 | any::covr 30 | any::xml2 31 | BH 32 | RcppEigen 33 | needs: coverage 34 | 35 | - name: Test coverage 36 | run: | 37 | cov <- covr::package_coverage( 38 | quiet = FALSE, 39 | clean = FALSE, 40 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 41 | ) 42 | covr::to_cobertura(cov) 43 | shell: Rscript {0} 44 | 45 | - uses: codecov/codecov-action@v4 46 | with: 47 | # Fail if error if not on PR, or if on PR and token is given 48 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 49 | file: ./cobertura.xml 50 | plugin: noop 51 | disable_search: true 52 | token: ${{ secrets.CODECOV_TOKEN }} 53 | 54 | - name: Show testthat output 55 | if: always() 56 | run: | 57 | ## -------------------------------------------------------------------- 58 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 59 | shell: bash 60 | 61 | - name: Upload test results 62 | if: failure() 63 | uses: actions/upload-artifact@v4 64 | with: 65 | name: coverage-test-failures 66 | path: ${{ runner.temp }}/package -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | desktop.ini 2 | .DS_Store 3 | .httr-oauth 4 | .Rproj.user 5 | .Rhistory 6 | R/.Rhistory 7 | .RData 8 | *~ 9 | ^.~ 10 | [#]*[#] 11 | # artifacts of R CMD check/build 12 | *.Rcheck 13 | *.tar.gz 14 | Rplots.pdf 15 | inst/doc 16 | docs 17 | internal.md -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: brmsmargins 2 | Title: Bayesian Marginal Effects for 'brms' Models 3 | Version: 0.2.1 4 | Authors@R: 5 | c(person(given = "Joshua F.", 6 | family = "Wiley", 7 | role = c("aut", "cre"), 8 | email = "jwiley.psych@gmail.com", 9 | comment = c(ORCID = "0000-0002-0271-6702")), 10 | person(given = "Donald", 11 | family = "Hedeker", 12 | role = c("aut"), 13 | email = "hedeker@uchicago.edu", 14 | comment = c(ORCID = "0000-0001-8134-6094"))) 15 | URL: https://joshuawiley.com/brmsmargins/, 16 | https://github.com/JWiley/brmsmargins 17 | BugReports: https://github.com/JWiley/brmsmargins/issues 18 | Description: Calculate Bayesian marginal effects, average marginal effects, and marginal coefficients (also called population averaged coefficients) for models fit using the 'brms' package including fixed effects, mixed effects, and location scale models. These are based on marginal predictions that integrate out random effects if necessary (see for example and ). 
19 | License: GPL (>= 3) 20 | Encoding: UTF-8 21 | Roxygen: list(markdown = TRUE) 22 | RoxygenNote: 7.3.2 23 | Depends: 24 | R (>= 4.0.0) 25 | Imports: 26 | methods, 27 | stats, 28 | data.table (>= 1.12.0), 29 | extraoperators (>= 0.1.1), 30 | brms, 31 | bayestestR, 32 | Rcpp, 33 | posterior 34 | Suggests: 35 | testthat (>= 3.0.0), 36 | covr, 37 | withr, 38 | knitr, 39 | rmarkdown, 40 | margins, 41 | betareg 42 | Config/testthat/edition: 3 43 | Config/testthat/parallel: true 44 | LinkingTo: 45 | RcppArmadillo, 46 | Rcpp 47 | VignetteBuilder: knitr 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2021 2 | COPYRIGHT HOLDER: brmsmargins authors 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(brmsmargins) 4 | export(bsummary) 5 | export(integratemvn) 6 | export(integratere) 7 | export(lmcpp) 8 | export(marginalcoef) 9 | export(prediction) 10 | export(rowBootMeans) 11 | export(tab2mat) 12 | import(bayestestR) 13 | import(brms) 14 | import(data.table) 15 | importFrom(Rcpp,sourceCpp) 16 | importFrom(bayestestR,ci) 17 | importFrom(brms,brmsterms) 18 | importFrom(brms,is.brmsfit) 19 | importFrom(brms,make_standata) 20 | importFrom(brms,standata) 21 | importFrom(data.table,":=") 22 | importFrom(data.table,as.data.table) 23 | importFrom(data.table,copy) 24 | importFrom(data.table,data.table) 25 | importFrom(data.table,is.data.table) 26 | importFrom(extraoperators,"%e%") 27 | importFrom(extraoperators,"%gele%") 28 | importFrom(extraoperators,"%nin%") 29 | importFrom(methods,missingArg) 30 | importFrom(posterior,as_draws_df) 31 | importFrom(posterior,ndraws) 32 | importFrom(stats,fitted) 33 | importFrom(stats,formula) 34 | importFrom(stats,median) 35 | importFrom(stats,model.frame) 36 | importFrom(stats,plogis) 37 | importFrom(stats,qlogis) 38 | importFrom(stats,rnorm) 39 | importFrom(stats,runif) 40 | useDynLib(brmsmargins, .registration = TRUE) 41 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # brmsmargins 0.2.1 2 | 3 | * Added `seed` argument to `marginalcoef()` to allow for reproducible results. 4 | * Minor documentation edits. 5 | 6 | # brmsmargins 0.2.0 7 | 8 | * Fixed a bug when using `prediction()` with option `effects = "integrateoutRE"` 9 | when smooth terms were present. As `prediction()` underpins other functions, 10 | such as `brmsmargins()` this issue also impacts those other functions. 11 | * New function: `marginalcoef()` which calculates population averaged (marginal) coefficients 12 | for the fixed effects coefficients from mixed effects models using a method 13 | described by Donald Hedeker, who joins the author team. 14 | Currently, only the main location parameter is supported. That is, 15 | marginal coefficients for the scale part of a model, in location and scale models, 16 | is not currently supported. 17 | * New argument, `wat`, added to `brmsmargins()` to support including 18 | calculating average marginal effects for multilevel centered 19 | categorical predictors. 
20 | * Updates to vignettes demonstrating: (1) the use of marginal coefficients; 21 | (2) marginal effects for centered categorical predictors; and 22 | (3) 'simple' marginal effects when models include interaction terms. 23 | * Revised documentation for `brmsmargins()` and `prediction()` to be clearer 24 | around which arguments users must directly specify and which are optional or 25 | have sensible defaults. 26 | * Added more unit testing and vignettes. 27 | 28 | # brmsmargins 0.1.1 29 | 30 | * Fixed a bug preventing predictions integrating out random effects for mixed effects models with a random intercept only (reported in Issue#1). Thanks to @ajnafa for reporting. 31 | * Added support for Gamma and Beta regression models. 32 | * More extensive testing added. 33 | 34 | # brmsmargins 0.1.0 35 | 36 | * Initial release 37 | -------------------------------------------------------------------------------- /R/RcppExports.R: -------------------------------------------------------------------------------- 1 | # Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #' Integrate over Multivariate Normal Random Effects 5 | #' 6 | #' Used in the process of Monte Carlo integration 7 | #' over multivariate normal random effects. This generates the 8 | #' random draws from the multivariate normal distribution 9 | #' and multiplies these by the data. 10 | #' Not intended to be called directly by most users. 11 | #' 12 | #' @param X A numeric matrix of the data to be multiplied by the random effects 13 | #' @param k An integer, the number of random samples to be used for numerical integration 14 | #' @param sd A numeric vector of the standard deviations 15 | #' @param chol A numeric matrix, which should be the Cholesky decomposition of the 16 | #' correlation matrix of the multivariate normal distribution. 17 | #' @return A numeric matrix with random values 18 | #' @export 19 | #' @examples 20 | #' integratemvn( 21 | #' X = matrix(1, 1, 2), 22 | #' k = 100L, 23 | #' sd = c(10, 5), 24 | #' chol = chol(matrix(c(1, .5, .5, 1), 2))) 25 | #' 26 | #' integratemvn(matrix(1, 1, 1), 100L, c(5), matrix(1)) 27 | integratemvn <- function(X, k, sd, chol) { 28 | .Call(`_brmsmargins_integratemvn`, X, k, sd, chol) 29 | } 30 | 31 | #' Integrate over Random Effects 32 | #' 33 | #' Used to conduct Monte Carlo integration over Gaussian random effects. 34 | #' Not intended to be called directly by most users. 35 | #' 36 | #' @param d A list with model matrices for each random effect block. 37 | #' @param sd A list with standard deviation matrices for each random effect block 38 | #' where rows are different posterior draws. 39 | #' @param L A list with matrices for each random effect block containing the parts of 40 | #' the L matrix, the Cholesky decomposition of the random effect correlation matrix. 41 | #' @param k An integer, the number of samples for Monte Carlo integration. 42 | #' @param yhat A matrix of the fixed effects predictions 43 | #' @param backtrans An integer, indicating the type of back transformation. 44 | #' 0 indicates inverse logit (e.g., for logistic regression). 45 | #' 1 indicates exponential (e.g., for poisson or negative binomial regression or if outcome was natural log transformed). 46 | #' 2 indicates square (e.g., if outcome was square root transformed). 47 | #' 3 indicates inverse (e.g., if outcome was inverse transformed such as Gamma regression). 48 | #' Any other integer results in no transformation.
-9 is recommended as the option for no 49 | #' transformation as any future transformations supported will be other, positive integers. 50 | #' @return A numeric matrix with the Monte Carlo integral calculated. 51 | #' @export 52 | #' @examples 53 | #' integratere( 54 | #' d = list(matrix(1, 1, 1)), 55 | #' sd = list(matrix(1, 2, 1)), 56 | #' L = list(matrix(1, 2, 1)), 57 | #' k = 10L, 58 | #' yhat = matrix(0, 2, 1), 59 | #' backtrans = 0L) 60 | integratere <- function(d, sd, L, k, yhat, backtrans) { 61 | .Call(`_brmsmargins_integratere`, d, sd, L, k, yhat, backtrans) 62 | } 63 | 64 | #' Fast Linear Regression 65 | #' 66 | #' Used to get marginal coefficients off of a generalized linear mixed model. 67 | #' 68 | #' @param X A numeric model matrix. If intercept is desired, it must already have been added as a column. 69 | #' @param y A numeric matrix. A single column if one response variable or multiple columns 70 | #' where each column is a different response, such as a for marginal coefficients where 71 | #' each column is a different MCMC sample. 72 | #' @return A numeric matrix with the coefficient. 73 | #' @export 74 | #' @examples 75 | #' lmcpp(cbind(1, mtcars$hp, mtcars$am), as.matrix(mtcars[, c("mpg", "qsec")])) 76 | lmcpp <- function(X, y) { 77 | .Call(`_brmsmargins_lmcpp`, X, y) 78 | } 79 | 80 | #' Bootstrap Row Means 81 | #' 82 | #' This takes a numeric matrix, bootstrap resamples each row, and then 83 | #' calculates the mean. The intended use case is for Bayesian posterior 84 | #' predictions from sample data. Instead of directly calculating the 85 | #' average marginal effect (AME) across all observed values, these can be 86 | #' bootstrapped, so that uncertainty in the target population, and thus 87 | #' the AME in the target population, can be incorporated. 88 | #' Model uncertainty is already assumed to be handled by the different posterior 89 | #' samples, which are assumed to be across rows. 90 | #' 91 | #' @param x A numeric matrix 92 | #' @return A numeric vector with the simple bootstrapped row means of the matrix 93 | #' @export 94 | #' @examples 95 | #' 96 | #' x <- matrix(1:9, byrow = TRUE, 3) 97 | #' replicate(10, rowBootMeans(x)) 98 | rowBootMeans <- function(x) { 99 | .Call(`_brmsmargins_rowBootMeans`, x) 100 | } 101 | 102 | #' Convert a Row of a Table to a Square Matrix 103 | #' 104 | #' Utility function to convert a row matrix to a square matrix. 105 | #' Used as the \code{brms} package returns things like the Cholesky 106 | #' decomposition matrix as separate columns where rows are posterior draws. 107 | #' Not intended to be called directly by most users. 108 | #' 109 | #' @param X a matrix 110 | #' @return A numeric matrix with one row. 111 | #' @export 112 | #' @examples 113 | #' tab2mat(matrix(1:4, 1)) 114 | #' tab2mat(matrix(1:9, 1)) 115 | tab2mat <- function(X) { 116 | .Call(`_brmsmargins_tab2mat`, X) 117 | } 118 | 119 | -------------------------------------------------------------------------------- /R/assert.R: -------------------------------------------------------------------------------- 1 | #' @title Check Assertions about a \code{brmsfit} Model Object 2 | #' 3 | #' @description 4 | #' These are a set of internal utility functions. 5 | #' They are not intended for general use. 6 | #' Instead, they are intended to be called in circumstances 7 | #' where the expected result is \code{TRUE}. 8 | #' All of them are designed to try to give informative error 9 | #' messages if the assertion is not met. 
10 | #' All of them result in a \code{stop()} error if the assertion is not met. 11 | #' 12 | #' @details 13 | #' \itemize{ 14 | #' \item{\code{.assertbrmsfit}}{asserts that the object should be of class \code{brmsfit}.} 15 | #' \item{\code{.assertgaussian}}{asserts that all random effects are Gaussian.} 16 | #' \item{\code{.assertfamily}}{asserts that the distribution (family) of the outcome is a currently supported family. Only applies when integrating out random effects.} 17 | #' \item{\code{.assertlink}}{asserts that the link function is a currently supported link function. Only applies when integrating out random effects.} 18 | #' } 19 | #' 20 | #' @param object A \code{brmsfit} model object to be evaluated. 21 | #' @param dpar Required for \code{.assertdpar} which checks this is valid. 22 | #' Optional for \code{.assertlink} which will use \code{NULL} if not 23 | #' specified. If specified, this should be \code{NULL} or 24 | #' a character string. 25 | #' 26 | #' @return An invisible, logical \code{TRUE} if the assertion is met. 27 | #' An (informative) error message if the assertion is not met. 28 | #' @keywords internal 29 | #' @name assertall 30 | NULL 31 | 32 | 33 | #' @rdname assertall 34 | #' @importFrom brms is.brmsfit 35 | .assertbrmsfit <- function(object) { 36 | if (!isTRUE(is.brmsfit(object))) { 37 | stop(sprintf("object must be of class 'brmsfit', but was %s", 38 | paste(class(object), collapse = "; "))) 39 | } else { 40 | invisible(TRUE) 41 | } 42 | } 43 | 44 | #' @rdname assertall 45 | .assertgaussian <- function(object) { 46 | .assertbrmsfit(object) 47 | 48 | result <- FALSE 49 | if (isTRUE(is.random(object))) { 50 | if (isFALSE(all(object$ranef$dist == "gaussian"))) { 51 | err <- sprintf(paste0("Currently only gaussian random effects are supported, ", 52 | "but the following distribution(s) were found '%s'."), 53 | paste(unique(object$ranef$dist), collapse = "; ")) 54 | stop(err) 55 | } else { 56 | result <- TRUE 57 | } 58 | } else { 59 | result <- TRUE 60 | } 61 | invisible(result) 62 | } 63 | 64 | #' @rdname assertall 65 | .assertfamily <- function(object) { 66 | .assertbrmsfit(object) 67 | family <- object$family$family 68 | validlength <- identical(length(family), 1L) 69 | if (isFALSE(validlength)) { 70 | stop(sprintf("The 'family' must be a character string of length 1, but found length %d.", 71 | length(family))) 72 | } 73 | 74 | validclass <- is.character(family) 75 | if (isFALSE(validclass)) { 76 | stop(sprintf("The 'family' must be a character string, but a '%s' class was found.", 77 | paste(class(family), collapse = "; "))) 78 | } 79 | 80 | fams <- c("gaussian", 81 | "bernoulli", "beta", 82 | "gamma", 83 | "poisson", "negbinomial") 84 | validtype <- family %in% fams 85 | if (isFALSE(validtype)) { 86 | stop(sprintf("The 'family' must be one of (%s), but found '%s'.", 87 | paste(fams, collapse = ", "), 88 | family)) 89 | } 90 | invisible(TRUE) 91 | } 92 | 93 | #' @rdname assertall 94 | .assertdpar <- function(object, dpar) { 95 | .assertbrmsfit(object) 96 | out <- FALSE 97 | if (isTRUE(is.null(dpar))) { 98 | out <- TRUE 99 | } 100 | if (isFALSE(is.null(dpar))) { 101 | if (isFALSE(identical(length(dpar), 1L))) { 102 | stop(sprintf(paste0( 103 | "The 'dpar' argument must be NULL or a character string (length 1 vector)\n", 104 | "but found a '%s' object of length %d"), 105 | paste(class(dpar), collapse = "; "), 106 | length(dpar))) 107 | } 108 | 109 | if (isFALSE(is.character(dpar))) { 110 | stop(sprintf("'dpar' must be class character, but '%s' class was 
found.", 111 | paste(class(dpar), collapse = "; "))) 112 | } 113 | 114 | validdpars <- names(brmsterms(object$formula)$dpars) 115 | if (isFALSE(dpar %in% validdpars)) { 116 | stop(sprintf(paste0( 117 | "dpar was specified as '%s' but this was not found in the model.\n", 118 | "A valid dpar for this model must be in: [%s]."), 119 | dpar, paste(validdpars, collapse = "; "))) 120 | } 121 | out <- TRUE 122 | } 123 | 124 | invisible(out) 125 | } 126 | 127 | #' @rdname assertall 128 | #' @importFrom methods missingArg 129 | .assertlink <- function(object, dpar) { 130 | .assertbrmsfit(object) 131 | 132 | if (isTRUE(missingArg(dpar))) { 133 | dpar <- NULL 134 | } 135 | 136 | link <- .extractlink(object, dpar) 137 | 138 | validlength <- identical(length(link), 1L) 139 | if (isFALSE(validlength)) { 140 | stop(sprintf("The 'link' must be a character string of length 1, but found length %d.", 141 | length(link))) 142 | } 143 | 144 | validclass <- is.character(link) 145 | if (isFALSE(validclass)) { 146 | stop(sprintf("The 'link' must be a character string, but a '%s' class was found.", 147 | paste(class(link), collapse = "; "))) 148 | } 149 | 150 | linkfun <- c("identity", "logit", "log") 151 | validtype <- link %in% linkfun 152 | if (isFALSE(validtype)) { 153 | stop(sprintf("The 'link' must be one of (%s), but found '%s'.", 154 | paste(linkfun, collapse = ", "), 155 | link)) 156 | } 157 | invisible(TRUE) 158 | } 159 | -------------------------------------------------------------------------------- /R/averageposterior.R: -------------------------------------------------------------------------------- 1 | #' Average Over Posterior Predictions 2 | #' 3 | #' Internal function that averages over posterior predictions 4 | #' using either [rowMeans()] or [rowBootMeans()], the latter 5 | #' being useful to incorporate uncertainty from the 6 | #' inputs being used to generate predictions. 7 | #' 8 | #' @param posterior A posterior matrix type object. It is assumed that different 9 | #' predictions to be averaged over are on different columns. Different posterior 10 | #' draws are on different rows. 11 | #' @param resample An integer indicating the number of 12 | #' bootstrap resamples of the posterior predictions to 13 | #' use when calculating summaries. Defaults to \code{0L}. 14 | #' See the details section for more informations as its implementation 15 | #' is experimental and it may not operate as one would expect. 16 | #' @param seed A seed for random number generation. Defaults to \code{FALSE}, 17 | #' which means no seed is set. 18 | #' Only used if \code{resample} is a positive, non-zero integer. 19 | #' @return A vector of the averaged posterior. 20 | #' @keywords internal 21 | .averagePosterior <- function(posterior, resample = 0L, seed = FALSE) { 22 | if (isTRUE(resample == 0)) { 23 | posterior <- rowMeans(posterior, na.rm = TRUE) 24 | } else if (isTRUE(resample > 0)) { 25 | if (!isFALSE(seed)) { 26 | set.seed(seed) 27 | } 28 | 29 | yhat <- matrix(NA_real_, nrow = nrow(posterior), ncol = resample) 30 | for (i in seq_len(resample)) { 31 | yhat[, i] <- rowBootMeans(posterior) 32 | } 33 | 34 | posterior <- as.vector(yhat) 35 | } 36 | return(posterior) 37 | } 38 | -------------------------------------------------------------------------------- /R/builders.R: -------------------------------------------------------------------------------- 1 | #' @title Build the Variable Names or Data Objects for Estimation 2 | #' 3 | #' @description 4 | #' These are a set of internal utility functions. 
5 | #' They are not intended for general use. 6 | #' 7 | #' @details 8 | #' \itemize{ 9 | #' \item{\code{.namesL}}{Generate names of an L matrix from \code{brms}. Create the variable names for the Cholesky decomposition of the random effects correlation matrix in \code{brms}. Note that \code{brms} returns the lower triangular matrix and we want the upper triangular matrix, so the names are transposed. The results can then be passed to the \code{tab2mat} function to convert the row vector into a matrix.} 10 | #' \item{\code{.buildL}}{Returns the L matrix object. Rows are posterior draws.} 11 | #' \item{\code{.namesSD}}{Create the names of random effect standard deviation estimates.} 12 | #' \item{\code{.buildSD}}{Return matrix of random effect standard deviation estimates. Rows are posterior draws.} 13 | #' \item{\code{.namesZ}}{Create the names of random effects data for predictions.} 14 | #' \item{\code{.buildZ}}{Return matrix of data for random effect predictions.} 15 | #' } 16 | #' 17 | #' @param data A data object. For example the result of [make_standata()] 18 | #' for [.buildZ()], which is a list, 19 | #' or a dataset of the posterior draws such as from [as_draws_df()] 20 | #' for [.buildL()] and [.buildSD()]. 21 | #' @param ranef A data set with information about the model object random effects. 22 | #' Only used for \code{.namesSD} and \code{.buildSD}. 23 | #' @param block Which random effect block to use. An integer. 24 | #' @param number The number of elements in that random effect block. An integer. 25 | #' @param dpar Which dpar to use. Does not apply to the L matrix. 26 | #' @return A character vector for all \code{.names} functions or a matrix 27 | #' for all \code{.build} functions. 28 | #' @keywords internal 29 | #' @name builders 30 | NULL 31 | 32 | ## make Rcmd check happy 33 | utils::globalVariables(c("Block", "Row", "Col")) 34 | 35 | #' @rdname builders 36 | #' @importFrom data.table as.data.table 37 | .namesL <- function(block, number) { 38 | n <- expand.grid(Block = block, 39 | Row = seq_len(number), 40 | Col = seq_len(number)) 41 | n <- as.data.table(n) 42 | n[, sprintf("L_%d[%d,%d]", 43 | Block, Row, Col)] 44 | } 45 | 46 | ## make Rcmd check happy 47 | utils::globalVariables(c("..n")) 48 | 49 | #' @rdname builders 50 | .buildL <- function(data, block, number, dpar) { 51 | stopifnot(is.data.table(data)) 52 | n <- .namesL(block, number) 53 | if (isTRUE(number == 1)) { 54 | out <- matrix(1, nrow = nrow(data), ncol = 1) 55 | colnames(out) <- n 56 | } else { 57 | out <- as.matrix(data[, ..n]) 58 | } 59 | return(out) 60 | } 61 | 62 | ## make Rcmd check happy 63 | utils::globalVariables(c("group", "coef", "id")) 64 | 65 | #' @rdname builders 66 | .namesSD <- function(ranef, block, dpar) { 67 | stopifnot(is.data.table(ranef)) 68 | n <- ranef[id == block] 69 | if (isTRUE(is.null(dpar)) || isFALSE(nzchar(dpar))) { 70 | n[, sprintf("sd_%s__%s", group, coef)] 71 | } else if (isTRUE(nzchar(dpar))) { 72 | n[, sprintf("sd_%s__%s_%s", group, dpar, coef)] 73 | } 74 | } 75 | 76 | #' @rdname builders 77 | .buildSD <- function(data, ranef, block, dpar) { 78 | stopifnot(is.data.table(data)) 79 | n <- .namesSD(ranef, block, dpar) 80 | as.matrix(data[, ..n]) 81 | } 82 | 83 | ## make Rcmd check happy 84 | utils::globalVariables(c("Number")) 85 | 86 | #' @rdname builders 87 | .namesZ <- function(block, number, dpar) { 88 | n <- expand.grid(Block = block, 89 | Number = seq_len(number)) 90 | n <- as.data.table(n) 91 | 92 | if (isTRUE(is.null(dpar)) || isFALSE(nzchar(dpar))) { 93 | n[, 
sprintf("Z_%d_%d", Block, Number)] 94 | } else if (isTRUE(nzchar(dpar))) { 95 | n[, sprintf("Z_%d_%s_%d", Block, dpar, Number)] 96 | } 97 | } 98 | 99 | #' @rdname builders 100 | .buildZ <- function(data, block, number, dpar) { 101 | n <- .namesZ(block, number, dpar) 102 | as.matrix(do.call(cbind, data[n])) 103 | } 104 | -------------------------------------------------------------------------------- /R/imp-brms.R: -------------------------------------------------------------------------------- 1 | ## usethis namespace: start 2 | #' @useDynLib brmsmargins, .registration = TRUE 3 | ## usethis namespace: end 4 | NULL 5 | 6 | ## usethis namespace: start 7 | #' @importFrom Rcpp sourceCpp 8 | ## usethis namespace: end 9 | NULL 10 | 11 | # import the brms package 12 | #' @import brms 13 | NULL 14 | 15 | # import the bayestestR package 16 | #' @import bayestestR 17 | NULL 18 | 19 | # data.table is generally careful to minimize the scope for namespace 20 | # conflicts (i.e., functions with the same name as in other packages); 21 | # a more conservative approach using @importFrom should be careful to 22 | # import any needed data.table special symbols as well, e.g., if you 23 | # run DT[ , .N, by='grp'] in your package, you'll need to add 24 | # @importFrom data.table .N to prevent the NOTE from R CMD check. 25 | # See ?data.table::`special-symbols` for the list of such symbols 26 | # data.table defines; see the 'Importing data.table' vignette for more 27 | # advice (vignette('datatable-importing', 'data.table')). 28 | # 29 | #' @import data.table 30 | NULL 31 | -------------------------------------------------------------------------------- /R/marginalcoef.R: -------------------------------------------------------------------------------- 1 | #' Marginal Coefficients from a 'brms' Model 2 | #' 3 | #' Calculate marginal coefficients from a \code{brms} 4 | #' generalized linear mixed model using the method proposed by Hedeker (2018). 5 | #' 6 | #' @param object A fitted brms model object that includes random effects. Required. 7 | #' @param summarize A logical value, whether or not to 8 | #' calculate summaries of the posterior predictions. 9 | #' Defaults to \code{TRUE}. 10 | #' @param posterior A logical value whether or not to 11 | #' save and return the posterior samples. Defaults 12 | #' to \code{FALSE} as the assumption is a typical 13 | #' use case is to return the summaries only. 14 | #' @param index An optional integer vector, giving the posterior draws 15 | #' to be used in the calculations. If omitted, defaults to all 16 | #' posterior draws. 17 | #' @param backtrans A character string indicating the type of 18 | #' back transformation to be applied. Can be one of 19 | #' \dQuote{response} meaning to use the response scale, 20 | #' \dQuote{linear} or \dQuote{identity} meaning to use the linear predictor scale, 21 | #' or a specific back transformation desired, from a possible list of 22 | #' \dQuote{invlogit}, \dQuote{exp}, \dQuote{square}, or \dQuote{inverse}. 23 | #' Custom back transformations should only be needed if, for example, 24 | #' the outcome variable was transformed prior to fitting the model. 25 | #' @param k An integer providing the number of random draws to use for 26 | #' integrating out the random effects. Only relevant when \code{effects} 27 | #' is \dQuote{integrateoutRE}. 28 | #' @param seed An \emph{optional} argument that controls whether (and if so what) random seed 29 | #' to use. This can help with reproducibility of results. 30 | #' It is missing by default. 
31 | #' @param ... Additional arguments passed to \code{bsummary()}, 32 | #' and only relevant if \code{summarize} is \code{TRUE}. 33 | #' @return A list with \code{Summary} and \code{Posterior}. 34 | #' Some of these may be \code{NULL} depending on the arguments used. 35 | #' @references 36 | #' Hedeker, D., du Toit, S. H., Demirtas, H. & Gibbons, R. D. (2018) 37 | #' \doi{10.1111/biom.12707} 38 | #' \dQuote{A note on marginalization of regression parameters from mixed models of binary outcomes} 39 | #' @importFrom data.table as.data.table 40 | #' @importFrom stats formula 41 | #' @importFrom posterior as_draws_df ndraws 42 | #' @importFrom brms make_standata 43 | #' @importFrom methods missingArg 44 | #' @export 45 | marginalcoef <- function(object, summarize = TRUE, posterior = FALSE, index, 46 | backtrans = c("response", "linear", "identity", 47 | "invlogit", "exp", "square", "inverse"), 48 | k = 100L, seed, ...) { 49 | ## checks and assertions 50 | .assertbrmsfit(object) 51 | 52 | if (isFALSE(is.random(object))) { 53 | stop("object must have random effects to use marginalcoef()") 54 | } 55 | 56 | ## assert the assumed family / distribution is a supported one 57 | .assertfamily(object) 58 | ## assert the link function used is a supported one 59 | .assertlink(object) 60 | ## assert that all random effects in the model are Gaussian 61 | .assertgaussian(object) 62 | 63 | if (isTRUE(missingArg(index))) { 64 | index <- seq_len(ndraws(object)) 65 | } 66 | 67 | links <- .links( 68 | link = .extractlink(object, NULL), 69 | effects = "integrateoutRE", backtrans = backtrans) 70 | 71 | mf <- model.frame(object) 72 | X <- make_standata(formula(object), data = mf)$X 73 | 74 | if (isFALSE(missing(seed))) { 75 | if (isFALSE(is.null(seed))) { 76 | stopifnot(identical(length(seed), 1L)) 77 | set.seed(seed) 78 | } 79 | } 80 | lambda <- prediction( 81 | object, data = mf, 82 | summarize = FALSE, posterior = TRUE, index = index, 83 | effects = "integrateoutRE", backtrans = backtrans, 84 | k = k, raw = TRUE) 85 | 86 | y <- links$fun(t(lambda$Posterior)) 87 | 88 | B <- lmcpp(X, y) 89 | 90 | out <- list( 91 | Summary = NULL, 92 | Posterior = NULL) 93 | 94 | if (isTRUE(summarize)) { 95 | out$Summary <- as.data.table(do.call(rbind, apply(B, 1, bsummary, ...))) 96 | out$Summary[, Label := colnames(X)] 97 | } 98 | if (isTRUE(posterior)) { 99 | out$Posterior <- B 100 | } 101 | 102 | return(out) 103 | } 104 | -------------------------------------------------------------------------------- /R/prediction.R: -------------------------------------------------------------------------------- 1 | #' Marginal Posterior Predictions from a 'brms' Model 2 | #' 3 | #' Calculate marginal predictions from a \code{brms} model. 4 | #' Marginal predictions average over the input data for each posterior draw. 5 | #' Marginal predictions for models with random effects will integrate 6 | #' over random effects. 7 | #' Arguments are labeled as \emph{required} when it is required that the 8 | #' user directly specify the argument. Arguments are labeled as 9 | #' \emph{optional} when either the argument is optional or there are 10 | #' sensible default values so that users do not typically need to specify 11 | #' the argument. 12 | #' 13 | #' @param object A \emph{required} argument specifying a fitted 14 | #' \code{brms} model object. 15 | #' @param data A \emph{required} argument specifying a data frame or 16 | #' data table passed to \code{fitted()} as the new data to be used 17 | #' for predictions. 
18 | #' @param summarize An \emph{optional} argument, a logical value, whether 19 | #' or not to calculate summaries of the posterior predictions. 20 | #' Defaults to \code{TRUE}. 21 | #' @param posterior An \emph{optional} argument, a logical value whether 22 | #' or not to save and return the posterior samples. Defaults 23 | #' to \code{FALSE} as the assumption is a typical 24 | #' use case is to return the summaries only. 25 | #' @param index An \emph{optional} argument, an integer vector, giving the 26 | #' posterior draws to be used in the calculations. If omitted, 27 | #' defaults to all posterior draws. 28 | #' @param dpar An \emph{optional} argument, the parameter passed on to the 29 | #' \code{dpar} argument of \code{fitted()} in brms. Defaults to \code{NULL} 30 | #' indicating the mean or location parameter typically. 31 | #' @param resample An \emph{optional} argument, an integer indicating the 32 | #' number of bootstrap resamples of the posterior predictions to 33 | #' use when calculating summaries. Defaults to \code{0L}. 34 | #' See documentation from [.averagePosterior()] for more details. 35 | #' This should be considered experimental. 36 | #' @param resampleseed An \emph{optional} argument, a seed for random number 37 | #' generation. Defaults to \code{FALSE}, which means no seed is set. 38 | #' Only used if \code{resample} is a positive, non-zero integer. 39 | #' See documentation from [.averagePosterior()] for more details. 40 | #' This should be considered experimental. 41 | #' @param effects An \emph{optional} argument, a character string indicating 42 | #' the type of prediction to be made. Can be one of 43 | #' \dQuote{fixedonly} meaning only use fixed effects, 44 | #' \dQuote{includeRE} meaning that random effects should be 45 | #' included in the predictions, or 46 | #' \dQuote{integrateoutRE} meaning that random effects should be 47 | #' integrated out / over in the predictions. 48 | #' It defaults to \dQuote{fixedonly} so is not typically required for 49 | #' a user to specify it. 50 | #' @param backtrans An \emph{optional} argument, a character string indicating 51 | #' the type of back transformation to be applied. Can be one of 52 | #' \dQuote{response} meaning to use the response scale, 53 | #' \dQuote{linear} or \dQuote{identity} meaning to use the linear predictor scale, 54 | #' or a specific back transformation desired, from a possible list of 55 | #' \dQuote{invlogit}, \dQuote{exp}, \dQuote{square}, or \dQuote{inverse}. 56 | #' Custom back transformations should only be needed if, for example, 57 | #' the outcome variable was transformed prior to fitting the model. 58 | #' It defaults to \dQuote{response} so is not typically required for 59 | #' a user to specify it. 60 | #' @param k An \emph{optional} argument, an integer providing the number of 61 | #' random draws to use for integrating out the random effects. 62 | #' Only relevant when \code{effects} is \dQuote{integrateoutRE}. 63 | #' It defaults to \code{100L}, a rather arbitrary number attempting to 64 | #' balance the increased precision that comes from a larger value, 65 | #' with the increased computational cost of more Monte Carlo simulations 66 | #' when integrating out random effects. 67 | #' @param raw An \emph{optional} argument, a logical value indicating whether to 68 | #' return the raw output or to average over the Monte Carlo samples. 69 | #' Defaults to \code{FALSE}. 
70 | #' Setting it to \code{TRUE} can be useful if you want not only the 71 | #' full posterior distribution but also the \code{k} Monte Carlo samples 72 | #' used for the numerical integration. This cannot be used with 73 | #' \code{summarize = TRUE}. 74 | #' @param ... Additional arguments passed to \code{bsummary()}, 75 | #' and only relevant if \code{summarize} is \code{TRUE}. 76 | #' @return A list with \code{Summary} and \code{Posterior}. 77 | #' Some of these may be \code{NULL} depending on the arguments used. 78 | #' @references 79 | #' Pavlou, M., Ambler, G., Seaman, S., & Omar, R. Z. (2015) 80 | #' \doi{10.1186/s12874-015-0046-6} 81 | #' \dQuote{A note on obtaining correct marginal predictions from a random intercepts model for binary outcomes} 82 | #' and 83 | #' Skrondal, A., & Rabe-Hesketh, S. (2009) 84 | #' \doi{10.1111/j.1467-985X.2009.00587.x} 85 | #' \dQuote{Prediction in multilevel generalized linear models} 86 | #' @importFrom data.table as.data.table 87 | #' @importFrom stats fitted formula 88 | #' @importFrom posterior as_draws_df ndraws 89 | #' @importFrom brms standata 90 | #' @export 91 | prediction <- function(object, data, summarize = TRUE, posterior = FALSE, 92 | index, dpar = NULL, resample = 0L, resampleseed = FALSE, 93 | effects = c("fixedonly", "includeRE", "integrateoutRE"), 94 | backtrans = c("response", "linear", "identity", 95 | "invlogit", "exp", "square", "inverse"), 96 | k = 100L, raw = FALSE, ...) { 97 | ## checks and assertions 98 | if (isTRUE(missing(object))) { 99 | stop(paste( 100 | "'object' is a required argument and cannot be missing;", 101 | " it should be a saved model fit from brms. For example:", 102 | " m <- brm(y ~ x, data = yourdata)", 103 | " See ?prediction or the website articles (vignettes) for details.", 104 | " https://joshuawiley.com/brmsmargins/", sep = "\n")) 105 | } 106 | .assertbrmsfit(object) 107 | .assertdpar(object, dpar = dpar) 108 | 109 | if (isFALSE(is.random(object))) { 110 | if (isFALSE(effects == "fixedonly")) { 111 | stop("object does not have random effects: must use \"effects = 'fixedonly'\"") 112 | } 113 | } 114 | 115 | effects <- match.arg(effects, several.ok = FALSE) 116 | backtrans <- match.arg(backtrans, several.ok = FALSE) 117 | 118 | if (isTRUE(effects == "integrateoutRE")) { 119 | ## assert the assumed family / distribution is a supported one 120 | .assertfamily(object) 121 | ## assert the link function used is a supported one 122 | .assertlink(object, dpar = dpar) 123 | ## assert that all random effects in the model are Gaussian 124 | .assertgaussian(object) 125 | } 126 | 127 | if (isTRUE(missing(index))) { 128 | index <- seq_len(ndraws(object)) 129 | } 130 | 131 | links <- .links( 132 | link = .extractlink(object, dpar), 133 | effects = effects, backtrans = backtrans) 134 | 135 | ## set whether fitted() should include RE (NULL) or not (NA) 136 | ## see help for ?fitted.brmsfit for more details 137 | if (isTRUE(effects %in% c("fixedonly", "integrateoutRE"))) { 138 | useRE <- NA 139 | } else if (isTRUE(effects == "includeRE")) { 140 | useRE <- NULL 141 | } 142 | 143 | ## generate all predictions (if fixedonly or includeRE) 144 | ## or generate just the fixed effects predictions (if integrateoutRE) 145 | yhat <- fitted( 146 | object = object, newdata = data, 147 | re_formula = useRE, 148 | scale = links$scale, dpar = dpar, 149 | draw_ids = index, summary = FALSE) 150 | yhat <- links$useifun(yhat) 151 | 152 | if (isTRUE(effects == "integrateoutRE")) { 153 | if (isTRUE(links$ilink != "identity")) { 154 | post 
<- as.data.table(as_draws_df(object))[index, ] 155 | 156 | dtmp <- standata(object, newdata = data, check_response = FALSE, allow_new_levels = TRUE) 157 | 158 | re <- as.data.table(object$ranef) 159 | 160 | if (is.null(dpar)) { 161 | usedpar <- "" 162 | } else { 163 | usedpar <- dpar 164 | } 165 | 166 | re <- re[dpar == usedpar] 167 | 168 | blocks <- unique(re$id) 169 | nblocks <- length(blocks) 170 | 171 | d2 <- sd <- L <- vector("list", nblocks) 172 | 173 | for (i in seq_len(nblocks)) { 174 | useblock <- blocks[i] 175 | usere <- re[id == useblock] 176 | num <- max(usere$cn) 177 | d2[[i]] <- .buildZ(data = dtmp, block = useblock, number = num, dpar = dpar) 178 | sd[[i]] <- .buildSD(data = post, ranef = usere, block = useblock, dpar = dpar) 179 | L[[i]] <- .buildL(data = post, block = useblock, number = num) 180 | names(d2)[i] <- names(sd)[i] <- names(L)[i] <- sprintf("Block%d", useblock) 181 | } 182 | 183 | yhat <- integratere(d = d2, sd = sd, L = L, k = k, 184 | yhat = yhat, backtrans = links$useilinknum) 185 | } 186 | } 187 | 188 | if (isTRUE(raw)) { 189 | if (isTRUE(summarize)) { 190 | message("summarize cannot be TRUE when raw = TRUE, setting to FALSE") 191 | summarize <- FALSE 192 | } 193 | if (isFALSE(posterior)) { 194 | message("posterior cannot be FALSE when raw = TRUE, setting to TRUE") 195 | posterior <- TRUE 196 | } 197 | } else { 198 | ## average across rows 199 | ## either using row wise means, or row wise bootstrapped means 200 | yhat <- .averagePosterior(yhat, resample = resample, seed = resampleseed) 201 | } 202 | 203 | out <- list( 204 | Summary = NULL, 205 | Posterior = NULL) 206 | 207 | if (isTRUE(summarize)) { 208 | out$Summary <- bsummary(yhat, ...) 209 | } 210 | if (isTRUE(posterior)) { 211 | out$Posterior <- yhat 212 | } 213 | 214 | return(out) 215 | } 216 | -------------------------------------------------------------------------------- /R/rimplementation.R: -------------------------------------------------------------------------------- 1 | #' @describeIn integratemvn Pure \code{R} implementation of \code{integratemvn} 2 | #' @importFrom stats rnorm 3 | integratemvnR <- function(X, k, sd, chol) { 4 | n <- length(sd) 5 | Z <- matrix(rnorm(k * n, mean = 0, sd = 1), nrow = k, ncol = n) 6 | if (n > 1) { 7 | Z <- Z %*% chol 8 | } 9 | for (i in seq_len(n)) { 10 | Z[, i] <- Z[, i] * sd[i] 11 | } 12 | X %*% t(Z) 13 | } 14 | 15 | #' @describeIn tab2mat Pure \code{R} implementation of \code{tab2mat} 16 | tab2matR <- function(X) { 17 | X <- as.vector(X) 18 | dims <- sqrt(length(X)) 19 | matrix(X, dims, dims, byrow = TRUE) 20 | } 21 | 22 | #' @describeIn integratere Pure \code{R} implementation of \code{integratere} 23 | integratereR <- function(d, sd, L, k, yhat, backtrans) { 24 | M <- nrow(yhat) 25 | N <- ncol(yhat) 26 | J <- length(sd) 27 | yhat2 <- matrix(0, M, N) 28 | for (i in seq_len(M)) { 29 | Z <- vector("list", J) 30 | for (re in seq_len(J)) { 31 | cholmat <- tab2matR(L[[re]][i, ]) 32 | dmat <- d[[re]] 33 | Z[[re]] <- integratemvnR(dmat, k, sd[[re]][i, ], cholmat) 34 | } 35 | Zall <- Z[[1]] 36 | if (J > 1) { 37 | for (re in 2:J) { 38 | Zall <- Zall + Z[[re]] 39 | } 40 | } 41 | for (nsamp in seq_len(k)) { 42 | Zall[, nsamp] <- Zall[, nsamp] + t(yhat[i, ]) 43 | } 44 | if (backtrans == 0) { 45 | Zall <- 1 / (1 + exp(-Zall)) 46 | } else if (backtrans == 1) { 47 | Zall <- exp(Zall) 48 | } else if (backtrans == 2) { 49 | Zall <- Zall^2 50 | } 51 | zm <- rowMeans(Zall) 52 | yhat2[i, ] <- t(zm) 53 | } 54 | return(yhat2) 55 | } 56 | 
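## Editorial sketch (not part of the package source): the pure R implementations
## above mirror the exported C++ routines and can be useful for testing or
## inspection. They are not exported, so they are accessed via `:::`. The inputs
## below are borrowed from the documented example for `integratere()` in
## R/RcppExports.R; only the dimensions of the result are checked because the
## values depend on the Monte Carlo draws. Assumes brmsmargins is installed.
library(brmsmargins)
out <- brmsmargins:::integratereR(
  d = list(matrix(1, 1, 1)),    # one intercept-only random effect block
  sd = list(matrix(1, 2, 1)),   # SD draws: 2 posterior draws x 1 random effect
  L = list(matrix(1, 2, 1)),    # Cholesky entries, one row per posterior draw
  k = 10L,                      # number of Monte Carlo samples for integration
  yhat = matrix(0, 2, 1),       # fixed effects predictions (draws x observations)
  backtrans = 0L)               # 0 = inverse logit back transformation
dim(out)  # 2 x 1: one integrated (marginal) prediction per posterior draw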
-------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' Calculate Percent of Observations Within or Without a Window 2 | #' 3 | #' This is an internal helper function to calculate and label 4 | #' the percentage of a posterior distribution that falls within 5 | #' the Region of Practical Equivalence (ROPE) or 6 | #' at or beyond a Minimally Important Difference (MID). 7 | #' It is designed to fail gracefully if no window given, and to 8 | #' give some useful labels about the windows / range used. 9 | #' Intended for use internally as part of \code{\link{brmsmargins}}. 10 | #' 11 | #' @param x A vector of values to evaluate. Required. 12 | #' @param window An optional numeric vector giving a window. 13 | #' @param within A logical value indicating whether to calculate the 14 | #' percentage within the window (if \code{TRUE}) or the 15 | #' percentage at or outside the window (if \code{FALSE}). 16 | #' Defaults to \code{TRUE}. 17 | #' @return A list with the \code{Window}, if specified else \code{NULL}, 18 | #' the \code{Percent} of observations, and a \code{Label} specifying the 19 | #' exact window used in human readable format. 20 | #' @keywords internal 21 | #' @importFrom extraoperators %e% 22 | .percent <- function(x, window = NULL, within = TRUE) { 23 | if (isTRUE(is.null(window))) { 24 | window <- NA_real_ 25 | pi <- NA_real_ 26 | lab <- NA_character_ 27 | } else { 28 | if (isFALSE(isTRUE(is.numeric(window)) && 29 | isTRUE(identical(length(window), 2L)))) { 30 | stop(sprintf("window must be a numeric vector with length 2, but found a %s vector of length %d", 31 | paste(class(window), collapse = "; "), length(window))) 32 | } 33 | 34 | window <- as.numeric(window) 35 | if (isTRUE(within)) { 36 | lab <- sprintf("[%s, %s]", 37 | as.character(min(window)), 38 | as.character(max(window))) 39 | } else if (isFALSE(within)) { 40 | lab <- sprintf("[-Inf, %s] | [%s, Inf]", 41 | as.character(min(window)), 42 | as.character(max(window))) 43 | } 44 | pi <- mean(x %e% lab, na.rm = TRUE) * 100 45 | } 46 | list( 47 | Window = window, 48 | Percent = pi, 49 | Label = lab) 50 | } 51 | 52 | #' Personal Preference Based Bayesian Summary 53 | #' 54 | #' Returns a summary of a posterior distribution for a single 55 | #' parameter / value. It is based on personal preference. Notably, it does not 56 | #' only use \code{bayestestR::describe_posterior}, an excellent function, 57 | #' because of the desire to also describe the percentage of the full posterior 58 | #' distribution that is at or exceeding the value of a 59 | #' Minimally Important Difference (MID). MIDs are used in clinical studies with outcome 60 | #' measures where there are pre-defined differences that are considered clinically 61 | #' important, which is distinct from the ROPE or general credible intervals capturing 62 | #' uncertainty. 63 | #' 64 | #' @param x The posterior distribution of a parameter 65 | #' @param CI A numeric value indicating the desired width of the credible interval. 66 | #' Defaults to \code{0.99} currently, but this is subject to change. 67 | #' a 99% interval was chosen as the default as there have been recent arguments 68 | #' made in the realm of meta science that there are, essentially, too many 69 | #' false positives and that many of the \dQuote{findings} in science are not able 70 | #' to be replicated. 
71 | #' In any case, users should ideally specify a desired CI width, and not rely on 72 | #' defaults. 73 | #' @param CIType A character string indicating the type of credible interval, passed on 74 | #' to the \code{\link[bayestestR]{ci}} function as the method for CIs. 75 | #' @param ROPE Either left as \code{NULL}, the default, or a numeric vector of 76 | #' length 2, specifying the lower and upper thresholds for the 77 | #' Region of Practical Equivalence (ROPE). 78 | #' @param MID Either left as \code{NULL}, the default, or a numeric vector of 79 | #' length 2, specifying the lower and upper thresholds for a 80 | #' Minimally Important Difference (MID). Unlike the ROPE, percentages for 81 | #' the MID are calculated as at or exceeding the bounds specified by this 82 | #' argument, whereas the ROPE is the percentage of the posterior at or inside 83 | #' the bounds specified. 84 | #' @return A \code{data.table} with the mean, \code{M} 85 | #' \describe{ 86 | #' \item{M}{the mean of the posterior samples} 87 | #' \item{Mdn}{the median of the posterior samples} 88 | #' \item{LL}{the lower limit of the credible interval} 89 | #' \item{UL}{the upper limit of the credible interval} 90 | #' \item{PercentROPE}{the percentage of posterior samples falling into the ROPE} 91 | #' \item{PercentMID}{the percentage of posterior samples falling at or beyond the MID} 92 | #' \item{CI}{the width of the credible interval used} 93 | #' \item{CIType}{the type of credible interval used (e.g., highest density interval)} 94 | #' \item{ROPE}{a label describing the values included in the ROPE} 95 | #' \item{MID}{a label describing the values included in the MID} 96 | #' } 97 | #' @export 98 | #' @importFrom bayestestR ci 99 | #' @importFrom data.table data.table 100 | #' @importFrom stats median 101 | #' @importFrom extraoperators %gele% 102 | #' @references 103 | #' Kruschke, J. K. (2018). 104 | #' \doi{10.1177/2515245918771304} 105 | #' \dQuote{Rejecting or accepting parameter values in Bayesian estimation} 106 | #' @examples 107 | #' 108 | #' bsummary(rnorm(1000)) 109 | #' 110 | #' bsummary(rnorm(1000), ROPE = c(-.5, .5), MID = c(-1, 1)) 111 | bsummary <- function(x, CI = 0.99, CIType = "HDI", ROPE = NULL, MID = NULL) { 112 | if (isTRUE(missing(x))) { 113 | stop("'x' is required and cannot be missing. 
See ?bsummary for details") 114 | } 115 | 116 | if (isFALSE(is.numeric(x))) { 117 | stop(sprintf("to be summarized x must be numeric, but %s class was found", 118 | paste(class(x), collapse = "; "))) 119 | } 120 | 121 | if (isFALSE(CI %gele% c(0, 1))) { 122 | stop(paste( 123 | sprintf("'CI' is %s", as.character(CI)), 124 | "'CI' should specify the desired credible interval as a numeric value in (0, 1)", 125 | "See ?bayestestR::ci for details", 126 | sep = "\n")) 127 | } 128 | 129 | if (isFALSE(CIType %in% c("HDI", "ETI", "BCI", "SI"))) { 130 | stop(paste( 131 | sprintf("'CIType' is %s", as.character(CIType)), 132 | "'CIType' should be one of 'HDI' (default), 'ETI', 'BCI', or 'SI'", 133 | "See ?bayestestR::ci for details", 134 | sep = "\n")) 135 | } 136 | 137 | ropes <- .percent(x, window = ROPE, within = TRUE) 138 | mids <- .percent(x, window = MID, within = FALSE) 139 | 140 | m <- mean(x, na.rm = TRUE) 141 | mdn <- median(x, na.rm = TRUE) 142 | cis <- bayestestR::ci(x, ci = CI, method = CIType) 143 | out <- data.table( 144 | M = as.numeric(m), 145 | Mdn = as.numeric(mdn), 146 | LL = as.numeric(cis$CI_low), 147 | UL = as.numeric(cis$CI_high), 148 | PercentROPE = as.numeric(ropes$Percent), 149 | PercentMID = as.numeric(mids$Percent), 150 | CI = as.numeric(CI), 151 | CIType = CIType, 152 | ROPE = ropes$Label, 153 | MID = mids$Label) 154 | 155 | return(out) 156 | } 157 | 158 | #' Check Object Class is a Table 159 | #' 160 | #' Internal utility function confirm that an object 161 | #' has the attributes needed to be used as data. 162 | #' Currently it should be a \code{tbl}, 163 | #' \code{data.frame}, or \code{data.table}. 164 | #' 165 | #' @param x An object to be evaluated. 166 | #' @param requireNames A logical, whether names are 167 | #' required. Defaults to \code{TRUE} 168 | #' @return An empty string if no issues. Otherwise, a non zero 169 | #' string with warning/error messages. 170 | #' @keywords internal 171 | #' @importFrom data.table is.data.table 172 | .checktab <- function(x, requireNames = TRUE) { 173 | xclass <- paste(class(x), collapse = "; ") 174 | pass1 <- isTRUE(inherits(x, "tbl")) || 175 | isTRUE(is.data.frame(x)) || 176 | isTRUE(is.data.table(x)) || 177 | is.matrix(x) 178 | 179 | cnames <- colnames(x) 180 | pass2 <- isFALSE(is.null(cnames)) 181 | 182 | errmsg1 <- errmsg2 <- "" 183 | 184 | if (isFALSE(pass1)) { 185 | errmsg1 <- sprintf(paste0( 186 | "Object is of class %s ", 187 | "but must be a matrix, data.frame, data.table, or tbl.\n"), 188 | xclass) 189 | } 190 | if (isFALSE(pass2)) { 191 | errmsg2 <- "Variables/Columns must be named, but column names were NULL.\n" 192 | } 193 | 194 | if (isTRUE(requireNames)) { 195 | out <- paste0(errmsg1, errmsg2) 196 | } else { 197 | out <- errmsg1 198 | } 199 | return(out) 200 | } 201 | 202 | #' Check a \code{brmsfit} Object has Random Effects 203 | #' 204 | #' Internal utility function to check whether a \code{brmsfit} 205 | #' object has any random effects or not. 206 | #' 207 | #' @param object An object to be evaluated. 208 | #' @return \code{TRUE} if any random effects present. 209 | #' \code{FALSE} if no random effects present. 210 | #' @keywords internal 211 | is.random <- function(object) { 212 | .assertbrmsfit(object) 213 | 214 | isTRUE(nrow(object$ranef) >= 1L) 215 | } 216 | 217 | #' Extract the Link from a \code{brms} Model 218 | #' 219 | #' Internal utility function to take a \code{brmsfit} object 220 | #' and extract the link for a specific \code{dpar}. 
221 | #' 222 | #' @param object A \code{brmsfit} class model object. 223 | #' @param dpar The dpar for which the link should be extracted. 224 | #' @return A character string, the link. 225 | #' @keywords internal 226 | #' @importFrom brms brmsterms 227 | .extractlink <- function(object, dpar) { 228 | .assertbrmsfit(object) 229 | .assertdpar(object, dpar) 230 | 231 | if (isTRUE(is.null(dpar))) { 232 | link <- object$family$link 233 | } else if (isFALSE(is.null(dpar))) { 234 | tmp <- brmsterms(object$formula)$dpars 235 | tmp <- vapply(tmp, function(x) x$family$link, 236 | FUN.VALUE = character(1)) 237 | link <- tmp[[dpar]] 238 | } 239 | 240 | return(link) 241 | } 242 | 243 | #' Convert a Link Function Name to a List 244 | #' 245 | #' Internal utility function used in [prediction()]. 246 | #' Takes a link function name as a character string, 247 | #' the type of effect to be used, and the desired back transformation 248 | #' and returns a list with all the options needed to execute the desired 249 | #' options in [prediction()]. 250 | #' 251 | #' @param link The link named in a \code{brmsfit} object 252 | #' @param effects A character string, the type of effect desired 253 | #' @param backtrans A character string, the type of back transformation 254 | #' @return A list with eight elements. 255 | #' \describe{ 256 | #' \item{scale}{A character string giving the argument to be passed to [fitted()].} 257 | #' \item{ilink}{A character string giving the name of the inverse link function.} 258 | #' \item{ifun}{Inverse link function as an \code{R} function.} 259 | #' \item{ilinknum}{An integer giving the inverse link / transformation to be applied in [integratere()], needed as this is a C++ function and cannot use the \code{R} based inverse link function.} 260 | #' } 261 | #' @importFrom stats plogis qlogis 262 | #' @keywords internal 263 | .links <- function(link, 264 | effects = c("fixedonly", "includeRE", "integrateoutRE"), 265 | backtrans = c("response", "linear", "identity", "invlogit", "exp", "square", "inverse")) { 266 | effects <- match.arg(effects, several.ok = FALSE) 267 | backtrans <- match.arg(backtrans, several.ok = FALSE) 268 | 269 | if (isTRUE(backtrans %in% c("linear", "identity"))) { 270 | ## options if back transformation is linear (meaning on linear scale) 271 | ## or identity, meaning no back transformation 272 | ## either way we do the same thing which is nothing 273 | scale <- "linear" 274 | useinverselink <- inverselink <- "identity" 275 | } else if (isTRUE(backtrans %in% c("invlogit", "exp", "square", "inverse"))) { 276 | ## options if back transformations were custom specified 277 | ## in these cases we get predictions on linear scale 278 | ## and then manually apply back transformation 279 | scale <- "linear" 280 | useinverselink <- inverselink <- backtrans 281 | } else if (isTRUE(backtrans %in% "response")) { 282 | ## options for when using the generic asking for 283 | ## predictions on the original response scale 284 | ## in this case we need to proceed differently depending on 285 | ## the 'effect' argument 286 | if (isTRUE(link == "identity")) { 287 | inverselink <- "identity" 288 | } else if (isTRUE(link == "logit")) { 289 | inverselink <- "invlogit" 290 | } else if (isTRUE(link == "log")) { 291 | inverselink <- "exp" 292 | } else if (isTRUE(link == "sqrt")) { 293 | inverselink <- "square" 294 | } else if (isTRUE(link == "inverse")) { 295 | inverselink <- "inverse" 296 | } else { 297 | inverselink <- "notsupported" 298 | } 299 | 300 | if (isTRUE(effects %in% 
c("fixedonly", "includeRE"))) { 301 | ## for both these 'effects' we can rely on fitted() 302 | ## to apply the correct back transformation, so we 303 | ## do not need to do anything other than set scale to "response" 304 | scale <- "response" 305 | useinverselink <- "identity" 306 | } else if (isTRUE(effects == "integrateoutRE")) { 307 | ## when integrating out random effects 308 | ## we cannot rely on backtransformation being handled 309 | ## by fitted(), so we must do it manually 310 | scale <- "linear" 311 | if (isFALSE(identical(inverselink, "notsupported"))) { 312 | useinverselink <- inverselink 313 | } else { 314 | stop("non supported link function detected for integrating out REs") 315 | } 316 | } 317 | } 318 | 319 | ## function to switch inverselink with a function and number 320 | invlinkswitch <- function(x) { 321 | switch(x, 322 | identity = list(fun = function(x) x, linkfun = function(x) x, num = -9L), 323 | invlogit = list(fun = plogis, linkfun = qlogis, num = 0L), 324 | exp = list(fun = exp, linkfun = log, num = 1L), 325 | square = list(fun = function(x) x^2, linkfun = sqrt, num = 2L), 326 | inverse = list(fun = function(x) 1 / x), linkfun = function(x) 1 / x, num = 3L) 327 | } 328 | 329 | ## link function 330 | linkfun <- invlinkswitch(inverselink)$linkfun 331 | uselinkfun <- invlinkswitch(useinverselink)$linkfun 332 | 333 | ## inverse link function 334 | inversefun <- invlinkswitch(inverselink)$fun 335 | useinversefun <- invlinkswitch(useinverselink)$fun 336 | 337 | ## argument for integratere() C++ function 338 | inverselinknum <- invlinkswitch(inverselink)$num 339 | useinverselinknum <- invlinkswitch(useinverselink)$num 340 | 341 | ## when integrating out REs, need to use identity inversefun 342 | ## because the back transformation happens in C++ via inverselinknum 343 | if (isTRUE(effects == "integrateoutRE")) { 344 | useinversefun <- function(x) x 345 | } 346 | 347 | list( 348 | scale = scale, 349 | ilink = inverselink, 350 | ifun = inversefun, 351 | ilinknum = inverselinknum, 352 | useifun = useinversefun, 353 | useilinknum = useinverselinknum, 354 | fun = linkfun, 355 | usefun = uselinkfun) 356 | } 357 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | [![CRAN-status](https://www.r-pkg.org/badges/version/brmsmargins)](https://cran.r-project.org/package=brmsmargins) 3 | [![R-CMD-check](https://github.com/JWiley/brmsmargins/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/JWiley/brmsmargins/actions) 4 | [![codecov](https://codecov.io/gh/JWiley/brmsmargins/graph/badge.svg?token=VXf0Qo0PRY)](https://codecov.io/gh/JWiley/brmsmargins) 5 | [![lifecycle](https://lifecycle.r-lib.org/articles/figures/lifecycle-experimental.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 6 | 7 | 8 | ## Overview 9 | 10 | This package has functions to calculate marginal effects 11 | from `brms` models ( http://paul-buerkner.github.io/brms/ ). 12 | A central motivator is to calculate average marginal effects (AMEs) 13 | for continuous and discrete predictors in fixed effects only and 14 | mixed effects regression models including location scale models. 15 | 16 | This table shows an overview of currently supported 17 | models / features where "X" indicates a specific model / feature 18 | is currently supported. The column 'Fixed' means fixed effects only models. 19 | The column 'Mixed' means mixed effects models. 
20 |  21 | | Distribution / Feature | Fixed | Mixed | 22 | |------------------------------------------------|--------------------|--------------------| 23 | | Gaussian / Normal | :heavy_check_mark: | :heavy_check_mark: | 24 | | Bernoulli (logistic) | :heavy_check_mark: | :heavy_check_mark: | 25 | | Poisson | :heavy_check_mark: | :heavy_check_mark: | 26 | | Negative Binomial | :heavy_check_mark: | :heavy_check_mark: | 27 | | Gamma | :heavy_check_mark: | :heavy_check_mark: | 28 | | Beta | :heavy_check_mark: | :heavy_check_mark: | 29 | | Multinomial logistic | :x: | :x: | 30 | | Multivariate models | :x: | :x: | 31 | | Gaussian location scale models | :heavy_check_mark: | :heavy_check_mark: | 32 | | Natural log / square root transformed outcomes | :heavy_check_mark: | :heavy_check_mark: | 33 | | Monotonic predictors | :heavy_check_mark: | :heavy_check_mark: | 34 | | Custom outcome transformations | :x: | :x: | 35 | 36 | In general, any distribution supported by `brms` that generates one and 37 | only one predicted value (e.g., not multinomial logistic regression models) 38 | should be supported for fixed effects only models. 39 | Also note that currently, only Gaussian random effects are supported. This is not too 40 | limiting as even for Bernoulli, Poisson, etc. outcomes, the random effects 41 | are commonly assumed to have a Gaussian distribution. 42 | 43 | Here is a quick syntax overview of how to use the main function, 44 | `brmsmargins()`. 45 | 46 | #### Fixed effects, continuous predictor. 47 | 48 | ```r 49 | h <- .001 50 | ames <- brmsmargins( 51 | object = model, 52 | add = data.frame(x = c(0, h)), 53 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 54 | effects = "fixedonly") 55 | 56 | ames$ContrastSummary 57 | ``` 58 | 59 | #### Fixed effects, discrete predictor. 60 | 61 | ```r 62 | ames <- brmsmargins( 63 | object = model, 64 | at = data.frame(x = c(0, 1)), 65 | contrasts = cbind("AME x" = c(-1, 1)), 66 | effects = "fixedonly") 67 | 68 | ames$Summary 69 | ames$ContrastSummary 70 | ``` 71 | 72 | #### Mixed effects, continuous predictor. 73 | 74 | ```r 75 | h <- .001 76 | ames <- brmsmargins( 77 | object = model, 78 | add = data.frame(x = c(0, h)), 79 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 80 | effects = "integrateoutRE") 81 | 82 | ames$ContrastSummary 83 | ``` 84 | 85 | #### Mixed effects, discrete predictor. 86 | 87 | ```r 88 | ames <- brmsmargins( 89 | object = model, 90 | at = data.frame(x = c(0, 1)), 91 | contrasts = cbind("AME x" = c(-1, 1)), 92 | effects = "integrateoutRE") 93 | 94 | ames$Summary 95 | ames$ContrastSummary 96 | ``` 97 | 98 | #### Mixed Effects Location Scale, continuous predictor 99 | 100 | ```r 101 | h <- .001 102 | ames <- brmsmargins( 103 | object = model, 104 | add = data.frame(x = c(0, h)), 105 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 106 | dpar = "sigma", 107 | effects = "integrateoutRE") 108 | 109 | ames$ContrastSummary 110 | ``` 111 | 112 | #### Mixed Effects Location Scale, discrete predictor 113 | 114 | ```r 115 | ames <- brmsmargins( 116 | object = model, 117 | at = data.frame(x = c(0, 1)), 118 | contrasts = cbind("AME x" = c(-1, 1)), 119 | dpar = "sigma", 120 | effects = "integrateoutRE") 121 | 122 | ames$Summary 123 | ames$ContrastSummary 124 | ``` 125 | 126 | Note that even for mixed effects models, it is possible to generate 127 | predictions and marginal effects from the fixed effects only 128 | by specifying `effects = "fixedonly"`. However, this is 129 | generally not a good idea, so it is not shown by default.
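If you did want such fixed-effects-only estimates from a mixed effects model, the call is the same as the mixed effects examples above except for the `effects` argument. Here is a minimal sketch, reusing the hypothetical `model`, `x`, and `h` placeholders from the earlier examples:

```r
## fixed-effects-only AME from a mixed effects model
## (shown for illustration; integrating out random effects is usually preferred)
h <- .001
ames <- brmsmargins(
  object = model,
  add = data.frame(x = c(0, h)),
  contrasts = cbind("AME x" = c(-1 / h, 1 / h)),
  effects = "fixedonly")

ames$ContrastSummary
```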
130 | 131 | Also note that for all of these examples `ames$Summary` would 132 | have a summary of the averaged predicted values. These often 133 | are useful for discrete predictors. For continuous 134 | predictors, if the focus is on marginal effects, they often are 135 | not interesting. However, the `at` argument can be used 136 | with continuous predictors to generate interesting averaged 137 | predicted values. For example, this would get predicted 138 | values integrating out random effects for a range of ages 139 | averaging (marginalizing) all other predictors / covariates. 140 | 141 | ```r 142 | ames <- brmsmargins( 143 | object = model, 144 | at = data.frame(age = c(20, 30, 40, 50, 60)), 145 | effects = "integrateoutRE") 146 | 147 | ames$Summary 148 | ``` 149 | 150 | ## Installation 151 | 152 | You can install the package from CRAN by running this code: 153 | 154 | ```r 155 | install.packages("brmsmargins") 156 | ``` 157 | 158 | Alternately, for the latest, development version, run: 159 | 160 | ```r 161 | remotes::install_github("JWiley/brmsmargins") 162 | ``` 163 | 164 | ## Learn More 165 | 166 | There are three vignettes that introduce how to use the package 167 | for several scenarios. 168 | 169 | - [Fixed effects only models](https://joshuawiley.com/brmsmargins/articles/fixed-effects-marginaleffects.html) 170 | (also called single level models). This also is the best place to start learning 171 | about how to use the package. It includes a brief amount of motivation for 172 | why we would want to calculate marginal effects at all. 173 | - [Mixed effects models](https://joshuawiley.com/brmsmargins/articles/mixed-effects-marginaleffects.html) 174 | (also called multilevel models). This shows how to calculate marginal effects 175 | for mixed effects / multilevel models. There are runnable examples, but 176 | not much background. 177 | - [Location scale models](https://joshuawiley.com/brmsmargins/articles/location-scale-marginaleffects.html). 178 | Location scale models are models where both the location (e.g., mean) and 179 | scale (e.g., variance / residual standard deviation) are explicitly 180 | modeled as outcomes. These require use of distributional parameters `dpar` in `brms`. 181 | This vignette shows how to calculate marginal effects from location scale models for the 182 | scale part. 183 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://joshuawiley.com/brmsmargins 2 | 3 | home: 4 | title: brmsmargins - Bayesian marginal effects 5 | description: Marginal effects for Bayesian fixed effects, mixed effects, and location scale models 6 | 7 | authors: 8 | Joshua F. 
Wiley: 9 | href: https://joshuawiley.com 10 | 11 | destination: docs 12 | 13 | articles: 14 | - title: Vignettes 15 | navbar: ~ 16 | contents: 17 | - fixed-effects-marginaleffects 18 | - mixed-effects-marginaleffects 19 | - location-scale-marginaleffects 20 | 21 | footer: 22 | structure: 23 | left: developed_by 24 | right: built_with 25 | 26 | template: 27 | bootstrap: 5 28 | light-switch: true 29 | 30 | opengraph: 31 | twitter: 32 | creator: "@WileyResearch" 33 | site: https://joshuawiley.com 34 | 35 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /man/assertall.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/assert.R 3 | \name{assertall} 4 | \alias{assertall} 5 | \alias{.assertbrmsfit} 6 | \alias{.assertgaussian} 7 | \alias{.assertfamily} 8 | \alias{.assertdpar} 9 | \alias{.assertlink} 10 | \title{Check Assertions about a \code{brmsfit} Model Object} 11 | \usage{ 12 | .assertbrmsfit(object) 13 | 14 | .assertgaussian(object) 15 | 16 | .assertfamily(object) 17 | 18 | .assertdpar(object, dpar) 19 | 20 | .assertlink(object, dpar) 21 | } 22 | \arguments{ 23 | \item{object}{A \code{brmsfit} model object to be evaluated.} 24 | 25 | \item{dpar}{Required for \code{.assertdpar} which checks this is valid. 26 | Optional for \code{.assertlink} which will use \code{NULL} if not 27 | specified. If specified, this should be \code{NULL} or 28 | a character string.} 29 | } 30 | \value{ 31 | An invisible, logical \code{TRUE} if the assertion is met. 32 | An (informative) error message if the assertion is not met. 33 | } 34 | \description{ 35 | These are a set of internal utility functions. 36 | They are not intended for general use. 37 | Instead, they are intended to be called in circumstances 38 | where the expected result is \code{TRUE}. 39 | All of them are designed to try to give informative error 40 | messages if the assertion is not met. 41 | All of them result in a \code{stop()} error if the assertion is not met. 42 | } 43 | \details{ 44 | \itemize{ 45 | \item{\code{.assertbrmsfit}}{asserts that the object should be of class \code{brmsfit}.} 46 | \item{\code{.assertgaussian}}{asserts that all random effects are Gaussian.} 47 | \item{\code{.assertfamily}}{asserts that the distribution (family) of the outcome is a currently supported family. Only applies when integrating out random effects.} 48 | \item{\code{.assertlink}}{asserts that the link function is a currently supported link function. 
Only applies when integrating out random effects.} 49 | } 50 | } 51 | \keyword{internal} 52 | -------------------------------------------------------------------------------- /man/brmsmargins.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/margins.R 3 | \name{brmsmargins} 4 | \alias{brmsmargins} 5 | \title{Calculate Marginal Effects from 'brms' Models} 6 | \usage{ 7 | brmsmargins( 8 | object, 9 | at = NULL, 10 | wat = NULL, 11 | add = NULL, 12 | newdata = model.frame(object), 13 | CI = 0.99, 14 | CIType = "HDI", 15 | contrasts = NULL, 16 | ROPE = NULL, 17 | MID = NULL, 18 | subset = NULL, 19 | dpar = NULL, 20 | seed, 21 | verbose = FALSE, 22 | ... 23 | ) 24 | } 25 | \arguments{ 26 | \item{object}{A \emph{required} argument specifying a fitted \code{brms} model object.} 27 | 28 | \item{at}{An \emph{optional} argument (but note, either \code{at} or \code{add} are 29 | \emph{required}) specifying an object inheriting from data frame indicating 30 | the values to hold specific variables at when calculating average 31 | predictions. This is intended for AMEs from categorical variables.} 32 | 33 | \item{wat}{An \emph{optional} list with named elements including one element named, 34 | \dQuote{ID} with a single character string, the name of the variable 35 | in the model frame that is the ID variable. Additionally, 36 | there should be one or more named elements, named after variables 37 | in the model (and specified in the \code{at} argument), that 38 | contain a \code{data.table} or \code{data.frame} with three 39 | variables: (1) the ID variable giving IDs, (2) the values 40 | specified for the variable in the \code{at} argument, and 41 | (3) the actual values to be substituted for each ID. 42 | \code{wat} cannot be non null unless \code{at} also is non null.} 43 | 44 | \item{add}{An \emph{optional} argument (but note, either \code{at} or \code{add} are 45 | \emph{required}) specifying an object inheriting from data frame indicating 46 | the values to add to specific variables at when calculating average 47 | predictions. This is intended for AMEs for continuous variables.} 48 | 49 | \item{newdata}{An \emph{optional} argument specifying an object inheriting 50 | from data frame indicating the baseline values to use for predictions and AMEs. 51 | It uses a sensible default: the model frame from the \code{brms} 52 | model object passed on the \code{object} argument.} 53 | 54 | \item{CI}{An \emph{optional} argument with a numeric value specifying the width 55 | of the credible interval. Defaults to \code{0.99}. This default is arbitrary, 56 | but is purposefully higher than the common \code{0.95} to encourage science 57 | with greater acknowledgment of uncertainty or larger sample sizes (ideally).} 58 | 59 | \item{CIType}{An \emph{optional} argument, a character string specifying the 60 | type of credible interval (e.g., highest density interval). It is passed down to 61 | \code{\link{bsummary}} which in turn passes it to 62 | \code{\link[bayestestR]{ci}}. Defaults to \dQuote{HDI}.} 63 | 64 | \item{contrasts}{An \emph{optional} argument specifying a contrast matrix. 65 | The posterior predictions matrix 66 | is post multiplied by the contrast matrix, so they must be conformable. 67 | The posterior predictions matrix has a separate column for each row in the 68 | \code{at} or \code{add} object, so the contrast matrix should have the same 69 | number of rows. 
It can have multiple columns, if you desire multiple specific 70 | contrasts.} 71 | 72 | \item{ROPE}{An \emph{optional} argument, that can either be left as \code{NULL}, 73 | the default, or a numeric vector of length 2, specifying the 74 | lower and upper thresholds for the 75 | Region of Practical Equivalence (ROPE).} 76 | 77 | \item{MID}{An \emph{optional} argument, that can either be left as \code{NULL}, 78 | the default, or a numeric vector of length 2, specifying the 79 | lower and upper thresholds for a 80 | Minimally Important Difference (MID). Unlike the ROPE, percentages for 81 | the MID are calculated as at or exceeding the bounds specified by this 82 | argument, whereas the ROPE is the percentage of the posterior at or inside 83 | the bounds specified.} 84 | 85 | \item{subset}{An \emph{optional} argument, a character string that is a 86 | valid \code{R} expression used to subset the dataset passed in \code{newdata}, 87 | prior to analysis. Defaults to \code{NULL}.} 88 | 89 | \item{dpar}{An \emph{optional} argument giving the parameter passed on to the \code{dpar} 90 | argument of \code{fitted()} in brms. Defaults to \code{NULL}, 91 | indicating the mean or location parameter typically.} 92 | 93 | \item{seed}{An \emph{optional} argument that controls whether (and if so what) random seed 94 | to use. This does not matter when using fixed effects only. However, 95 | when using Monte Carlo integration to integrate out random effects from 96 | mixed effects models, it is critical if you are looking at a continuous 97 | marginal effect with some small offset value as otherwise the 98 | Monte Carlo error from one set of predictions to another may exceed 99 | the true predicted difference. 100 | If \code{seed} is left missing, the default, then a single random integer 101 | between +\- 1e7 is chosen and used to set the seed before each 102 | prediction. If manually chosen (recommended for reproducibility), 103 | the seed should either be a single value, in which case this single 104 | value is used to set the seed before each prediction. 105 | Alternately, it can be a vector of seeds with either the same length 106 | as the number of rows in \code{at} or \code{add}, whichever was specified. 107 | This is generally not what you want, as it means that even for 108 | the same input data, you would get slightly different predictions 109 | (when integrating out random effects) due to Monte Carlo variation. 110 | Finally, rather than being missing, you can explicitly set 111 | \code{seed = NULL}, if you do not want any seed to be set. 112 | This would be fine, for instance, when only using fixed effects, 113 | or if you know what you are doing and intend that behavior when 114 | integrating out random effects.} 115 | 116 | \item{verbose}{An \emph{optional} argument, a logical value indicating whether to print 117 | more verbose messages. Defaults to \code{FALSE} which is quieter. Set to 118 | \code{TRUE} for more messages to be printed where relevant.} 119 | 120 | \item{...}{An \emph{optional} argument, additional arguments passed on to 121 | \code{\link{prediction}}. In particular, the \code{effects} argument of \code{\link[=prediction]{prediction()}} 122 | is important for mixed effects models to control how random effects 123 | are treated in the predictions, which subsequently changes the 124 | marginal effect estimates.} 125 | } 126 | \value{ 127 | A list with four elements. 128 | \itemize{ 129 | \item{\code{Posterior}}{Posterior distribution of all predictions.
These predictions default to fixed effects only, but by specifying options to \code{\link[=prediction]{prediction()}} they can include random effects or be predictions integrating out random effects.} 130 | \item{\code{Summary}}{A summary of the predictions.} 131 | \item{\code{Contrasts}}{Posterior distribution of all contrasts, if a contrast matrix was specified.} 132 | \item{\code{ContrastSummary}}{A summary of the posterior distribution of all contrasts, if specified} 133 | } 134 | } 135 | \description{ 136 | This function is designed to help calculate marginal effects 137 | including average marginal effects (AMEs) from \code{brms} models. 138 | Arguments are labeled as \emph{required} when it is required that the 139 | user directly specify the argument. Arguments are labeled as 140 | \emph{optional} when either the argument is optional or there are 141 | sensible default values so that users do not typically need to specify 142 | the argument. 143 | } 144 | \details{ 145 | The main parts required for the function are a fitted model object, 146 | (via the \code{object} argument) a dataset to be used for prediction, 147 | (via the \code{newdata} argument which defaults to the model frame), 148 | and a dataset passed to either \code{at} or \code{add}. 149 | The steps are as follows: 150 | \enumerate{ 151 | \item Check that the function inputs (model object, data, etc.) are valid. 152 | \item Take the dataset from the \code{newdata} argument and either 153 | add the values from the first row of \code{add} or replace the values 154 | using the first row of \code{at}. Only variables specified in 155 | \code{at} or \code{add} are modified. Other variables are left as is. 156 | \item Use the \code{fitted()} function to generate predictions based on 157 | this modified dataset. If \code{effects} is set to \dQuote{fixedonly} 158 | (meaning only generate predictions using fixed effects) 159 | or to \dQuote{includeRE} 160 | (meaning generate predictions using fixed and random effects), 161 | then predictions are generated entirely using the \code{fitted()} 162 | function and are, typically back transformed to the response scale. 163 | For mixed effects models with fixed and random effects where 164 | \code{effects} is set to \dQuote{integrateoutRE}, then \code{fitted()} 165 | is only used to generate predictions using the fixed effects on the linear 166 | scale. For each prediction generated, the random effects are integrated out 167 | by drawing \code{k} random samples from the model assumed random effect(s) 168 | distribution. These are added to the fixed effects predictions, 169 | back transformed, and then averaged over all \code{k} random samples to 170 | perform numerical Monte Carlo integration. 171 | \item All the predictions for each posterior draw, after any back transformation 172 | has been applied, are averaged, resulting in one, marginal value for each 173 | posterior draw. These are marginal predictions. They are average marginal 174 | predictions if averaging over the sample dataset, or may be marginal predictions 175 | at the means, if the initial input dataset used mean values, etc. 176 | \item Steps two to four are repeated for each row of \code{at} or \code{add}. 177 | Results are combined into a matrix where the columns are different 178 | rows from \code{at} or \code{add} and the rows are different posterior 179 | draws. 180 | \item If contrasts were specified, using a contrast matrix, the 181 | marginal prediction matrix is post multiplied by the contrast matrix. 
182 | Depending on the choice(s) of \code{add} or \code{at} and the 183 | values in the contrast matrix, these can then be 184 | average marginal effects (AMEs) by using numerical integration 185 | (\code{add} with 0 and a very close to 0 value) or 186 | discrete difference (\code{at} with say 0 and 1 as values) 187 | for a given predictor(s). 188 | \item The marginal predictions and the contrasts, if specified are 189 | summarized. 190 | } 191 | 192 | Although \code{brmsmargins()} is focused on helping to calculate 193 | marginal effects, it can also be used to generate marginal predictions, 194 | and indeed these marginal predictions are the foundation of any 195 | marginal effect estimates. Through manipulating the input data, 196 | \code{at} or \code{add} and the contrast matrix, other types of estimates 197 | averaged or weighting results in specific ways are also possible. 198 | } 199 | \examples{ 200 | \dontrun{ 201 | #### Testing #### 202 | ## sample data and logistic model with brms 203 | set.seed(1234) 204 | Tx <- rep(0:1, each = 50) 205 | ybin <- c(rep(0:1, c(40,10)), rep(0:1, c(10,40))) 206 | logitd <- data.frame(Tx = Tx, ybin = ybin) 207 | logitd$x <- rnorm(100, mean = logitd$ybin, sd = 2) 208 | 209 | mbin <- brms::brm(ybin ~ Tx + x, data = logitd, family = brms::bernoulli()) 210 | 211 | summary(mbin) 212 | 213 | ## now check AME for Tx 214 | tmp <- brmsmargins( 215 | object = mbin, 216 | at = data.table::data.table(Tx = 0:1), 217 | contrasts = matrix(c(-1, 1), nrow = 2), 218 | ROPE = c(-.05, +.05), 219 | MID = c(-.10, +.10)) 220 | 221 | tmp$Summary 222 | tmp$ContrastSummary ## Tx AME 223 | 224 | 225 | ## now check AME for Tx with bootstrapping the AME population 226 | tmpalt <- brmsmargins( 227 | object = mbin, 228 | at = data.table::data.table(Tx = 0:1), 229 | contrasts = matrix(c(-1, 1), nrow = 2), 230 | ROPE = c(-.05, +.05), 231 | MID = c(-.10, +.10), 232 | resample = 100L) 233 | 234 | tmpalt$Summary 235 | tmpalt$ContrastSummary ## Tx AME 236 | 237 | ## now check AME for continuous predictor, x 238 | ## use .01 as an approximation for first derivative 239 | ## 1 / .01 in the contrast matrix to get back to a one unit change metric 240 | tmp2 <- brmsmargins( 241 | object = mbin, 242 | add = data.table::data.table(x = c(0, .01)), 243 | contrasts = matrix(c(-1/.01, 1/.01), nrow = 2), 244 | ROPE = c(-.05, +.05), 245 | MID = c(-.10, +.10)) 246 | 247 | tmp2$ContrastSummary ## x AME 248 | 249 | if (FALSE) { 250 | library(lme4) 251 | data(sleepstudy) 252 | fit <- brms::brm(Reaction ~ 1 + Days + (1 + Days | Subject), 253 | data = sleepstudy, 254 | cores = 4) 255 | 256 | summary(fit, prob = 0.99) 257 | 258 | tmp <- brmsmargins( 259 | object = fit, 260 | at = data.table::data.table(Days = 0:1), 261 | contrasts = matrix(c(-1, 1), nrow = 2), 262 | ROPE = c(-.05, +.05), 263 | MID = c(-.10, +.10), CIType = "ETI", effects = "integrateoutRE", k = 5L) 264 | 265 | tmp$Summary 266 | tmp$ContrastSummary 267 | } 268 | } 269 | } 270 | \references{ 271 | Pavlou, M., Ambler, G., Seaman, S., & Omar, R. Z. (2015) 272 | \doi{10.1186/s12874-015-0046-6} 273 | \dQuote{A note on obtaining correct marginal predictions from a random intercepts model for binary outcomes} 274 | and 275 | Skrondal, A., & Rabe-Hesketh, S. (2009) 276 | \doi{10.1111/j.1467-985X.2009.00587.x} 277 | \dQuote{Prediction in multilevel generalized linear models} 278 | and 279 | Norton EC, Dowd BE, Maciejewski ML. 
(2019) 280 | \doi{10.1001/jama.2019.1954} 281 | \dQuote{Marginal Effects—Quantifying the Effect of Changes in Risk Factors in Logistic Regression Models} 282 | } 283 | -------------------------------------------------------------------------------- /man/bsummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{bsummary} 4 | \alias{bsummary} 5 | \title{Personal Preference Based Bayesian Summary} 6 | \usage{ 7 | bsummary(x, CI = 0.99, CIType = "HDI", ROPE = NULL, MID = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{The posterior distribution of a parameter} 11 | 12 | \item{CI}{A numeric value indicating the desired width of the credible interval. 13 | Defaults to \code{0.99} currently, but this is subject to change. 14 | a 99\% interval was chosen as the default as there have been recent arguments 15 | made in the realm of meta science that there are, essentially, too many 16 | false positives and that many of the \dQuote{findings} in science are not able 17 | to be replicated. 18 | In any case, users should ideally specify a desired CI width, and not rely on 19 | defaults.} 20 | 21 | \item{CIType}{A character string indicating the type of credible interval, passed on 22 | to the \code{\link[bayestestR]{ci}} function as the method for CIs.} 23 | 24 | \item{ROPE}{Either left as \code{NULL}, the default, or a numeric vector of 25 | length 2, specifying the lower and upper thresholds for the 26 | Region of Practical Equivalence (ROPE).} 27 | 28 | \item{MID}{Either left as \code{NULL}, the default, or a numeric vector of 29 | length 2, specifying the lower and upper thresholds for a 30 | Minimally Important Difference (MID). Unlike the ROPE, percentages for 31 | the MID are calculated as at or exceeding the bounds specified by this 32 | argument, whereas the ROPE is the percentage of the posterior at or inside 33 | the bounds specified.} 34 | } 35 | \value{ 36 | A \code{data.table} with the mean, \code{M} 37 | \describe{ 38 | \item{M}{the mean of the posterior samples} 39 | \item{Mdn}{the median of the posterior samples} 40 | \item{LL}{the lower limit of the credible interval} 41 | \item{UL}{the upper limit of the credible interval} 42 | \item{PercentROPE}{the percentage of posterior samples falling into the ROPE} 43 | \item{PercentMID}{the percentage of posterior samples falling at or beyond the MID} 44 | \item{CI}{the width of the credible interval used} 45 | \item{CIType}{the type of credible interval used (e.g., highest density interval)} 46 | \item{ROPE}{a label describing the values included in the ROPE} 47 | \item{MID}{a label describing the values included in the MID} 48 | } 49 | } 50 | \description{ 51 | Returns a summary of a posterior distribution for a single 52 | parameter / value. It is based on personal preference. Notably, it does not 53 | only use \code{bayestestR::describe_posterior}, an excellent function, 54 | because of the desire to also describe the percentage of the full posterior 55 | distribution that is at or exceeding the value of a 56 | Minimally Important Difference (MID). MIDs are used in clinical studies with outcome 57 | measures where there are pre-defined differences that are considered clinically 58 | important, which is distinct from the ROPE or general credible intervals capturing 59 | uncertainty. 
60 | } 61 | \examples{ 62 | 63 | bsummary(rnorm(1000)) 64 | 65 | bsummary(rnorm(1000), ROPE = c(-.5, .5), MID = c(-1, 1)) 66 | } 67 | \references{ 68 | Kruschke, J. K. (2018). 69 | \doi{10.1177/2515245918771304} 70 | \dQuote{Rejecting or accepting parameter values in Bayesian estimation} 71 | } 72 | -------------------------------------------------------------------------------- /man/builders.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/builders.R 3 | \name{builders} 4 | \alias{builders} 5 | \alias{.namesL} 6 | \alias{.buildL} 7 | \alias{.namesSD} 8 | \alias{.buildSD} 9 | \alias{.namesZ} 10 | \alias{.buildZ} 11 | \title{Build the Variable Names or Data Objects for Estimation} 12 | \usage{ 13 | .namesL(block, number) 14 | 15 | .buildL(data, block, number, dpar) 16 | 17 | .namesSD(ranef, block, dpar) 18 | 19 | .buildSD(data, ranef, block, dpar) 20 | 21 | .namesZ(block, number, dpar) 22 | 23 | .buildZ(data, block, number, dpar) 24 | } 25 | \arguments{ 26 | \item{block}{Which random effect block to use. An integer.} 27 | 28 | \item{number}{The number of elements in that random effect block. An integer.} 29 | 30 | \item{data}{A data object. For example the result of \code{\link[=make_standata]{make_standata()}} 31 | for \code{\link[=.buildZ]{.buildZ()}}, which is a list, 32 | or a dataset of the posterior draws such as from \code{\link[=as_draws_df]{as_draws_df()}} 33 | for \code{\link[=.buildL]{.buildL()}} and \code{\link[=.buildSD]{.buildSD()}}.} 34 | 35 | \item{dpar}{Which dpar to use. Does not apply to the L matrix.} 36 | 37 | \item{ranef}{A data set with information about the model object random effects. 38 | Only used for \code{.namesSD} and \code{.buildSD}.} 39 | } 40 | \value{ 41 | A character vector for all \code{.names} functions or a matrix 42 | for all \code{.build} functions. 43 | } 44 | \description{ 45 | These are a set of internal utility functions. 46 | They are not intended for general use. 47 | } 48 | \details{ 49 | \itemize{ 50 | \item{\code{.namesL}}{Generate names of an L matrix from \code{brms}. Create the variable names for the Cholesky decomposition of the random effects correlation matrix in \code{brms}. Note that \code{brms} returns the lower triangular matrix and we want the upper triangular matrix, so the names are transposed. The results can then be passed to the \code{tab2mat} function to convert the row vector into a matrix.} 51 | \item{\code{.buildL}}{Returns the L matrix object. Rows are posterior draws.} 52 | \item{\code{.namesSD}}{Create the names of random effect standard deviation estimates.} 53 | \item{\code{.buildSD}}{Return matrix of random effect standard deviation estimates. 
Rows are posterior draws.} 54 | \item{\code{.namesZ}}{Create the names of random effects data for predictions.} 55 | \item{\code{.buildZ}}{Return matrix of data for random effect predictions.} 56 | } 57 | } 58 | \keyword{internal} 59 | -------------------------------------------------------------------------------- /man/dot-averagePosterior.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/averageposterior.R 3 | \name{.averagePosterior} 4 | \alias{.averagePosterior} 5 | \title{Average Over Posterior Predictions} 6 | \usage{ 7 | .averagePosterior(posterior, resample = 0L, seed = FALSE) 8 | } 9 | \arguments{ 10 | \item{posterior}{A posterior matrix type object. It is assumed that different 11 | predictions to be averaged over are on different columns. Different posterior 12 | draws are on different rows.} 13 | 14 | \item{resample}{An integer indicating the number of 15 | bootstrap resamples of the posterior predictions to 16 | use when calculating summaries. Defaults to \code{0L}. 17 | See the details section for more informations as its implementation 18 | is experimental and it may not operate as one would expect.} 19 | 20 | \item{seed}{A seed for random number generation. Defaults to \code{FALSE}, 21 | which means no seed is set. 22 | Only used if \code{resample} is a positive, non-zero integer.} 23 | } 24 | \value{ 25 | A vector of the averaged posterior. 26 | } 27 | \description{ 28 | Internal function that averages over posterior predictions 29 | using either \code{\link[=rowMeans]{rowMeans()}} or \code{\link[=rowBootMeans]{rowBootMeans()}}, the latter 30 | being useful to incorporate uncertainty from the 31 | inputs being used to generate predictions. 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/dot-checktab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{.checktab} 4 | \alias{.checktab} 5 | \title{Check Object Class is a Table} 6 | \usage{ 7 | .checktab(x, requireNames = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{An object to be evaluated.} 11 | 12 | \item{requireNames}{A logical, whether names are 13 | required. Defaults to \code{TRUE}} 14 | } 15 | \value{ 16 | An empty string if no issues. Otherwise, a non zero 17 | string with warning/error messages. 18 | } 19 | \description{ 20 | Internal utility function confirm that an object 21 | has the attributes needed to be used as data. 22 | Currently it should be a \code{tbl}, 23 | \code{data.frame}, or \code{data.table}. 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/dot-extractlink.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{.extractlink} 4 | \alias{.extractlink} 5 | \title{Extract the Link from a \code{brms} Model} 6 | \usage{ 7 | .extractlink(object, dpar) 8 | } 9 | \arguments{ 10 | \item{object}{A \code{brmsfit} class model object.} 11 | 12 | \item{dpar}{The dpar for which the link should be extracted.} 13 | } 14 | \value{ 15 | A character string, the link. 
16 | } 17 | \description{ 18 | Internal utility function to take a \code{brmsfit} object 19 | and extract the link for a specific \code{dpar}. 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/dot-links.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{.links} 4 | \alias{.links} 5 | \title{Convert a Link Function Name to a List} 6 | \usage{ 7 | .links( 8 | link, 9 | effects = c("fixedonly", "includeRE", "integrateoutRE"), 10 | backtrans = c("response", "linear", "identity", "invlogit", "exp", "square", "inverse") 11 | ) 12 | } 13 | \arguments{ 14 | \item{link}{The link named in a \code{brmsfit} object} 15 | 16 | \item{effects}{A character string, the type of effect desired} 17 | 18 | \item{backtrans}{A character string, the type of back transformation} 19 | } 20 | \value{ 21 | A list with eight elements. 22 | \describe{ 23 | \item{scale}{A character string giving the argument to be passed to \code{\link[=fitted]{fitted()}}.} 24 | \item{ilink}{A character string giving the name of the inverse link function.} 25 | \item{ifun}{Inverse link function as an \code{R} function.} 26 | \item{ilinknum}{An integer giving the inverse link / transformation to be applied in \code{\link[=integratere]{integratere()}}, needed as this is a C++ function and cannot use the \code{R} based inverse link function.} 27 | } 28 | } 29 | \description{ 30 | Internal utility function used in \code{\link[=prediction]{prediction()}}. 31 | Takes a link function name as a character string, 32 | the type of effect to be used, and the desired back transformation 33 | and returns a list with all the options needed to execute the desired 34 | options in \code{\link[=prediction]{prediction()}}. 35 | } 36 | \keyword{internal} 37 | -------------------------------------------------------------------------------- /man/dot-percent.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{.percent} 4 | \alias{.percent} 5 | \title{Calculate Percent of Observations Within or Without a Window} 6 | \usage{ 7 | .percent(x, window = NULL, within = TRUE) 8 | } 9 | \arguments{ 10 | \item{x}{A vector of values to evaluate. Required.} 11 | 12 | \item{window}{An optional numeric vector giving a window.} 13 | 14 | \item{within}{A logical value indicating whether to calculate the 15 | percentage within the window (if \code{TRUE}) or the 16 | percentage at or outside the window (if \code{FALSE}). 17 | Defaults to \code{TRUE}.} 18 | } 19 | \value{ 20 | A list with the \code{Window}, if specified else \code{NULL}, 21 | the \code{Percent} of observations, and a \code{Label} specifying the 22 | exact window used in human readable format. 23 | } 24 | \description{ 25 | This is an internal helper function to calculate and label 26 | the percentage of a posterior distribution that falls within 27 | the Region of Practical Equivalence (ROPE) or 28 | at or beyond a Minimally Important Difference (MID). 29 | It is designed to fail gracefully if no window given, and to 30 | give some useful labels about the windows / range used. 31 | Intended for use internally as part of \code{\link{brmsmargins}}. 
32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/integratemvn.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R, R/rimplementation.R 3 | \name{integratemvn} 4 | \alias{integratemvn} 5 | \alias{integratemvnR} 6 | \title{Integrate over Multivariate Normal Random Effects} 7 | \usage{ 8 | integratemvn(X, k, sd, chol) 9 | 10 | integratemvnR(X, k, sd, chol) 11 | } 12 | \arguments{ 13 | \item{X}{A numeric matrix of the data to be multiplied by the random effects} 14 | 15 | \item{k}{An integer, the number of random samples to be used for numerical integration} 16 | 17 | \item{sd}{A numeric vector of the standard deviations} 18 | 19 | \item{chol}{A numeric matrix, which should be the Cholesky decomposition of the 20 | correlation matrix of the multivariate normal distribution.} 21 | } 22 | \value{ 23 | A numeric matrix with random values 24 | } 25 | \description{ 26 | Used in the process of Monte Carlo integration 27 | over multivariate normal random effects. This generates the 28 | random draws from the multivariate normal distribution 29 | and multiplies these by the data. 30 | Not intended to be called directly by most users. 31 | } 32 | \section{Functions}{ 33 | \itemize{ 34 | \item \code{integratemvnR()}: Pure \code{R} implementation of \code{integratemvn} 35 | 36 | }} 37 | \examples{ 38 | integratemvn( 39 | X = matrix(1, 1, 2), 40 | k = 100L, 41 | sd = c(10, 5), 42 | chol = chol(matrix(c(1, .5, .5, 1), 2))) 43 | 44 | integratemvn(matrix(1, 1, 1), 100L, c(5), matrix(1)) 45 | } 46 | -------------------------------------------------------------------------------- /man/integratere.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R, R/rimplementation.R 3 | \name{integratere} 4 | \alias{integratere} 5 | \alias{integratereR} 6 | \title{Integrate over Random Effects} 7 | \usage{ 8 | integratere(d, sd, L, k, yhat, backtrans) 9 | 10 | integratereR(d, sd, L, k, yhat, backtrans) 11 | } 12 | \arguments{ 13 | \item{d}{A list with model matrices for each random effect block.} 14 | 15 | \item{sd}{A list with standard deviation matrices for each random effect block 16 | where rows are different posterior draws.} 17 | 18 | \item{L}{A list with matrices for each random effect block containing the parts of 19 | the L matrix, the Cholesky decomposition of the random effect correlation matrix.} 20 | 21 | \item{k}{An integer, the number of samples for Monte Carlo integration.} 22 | 23 | \item{yhat}{A matrix of the fixed effects predictions} 24 | 25 | \item{backtrans}{An integer, indicating the type of back transformation. 26 | 0 indicates inverse logit (e.g., for logistic regression). 27 | 1 indicates exponential (e.g., for poisson or negative binomial regression or if outcome was natural log transformed). 28 | 2 indicates square (e.g., if outcome was square root transformed). 29 | 3 indicates inverse (e.g., if outcome was inverse transformed such as Gamma regression) 30 | Any other integer results in no transformation. -9 is recommended as the option for no 31 | transformation as any future transformations supported will be other, positive integers.} 32 | } 33 | \value{ 34 | A numeric matrix with the Monte Carlo integral calculated. 
35 | } 36 | \description{ 37 | Used to conduct Monte Carlo integration over Gaussian random effects. 38 | Not intended to be called directly by most users. 39 | } 40 | \section{Functions}{ 41 | \itemize{ 42 | \item \code{integratereR()}: Pure \code{R} implementation of \code{integratere} 43 | 44 | }} 45 | \examples{ 46 | integratere( 47 | d = list(matrix(1, 1, 1)), 48 | sd = list(matrix(1, 2, 1)), 49 | L = list(matrix(1, 2, 1)), 50 | k = 10L, 51 | yhat = matrix(0, 2, 1), 52 | backtrans = 0L) 53 | } 54 | -------------------------------------------------------------------------------- /man/is.random.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{is.random} 4 | \alias{is.random} 5 | \title{Check a \code{brmsfit} Object has Random Effects} 6 | \usage{ 7 | is.random(object) 8 | } 9 | \arguments{ 10 | \item{object}{An object to be evaluated.} 11 | } 12 | \value{ 13 | \code{TRUE} if any random effects present. 14 | \code{FALSE} if no random effects present. 15 | } 16 | \description{ 17 | Internal utility function to check whether a \code{brmsfit} 18 | object has any random effects or not. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/lmcpp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{lmcpp} 4 | \alias{lmcpp} 5 | \title{Fast Linear Regression} 6 | \usage{ 7 | lmcpp(X, y) 8 | } 9 | \arguments{ 10 | \item{X}{A numeric model matrix. If intercept is desired, it must already have been added as a column.} 11 | 12 | \item{y}{A numeric matrix. A single column if one response variable or multiple columns 13 | where each column is a different response, such as a for marginal coefficients where 14 | each column is a different MCMC sample.} 15 | } 16 | \value{ 17 | A numeric matrix with the coefficient. 18 | } 19 | \description{ 20 | Used to get marginal coefficients off of a generalized linear mixed model. 21 | } 22 | \examples{ 23 | lmcpp(cbind(1, mtcars$hp, mtcars$am), as.matrix(mtcars[, c("mpg", "qsec")])) 24 | } 25 | -------------------------------------------------------------------------------- /man/marginalcoef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/marginalcoef.R 3 | \name{marginalcoef} 4 | \alias{marginalcoef} 5 | \title{Marginal Coefficients from a 'brms' Model} 6 | \usage{ 7 | marginalcoef( 8 | object, 9 | summarize = TRUE, 10 | posterior = FALSE, 11 | index, 12 | backtrans = c("response", "linear", "identity", "invlogit", "exp", "square", "inverse"), 13 | k = 100L, 14 | seed, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{object}{A fitted brms model object that includes random effects. Required.} 20 | 21 | \item{summarize}{A logical value, whether or not to 22 | calculate summaries of the posterior predictions. 23 | Defaults to \code{TRUE}.} 24 | 25 | \item{posterior}{A logical value whether or not to 26 | save and return the posterior samples. Defaults 27 | to \code{FALSE} as the assumption is a typical 28 | use case is to return the summaries only.} 29 | 30 | \item{index}{An optional integer vector, giving the posterior draws 31 | to be used in the calculations. 
If omitted, defaults to all 32 | posterior draws.} 33 | 34 | \item{backtrans}{A character string indicating the type of 35 | back transformation to be applied. Can be one of 36 | \dQuote{response} meaning to use the response scale, 37 | \dQuote{linear} or \dQuote{identity} meaning to use the linear predictor scale, 38 | or a specific back transformation desired, from a possible list of 39 | \dQuote{invlogit}, \dQuote{exp}, \dQuote{square}, or \dQuote{inverse}. 40 | Custom back transformations should only be needed if, for example, 41 | the outcome variable was transformed prior to fitting the model.} 42 | 43 | \item{k}{An integer providing the number of random draws to use for 44 | integrating out the random effects. Only relevant when \code{effects} 45 | is \dQuote{integrateoutRE}.} 46 | 47 | \item{seed}{An \emph{optional} argument that controls whether (and if so what) random seed 48 | to use. This can help with reproducibility of results. 49 | It is missing by default.} 50 | 51 | \item{...}{Additional arguments passed to \code{bsummary()}, 52 | and only relevant if \code{summarize} is \code{TRUE}.} 53 | } 54 | \value{ 55 | A list with \code{Summary} and \code{Posterior}. 56 | Some of these may be \code{NULL} depending on the arguments used. 57 | } 58 | \description{ 59 | Calculate marginal coefficients from a \code{brms} 60 | generalized linear mixed model using the method proposed by Hedeker (2018). 61 | } 62 | \references{ 63 | Hedeker, D., du Toit, S. H., Demirtas, H. & Gibbons, R. D. (2018) 64 | \doi{10.1111/biom.12707} 65 | \dQuote{A note on marginalization of regression parameters from mixed models of binary outcomes} 66 | } 67 | -------------------------------------------------------------------------------- /man/prediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction.R 3 | \name{prediction} 4 | \alias{prediction} 5 | \title{Marginal Posterior Predictions from a 'brms' Model} 6 | \usage{ 7 | prediction( 8 | object, 9 | data, 10 | summarize = TRUE, 11 | posterior = FALSE, 12 | index, 13 | dpar = NULL, 14 | resample = 0L, 15 | resampleseed = FALSE, 16 | effects = c("fixedonly", "includeRE", "integrateoutRE"), 17 | backtrans = c("response", "linear", "identity", "invlogit", "exp", "square", "inverse"), 18 | k = 100L, 19 | raw = FALSE, 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{object}{A \emph{required} argument specifying a fitted 25 | \code{brms} model object.} 26 | 27 | \item{data}{A \emph{required} argument specifying a data frame or 28 | data table passed to \code{fitted()} as the new data to be used 29 | for predictions.} 30 | 31 | \item{summarize}{An \emph{optional} argument, a logical value, whether 32 | or not to calculate summaries of the posterior predictions. 33 | Defaults to \code{TRUE}.} 34 | 35 | \item{posterior}{An \emph{optional} argument, a logical value whether 36 | or not to save and return the posterior samples. Defaults 37 | to \code{FALSE} as the assumption is a typical 38 | use case is to return the summaries only.} 39 | 40 | \item{index}{An \emph{optional} argument, an integer vector, giving the 41 | posterior draws to be used in the calculations. If omitted, 42 | defaults to all posterior draws.} 43 | 44 | \item{dpar}{An \emph{optional} argument, the parameter passed on to the 45 | \code{dpar} argument of \code{fitted()} in brms. 
Defaults to \code{NULL} 46 | indicating the mean or location parameter typically.} 47 | 48 | \item{resample}{An \emph{optional} argument, an integer indicating the 49 | number of bootstrap resamples of the posterior predictions to 50 | use when calculating summaries. Defaults to \code{0L}. 51 | See documentation from \code{\link[=.averagePosterior]{.averagePosterior()}} for more details. 52 | This should be considered experimental.} 53 | 54 | \item{resampleseed}{An \emph{optional} argument, a seed for random number 55 | generation. Defaults to \code{FALSE}, which means no seed is set. 56 | Only used if \code{resample} is a positive, non-zero integer. 57 | See documentation from \code{\link[=.averagePosterior]{.averagePosterior()}} for more details. 58 | This should be considered experimental.} 59 | 60 | \item{effects}{An \emph{optional} argument, a character string indicating 61 | the type of prediction to be made. Can be one of 62 | \dQuote{fixedonly} meaning only use fixed effects, 63 | \dQuote{includeRE} meaning that random effects should be 64 | included in the predictions, or 65 | \dQuote{integrateoutRE} meaning that random effects should be 66 | integrated out / over in the predictions. 67 | It defaults to \dQuote{fixedonly} so is not typically required for 68 | a user to specify it.} 69 | 70 | \item{backtrans}{An \emph{optional} argument, a character string indicating 71 | the type of back transformation to be applied. Can be one of 72 | \dQuote{response} meaning to use the response scale, 73 | \dQuote{linear} or \dQuote{identity} meaning to use the linear predictor scale, 74 | or a specific back transformation desired, from a possible list of 75 | \dQuote{invlogit}, \dQuote{exp}, \dQuote{square}, or \dQuote{inverse}. 76 | Custom back transformations should only be needed if, for example, 77 | the outcome variable was transformed prior to fitting the model. 78 | It defaults to \dQuote{response} so is not typically required for 79 | a user to specify it.} 80 | 81 | \item{k}{An \emph{optional} argument, an integer providing the number of 82 | random draws to use for integrating out the random effects. 83 | Only relevant when \code{effects} is \dQuote{integrateoutRE}. 84 | It defaults to \code{100L}, a rather arbitrary number attempting to 85 | balance the increased precision that comes from a larger value, 86 | with the increased computational cost of more Monte Carlo simulations 87 | when integrating out random effects.} 88 | 89 | \item{raw}{An \emph{optional} argument, a logical value indicating whether to 90 | return the raw output or to average over the Monte Carlo samples. 91 | Defaults to \code{FALSE}. 92 | Setting it to \code{TRUE} can be useful if you want not only the 93 | full posterior distribution but also the \code{k} Monte Carlo samples 94 | used for the numerical integration. This cannot be used with 95 | \code{summarize = TRUE}.} 96 | 97 | \item{...}{Additional arguments passed to \code{bsummary()}, 98 | and only relevant if \code{summarize} is \code{TRUE}.} 99 | } 100 | \value{ 101 | A list with \code{Summary} and \code{Posterior}. 102 | Some of these may be \code{NULL} depending on the arguments used. 103 | } 104 | \description{ 105 | Calculate marginal predictions from a \code{brms} model. 106 | Marginal predictions average over the input data for each posterior draw. 107 | Marginal predictions for models with random effects will integrate 108 | over random effects. 
109 | Arguments are labeled as \emph{required} when it is required that the 110 | user directly specify the argument. Arguments are labeled as 111 | \emph{optional} when either the argument is optional or there are 112 | sensible default values so that users do not typically need to specify 113 | the argument. 114 | } 115 | \references{ 116 | Pavlou, M., Ambler, G., Seaman, S., & Omar, R. Z. (2015) 117 | \doi{10.1186/s12874-015-0046-6} 118 | \dQuote{A note on obtaining correct marginal predictions from a random intercepts model for binary outcomes} 119 | and 120 | Skrondal, A., & Rabe-Hesketh, S. (2009) 121 | \doi{10.1111/j.1467-985X.2009.00587.x} 122 | \dQuote{Prediction in multilevel generalized linear models} 123 | } 124 | -------------------------------------------------------------------------------- /man/rowBootMeans.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R 3 | \name{rowBootMeans} 4 | \alias{rowBootMeans} 5 | \title{Bootstrap Row Means} 6 | \usage{ 7 | rowBootMeans(x) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric matrix} 11 | } 12 | \value{ 13 | A numeric vector with the simple bootstrapped row means of the matrix 14 | } 15 | \description{ 16 | This takes a numeric matrix, bootstrap resamples each row, and then 17 | calculates the mean. The intended use case is for Bayesian posterior 18 | predictions from sample data. Instead of directly calculating the 19 | average marginal effect (AME) across all observed values, these can be 20 | bootstrapped, so that uncertainty in the target population, and thus 21 | the AME in the target population, can be incorporated. 22 | Model uncertainty is already assumed to be handled by the different posterior 23 | samples, which are assumed to be across rows. 24 | } 25 | \examples{ 26 | 27 | x <- matrix(1:9, byrow = TRUE, 3) 28 | replicate(10, rowBootMeans(x)) 29 | } 30 | -------------------------------------------------------------------------------- /man/tab2mat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RcppExports.R, R/rimplementation.R 3 | \name{tab2mat} 4 | \alias{tab2mat} 5 | \alias{tab2matR} 6 | \title{Convert a Row of a Table to a Square Matrix} 7 | \usage{ 8 | tab2mat(X) 9 | 10 | tab2matR(X) 11 | } 12 | \arguments{ 13 | \item{X}{a matrix} 14 | } 15 | \value{ 16 | A numeric matrix with one row. 17 | } 18 | \description{ 19 | Utility function to convert a row matrix to a square matrix. 20 | Used as the \code{brms} package returns things like the Cholesky 21 | decomposition matrix as separate columns where rows are posterior draws. 22 | Not intended to be called directly by most users. 
23 | } 24 | \section{Functions}{ 25 | \itemize{ 26 | \item \code{tab2matR()}: Pure \code{R} implementation of \code{tab2mat} 27 | 28 | }} 29 | \examples{ 30 | tab2mat(matrix(1:4, 1)) 31 | tab2mat(matrix(1:9, 1)) 32 | } 33 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 3 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | CXX_STD = CXX11 2 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) 3 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 4 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | #include 6 | 7 | using namespace Rcpp; 8 | 9 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 10 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 11 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 12 | #endif 13 | 14 | // integratemvn 15 | arma::mat integratemvn(const arma::mat& X, const int k, const Rcpp::NumericVector& sd, const arma::mat& chol); 16 | RcppExport SEXP _brmsmargins_integratemvn(SEXP XSEXP, SEXP kSEXP, SEXP sdSEXP, SEXP cholSEXP) { 17 | BEGIN_RCPP 18 | Rcpp::RObject rcpp_result_gen; 19 | Rcpp::RNGScope rcpp_rngScope_gen; 20 | Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); 21 | Rcpp::traits::input_parameter< const int >::type k(kSEXP); 22 | Rcpp::traits::input_parameter< const Rcpp::NumericVector& >::type sd(sdSEXP); 23 | Rcpp::traits::input_parameter< const arma::mat& >::type chol(cholSEXP); 24 | rcpp_result_gen = Rcpp::wrap(integratemvn(X, k, sd, chol)); 25 | return rcpp_result_gen; 26 | END_RCPP 27 | } 28 | // integratere 29 | arma::mat integratere(List d, List sd, List L, int k, const arma::mat& yhat, int backtrans); 30 | RcppExport SEXP _brmsmargins_integratere(SEXP dSEXP, SEXP sdSEXP, SEXP LSEXP, SEXP kSEXP, SEXP yhatSEXP, SEXP backtransSEXP) { 31 | BEGIN_RCPP 32 | Rcpp::RObject rcpp_result_gen; 33 | Rcpp::RNGScope rcpp_rngScope_gen; 34 | Rcpp::traits::input_parameter< List >::type d(dSEXP); 35 | Rcpp::traits::input_parameter< List >::type sd(sdSEXP); 36 | Rcpp::traits::input_parameter< List >::type L(LSEXP); 37 | Rcpp::traits::input_parameter< int >::type k(kSEXP); 38 | Rcpp::traits::input_parameter< const arma::mat& >::type yhat(yhatSEXP); 39 | Rcpp::traits::input_parameter< int >::type backtrans(backtransSEXP); 40 | rcpp_result_gen = Rcpp::wrap(integratere(d, sd, L, k, yhat, backtrans)); 41 | return rcpp_result_gen; 42 | END_RCPP 43 | } 44 | // lmcpp 45 | arma::mat lmcpp(const arma::mat& X, const arma::mat& y); 46 | RcppExport SEXP _brmsmargins_lmcpp(SEXP XSEXP, SEXP ySEXP) { 47 | BEGIN_RCPP 48 | Rcpp::RObject rcpp_result_gen; 49 | Rcpp::RNGScope rcpp_rngScope_gen; 50 | Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); 51 | 
Rcpp::traits::input_parameter< const arma::mat& >::type y(ySEXP); 52 | rcpp_result_gen = Rcpp::wrap(lmcpp(X, y)); 53 | return rcpp_result_gen; 54 | END_RCPP 55 | } 56 | // rowBootMeans 57 | NumericVector rowBootMeans(NumericMatrix x); 58 | RcppExport SEXP _brmsmargins_rowBootMeans(SEXP xSEXP) { 59 | BEGIN_RCPP 60 | Rcpp::RObject rcpp_result_gen; 61 | Rcpp::RNGScope rcpp_rngScope_gen; 62 | Rcpp::traits::input_parameter< NumericMatrix >::type x(xSEXP); 63 | rcpp_result_gen = Rcpp::wrap(rowBootMeans(x)); 64 | return rcpp_result_gen; 65 | END_RCPP 66 | } 67 | // tab2mat 68 | arma::mat tab2mat(const arma::mat& X); 69 | RcppExport SEXP _brmsmargins_tab2mat(SEXP XSEXP) { 70 | BEGIN_RCPP 71 | Rcpp::RObject rcpp_result_gen; 72 | Rcpp::RNGScope rcpp_rngScope_gen; 73 | Rcpp::traits::input_parameter< const arma::mat& >::type X(XSEXP); 74 | rcpp_result_gen = Rcpp::wrap(tab2mat(X)); 75 | return rcpp_result_gen; 76 | END_RCPP 77 | } 78 | 79 | static const R_CallMethodDef CallEntries[] = { 80 | {"_brmsmargins_integratemvn", (DL_FUNC) &_brmsmargins_integratemvn, 4}, 81 | {"_brmsmargins_integratere", (DL_FUNC) &_brmsmargins_integratere, 6}, 82 | {"_brmsmargins_lmcpp", (DL_FUNC) &_brmsmargins_lmcpp, 2}, 83 | {"_brmsmargins_rowBootMeans", (DL_FUNC) &_brmsmargins_rowBootMeans, 1}, 84 | {"_brmsmargins_tab2mat", (DL_FUNC) &_brmsmargins_tab2mat, 1}, 85 | {NULL, NULL, 0} 86 | }; 87 | 88 | RcppExport void R_init_brmsmargins(DllInfo *dll) { 89 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 90 | R_useDynamicSymbols(dll, FALSE); 91 | } 92 | -------------------------------------------------------------------------------- /src/integratemvn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | //' Integrate over Multivariate Normal Random Effects 7 | //' 8 | //' Used in the process of Monte Carlo integration 9 | //' over multivariate normal random effects. This generates the 10 | //' random draws from the multivariate normal distribution 11 | //' and multiplies these by the data. 12 | //' Not intended to be called directly by most users. 13 | //' 14 | //' @param X A numeric matrix of the data to be multiplied by the random effects 15 | //' @param k An integer, the number of random samples to be used for numerical integration 16 | //' @param sd A numeric vector of the standard deviations 17 | //' @param chol A numeric matrix, which should be the Cholesky decomposition of the 18 | //' correlation matrix of the multivariate normal distribution. 
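//' @details
//' A sketch of the computation: \code{k} draws are taken from a standard
//' normal distribution, post-multiplied by \code{chol} to induce the requested
//' correlation structure (when more than one random effect is present),
//' scaled column-wise by \code{sd}, and then multiplied by \code{X}.
//' The result has one row per row of \code{X} and one column per Monte Carlo draw.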
19 | //' @return A numeric matrix with random values 20 | //' @export 21 | //' @examples 22 | //' integratemvn( 23 | //' X = matrix(1, 1, 2), 24 | //' k = 100L, 25 | //' sd = c(10, 5), 26 | //' chol = chol(matrix(c(1, .5, .5, 1), 2))) 27 | //' 28 | //' integratemvn(matrix(1, 1, 1), 100L, c(5), matrix(1)) 29 | // [[Rcpp::export]] 30 | arma::mat integratemvn(const arma::mat& X, const int k, const Rcpp::NumericVector& sd, const arma::mat& chol) { 31 | int n = sd.length(); 32 | arma::mat Z = arma::randn(k, n); 33 | if (n > 1) { 34 | Z = Z * chol; 35 | } 36 | for (int i = 0; i < n; i++) { 37 | Z.col(i) *= sd(i); 38 | } 39 | arma::mat out = X * Z.t(); 40 | return(out); 41 | } 42 | -------------------------------------------------------------------------------- /src/integratemvn.h: -------------------------------------------------------------------------------- 1 | #ifndef __integratemvn__ 2 | #define __integratemvn__ 3 | 4 | #include 5 | arma::mat integratemvn(const arma::mat& X, const int k, const Rcpp::NumericVector& sd, const arma::mat& chol); 6 | 7 | #endif // __integratemvn__ 8 | -------------------------------------------------------------------------------- /src/integratere.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include "integratemvn.h" 3 | #include "tab2mat.h" 4 | // [[Rcpp::depends(RcppArmadillo)]] 5 | 6 | using namespace Rcpp; 7 | 8 | //' Integrate over Random Effects 9 | //' 10 | //' Used to conduct Monte Carlo integration over Gaussian random effects. 11 | //' Not intended to be called directly by most users. 12 | //' 13 | //' @param d A list with model matrices for each random effect block. 14 | //' @param sd A list with standard deviation matrices for each random effect block 15 | //' where rows are different posterior draws. 16 | //' @param L A list with matrices for each random effect block containing the parts of 17 | //' the L matrix, the Cholesky decomposition of the random effect correlation matrix. 18 | //' @param k An integer, the number of samples for Monte Carlo integration. 19 | //' @param yhat A matrix of the fixed effects predictions 20 | //' @param backtrans An integer, indicating the type of back transformation. 21 | //' 0 indicates inverse logit (e.g., for logistic regression). 22 | //' 1 indicates exponential (e.g., for poisson or negative binomial regression or if outcome was natural log transformed). 23 | //' 2 indicates square (e.g., if outcome was square root transformed). 24 | //' 3 indicates inverse (e.g., if outcome was inverse transformed such as Gamma regression) 25 | //' Any other integer results in no transformation. -9 is recommended as the option for no 26 | //' transformation as any future transformations supported will be other, positive integers. 27 | //' @return A numeric matrix with the Monte Carlo integral calculated. 
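//' @details
//' For each posterior draw (row of \code{yhat}), the corresponding row of each
//' element of \code{L} is reshaped into a Cholesky factor via \code{tab2mat},
//' \code{k} multivariate normal samples are generated for every random effect
//' block via \code{integratemvn}, the blocks are summed and added to the
//' fixed effects prediction, the result is back transformed according to
//' \code{backtrans}, and finally averaged over the \code{k} samples.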
28 | //' @export 29 | //' @examples 30 | //' integratere( 31 | //' d = list(matrix(1, 1, 1)), 32 | //' sd = list(matrix(1, 2, 1)), 33 | //' L = list(matrix(1, 2, 1)), 34 | //' k = 10L, 35 | //' yhat = matrix(0, 2, 1), 36 | //' backtrans = 0L) 37 | // [[Rcpp::export]] 38 | arma::mat integratere(List d, List sd, List L, int k, const arma::mat& yhat, int backtrans) { 39 | int M = yhat.n_rows; 40 | int N = yhat.n_cols; 41 | int J = sd.length(); 42 | 43 | arma::mat yhat2 = arma::zeros(M, N); 44 | 45 | for (int i = 0; i < M; i++) { 46 | List Z(J); 47 | 48 | for (int re = 0; re < J; re++) { 49 | NumericMatrix x = L[re]; 50 | arma::mat xmat = arma::mat(x.begin(), x.nrow(), x.ncol(), false); 51 | arma::mat cholmat = tab2mat(xmat.row(i)); 52 | arma::mat dmat = d[re]; 53 | 54 | NumericMatrix sdmat = sd[re]; 55 | NumericVector sdvec = sdmat(i, _); 56 | 57 | Z[re] = integratemvn(dmat, k, sdvec, cholmat); 58 | } 59 | 60 | // initialize matrix for all random effect predictions 61 | arma::mat Zall = Z[0]; 62 | if (J > 0) { 63 | for (int re = 1; re < J; re++) { 64 | arma::mat tmp = Z[re]; 65 | Zall += tmp; 66 | } 67 | } 68 | for (int nsamp = 0; nsamp < k; nsamp++) { 69 | Zall.col(nsamp) = Zall.col(nsamp) + yhat.row(i).t(); 70 | } 71 | if (backtrans == 0) { 72 | Zall = 1 / (1 + exp(-Zall)); 73 | } else if (backtrans == 1) { 74 | Zall = exp(Zall); 75 | } else if (backtrans == 2) { 76 | Zall = pow(Zall, 2); 77 | } else if (backtrans == 3) { 78 | Zall = 1 / Zall; 79 | } 80 | arma::colvec zm = arma::mean(Zall, 1); 81 | yhat2.row(i) = zm.t(); 82 | } 83 | return(yhat2); 84 | } 85 | -------------------------------------------------------------------------------- /src/lmcpp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | //' Fast Linear Regression 7 | //' 8 | //' Used to get marginal coefficients off of a generalized linear mixed model. 9 | //' 10 | //' @param X A numeric model matrix. If intercept is desired, it must already have been added as a column. 11 | //' @param y A numeric matrix. A single column if one response variable or multiple columns 12 | //' where each column is a different response, such as a for marginal coefficients where 13 | //' each column is a different MCMC sample. 14 | //' @return A numeric matrix with the coefficient. 15 | //' @export 16 | //' @examples 17 | //' lmcpp(cbind(1, mtcars$hp, mtcars$am), as.matrix(mtcars[, c("mpg", "qsec")])) 18 | // [[Rcpp::export]] 19 | arma::mat lmcpp(const arma::mat& X, const arma::mat& y) { 20 | int k = X.n_cols; 21 | int n = y.n_cols; 22 | 23 | arma::mat B(k, n); 24 | 25 | for (int i = 0; i < n; i++) { 26 | B.col(i) = solve(X, y.col(i)); 27 | } 28 | 29 | return(B); 30 | } 31 | -------------------------------------------------------------------------------- /src/lmcpp.h: -------------------------------------------------------------------------------- 1 | #ifndef __lmcpp__ 2 | #define __lmcpp__ 3 | 4 | #include 5 | arma::mat lmcpp(const arma::mat& X, const arma::mat& y); 6 | 7 | #endif // __lmcpp__ 8 | 9 | 10 | -------------------------------------------------------------------------------- /src/rowbootmeans.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | using namespace Rcpp; 3 | 4 | //' Bootstrap Row Means 5 | //' 6 | //' This takes a numeric matrix, bootstrap resamples each row, and then 7 | //' calculates the mean. 
The intended use case is for Bayesian posterior 8 | //' predictions from sample data. Instead of directly calculating the 9 | //' average marginal effect (AME) across all observed values, these can be 10 | //' bootstrapped, so that uncertainty in the target population, and thus 11 | //' the AME in the target population, can be incorporated. 12 | //' Model uncertainty is already assumed to be handled by the different posterior 13 | //' samples, which are assumed to be across rows. 14 | //' 15 | //' @param x A numeric matrix 16 | //' @return A numeric vector with the simple bootstrapped row means of the matrix 17 | //' @export 18 | //' @examples 19 | //' 20 | //' x <- matrix(1:9, byrow = TRUE, 3) 21 | //' replicate(10, rowBootMeans(x)) 22 | // [[Rcpp::export]] 23 | NumericVector rowBootMeans(NumericMatrix x) { 24 | NumericVector out( x.nrow() ); 25 | int n = x.nrow(); 26 | for(int i = 0; i < n; i++){ 27 | NumericVector tmp = x(i, _); 28 | out[i] = mean(sample(tmp, tmp.size(), true)); 29 | } 30 | return out; 31 | } 32 | -------------------------------------------------------------------------------- /src/tab2mat.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | // [[Rcpp::depends(RcppArmadillo)]] 3 | 4 | using namespace Rcpp; 5 | 6 | //' Convert a Row of a Table to a Square Matrix 7 | //' 8 | //' Utility function to convert a row matrix to a square matrix. 9 | //' Used as the \code{brms} package returns things like the Cholesky 10 | //' decomposition matrix as separate columns where rows are posterior draws. 11 | //' Not intended to be called directly by most users. 12 | //' 13 | //' @param X a matrix 14 | //' @return A numeric matrix with one row. 15 | //' @export 16 | //' @examples 17 | //' tab2mat(matrix(1:4, 1)) 18 | //' tab2mat(matrix(1:9, 1)) 19 | // [[Rcpp::export]] 20 | arma::mat tab2mat(const arma::mat& X) { 21 | int ncol = X.n_cols; 22 | double dims = sqrt(ncol); 23 | arma::mat Z = arma::zeros(dims, dims); 24 | 25 | for (int i = 0; i < dims; i++) { 26 | for (int j = 0; j < dims; j++) { 27 | Z(i, j) = X(0, j + dims * i); 28 | } 29 | } 30 | return(Z); 31 | } 32 | -------------------------------------------------------------------------------- /src/tab2mat.h: -------------------------------------------------------------------------------- 1 | #ifndef __tab2mat__ 2 | #define __tab2mat__ 3 | 4 | #include 5 | arma::mat tab2mat(const arma::mat& X); 6 | 7 | #endif // __tab2mat__ 8 | 9 | 10 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(data.table) 3 | library(brms) 4 | library(brmsmargins) 5 | 6 | test_check("brmsmargins") 7 | -------------------------------------------------------------------------------- /tests/testthat/test-bsummary.R: -------------------------------------------------------------------------------- 1 | test_that("bsummary errors if invalid input", { 2 | expect_error(bsummary(letters[1:5])) 3 | }) 4 | 5 | test_that("bsummary works", { 6 | x <- bsummary(1:100) 7 | 8 | ## check types 9 | expect_s3_class(x, "data.table") 10 | expect_type(x$M, "double") 11 | expect_type(x$Mdn, "double") 12 | expect_type(x$LL, "double") 13 | expect_type(x$UL, "double") 14 | expect_type(x$PercentROPE, "double") 15 | expect_type(x$PercentMID, "double") 16 | expect_type(x$CI, "double") 17 | expect_type(x$CIType, "character") 18 | expect_type(x$ROPE, "character") 19 | 
expect_type(x$MID, "character") 20 | 21 | ## check values 22 | expect_equal(x$M, 50.5) 23 | expect_equal(x$Mdn, 50.5) 24 | expect_equal(x$LL, 1) 25 | expect_equal(x$UL, 100) 26 | expect_equal(x$PercentROPE, NA_real_) 27 | expect_equal(x$PercentMID, NA_real_) 28 | expect_equal(x$CI, 0.99) 29 | expect_equal(x$CIType, "HDI") 30 | expect_equal(x$ROPE, NA_character_) 31 | expect_equal(x$MID, NA_character_) 32 | }) 33 | 34 | test_that("bsummary works with ROPEs and MIDs", { 35 | x <- bsummary((-50:60) / 100, ROPE = c(-.5, .5), MID = c(-1, 1)) 36 | 37 | ## check types 38 | expect_s3_class(x, "data.table") 39 | expect_type(x$M, "double") 40 | expect_type(x$Mdn, "double") 41 | expect_type(x$LL, "double") 42 | expect_type(x$UL, "double") 43 | expect_type(x$PercentROPE, "double") 44 | expect_type(x$PercentMID, "double") 45 | expect_type(x$CI, "double") 46 | expect_type(x$CIType, "character") 47 | expect_type(x$ROPE, "character") 48 | expect_type(x$MID, "character") 49 | 50 | ## check values 51 | expect_equal(x$M, 0.05) 52 | expect_equal(x$Mdn, 0.05) 53 | expect_true(x$PercentROPE > 50) 54 | expect_equal(x$PercentMID, 0) 55 | expect_equal(x$CI, 0.99) 56 | expect_equal(x$CIType, "HDI") 57 | expect_equal(x$ROPE, "[-0.5, 0.5]") 58 | expect_equal(x$MID, "[-Inf, -1] | [1, Inf]") 59 | }) 60 | -------------------------------------------------------------------------------- /tests/testthat/test-builders.R: -------------------------------------------------------------------------------- 1 | test_that(".namesL creates proper names that can be converted to a matrix", { 2 | expect_equal( 3 | brmsmargins:::.namesL(1, 1), 4 | "L_1[1,1]") 5 | 6 | expect_equal( 7 | dim(brmsmargins:::tab2matR( 8 | matrix(brmsmargins:::.namesL(1, 3), 1))), 9 | c(3L, 3L)) 10 | }) 11 | 12 | test_that(".namesZ creates proper names", { 13 | expect_equal( 14 | brmsmargins:::.namesZ(1, 3, NULL), 15 | c("Z_1_1", "Z_1_2", "Z_1_3")) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test-dot-checktab.R: -------------------------------------------------------------------------------- 1 | test_that(".checktab returns a non zero character string if invalid input", { 2 | x <- brmsmargins:::.checktab(1:5) 3 | expect_type(x, "character") 4 | expect_true(nzchar(x)) 5 | }) 6 | 7 | test_that(".checktab returns an empty character string if invalid input", { 8 | x <- brmsmargins:::.checktab(mtcars) 9 | expect_type(x, "character") 10 | expect_false(nzchar(x)) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test-dot-links.R: -------------------------------------------------------------------------------- 1 | test_that(".links returns correct values with identity link and fixedonly", { 2 | x <- brmsmargins:::.links( 3 | link = "identity", effects = "fixedonly", backtrans = "response") 4 | 5 | expect_type(x, "list") 6 | expect_equal(x$scale, "response") 7 | expect_equal(x$ilink, "identity") 8 | expect_equal(x$useifun(0.5), 0.5) 9 | expect_equal(x$ilinknum, -9) 10 | }) 11 | 12 | test_that(".links returns correct values with logit link and fixedonly", { 13 | x <- brmsmargins:::.links( 14 | link = "logit", effects = "fixedonly", backtrans = "response") 15 | 16 | expect_type(x, "list") 17 | expect_equal(x$scale, "response") 18 | expect_equal(x$ilink, "invlogit") 19 | expect_equal(x$useifun(0.5), 0.5) 20 | expect_equal(x$ilinknum, 0L) 21 | expect_equal(x$useilinknum, -9L) 22 | }) 23 | 24 | test_that(".links returns correct values with identity 
link and integrateoutRE", { 25 | x <- brmsmargins:::.links( 26 | link = "identity", effects = "integrateoutRE", backtrans = "response") 27 | 28 | expect_type(x, "list") 29 | expect_equal(x$scale, "linear") 30 | expect_equal(x$ilink, "identity") 31 | expect_equal(x$useifun(0.5), 0.5) 32 | expect_equal(x$ilinknum, -9) 33 | }) 34 | 35 | test_that(".links returns correct values with logit link and integrateoutRE", { 36 | x <- brmsmargins:::.links( 37 | link = "logit", effects = "integrateoutRE", backtrans = "response") 38 | 39 | expect_type(x, "list") 40 | expect_equal(x$scale, "linear") 41 | expect_equal(x$ilink, "invlogit") 42 | expect_equal(x$ifun(0.5), plogis(0.5)) 43 | expect_equal(x$useifun(0.5), 0.5) 44 | expect_equal(x$ilinknum, 0L) 45 | expect_equal(x$useilinknum, 0L) 46 | }) 47 | 48 | test_that(".links returns correct values with log link and integrateoutRE", { 49 | x <- brmsmargins:::.links( 50 | link = "log", effects = "integrateoutRE", backtrans = "response") 51 | 52 | expect_type(x, "list") 53 | expect_equal(x$scale, "linear") 54 | expect_equal(x$ilink, "exp") 55 | expect_equal(x$ifun(0.5), exp(0.5)) 56 | expect_equal(x$useifun(0.5), 0.5) 57 | expect_equal(x$ilinknum, 1L) 58 | expect_equal(x$useilinknum, 1L) 59 | }) 60 | 61 | test_that(".links returns correct values with sqrt link and integrateoutRE", { 62 | x <- brmsmargins:::.links( 63 | link = "sqrt", effects = "integrateoutRE", backtrans = "response") 64 | 65 | expect_type(x, "list") 66 | expect_equal(x$scale, "linear") 67 | expect_equal(x$ilink, "square") 68 | expect_equal(x$ifun(0.5), (0.5)^2) 69 | expect_equal(x$useifun(0.5), 0.5) 70 | expect_equal(x$ilinknum, 2) 71 | expect_equal(x$useilinknum, 2) 72 | }) 73 | -------------------------------------------------------------------------------- /tests/testthat/test-dot-percent.R: -------------------------------------------------------------------------------- 1 | test_that(".percent returns NA values when window is NULL", { 2 | x <- brmsmargins:::.percent(1:10, window = NULL) 3 | 4 | ## check types 5 | expect_type(x, "list") 6 | expect_type(x$Window, "double") 7 | expect_type(x$Percent, "double") 8 | expect_type(x$Label, "character") 9 | 10 | ## check values 11 | expect_equal(x$Window, NA_real_) 12 | expect_equal(x$Percent, NA_real_) 13 | expect_equal(x$Label, NA_character_) 14 | }) 15 | 16 | test_that(".percent returns NA values when within is TRUE", { 17 | x <- brmsmargins:::.percent(1:10, window = c(3, 5)) 18 | 19 | ## check types 20 | expect_type(x, "list") 21 | expect_type(x$Window, "double") 22 | expect_type(x$Percent, "double") 23 | expect_type(x$Label, "character") 24 | 25 | ## check values 26 | expect_equal(x$Window, c(3, 5)) 27 | expect_equal(x$Percent, 30) 28 | expect_equal(x$Label, "[3, 5]") 29 | }) 30 | 31 | test_that(".percent returns NA values when within is FALSE", { 32 | x <- brmsmargins:::.percent(1:10, window = c(2, 6), within = FALSE) 33 | 34 | ## check types 35 | expect_type(x, "list") 36 | expect_type(x$Window, "double") 37 | expect_type(x$Percent, "double") 38 | expect_type(x$Label, "character") 39 | 40 | ## check values 41 | expect_equal(x$Window, c(2, 6)) 42 | expect_equal(x$Percent, 70) 43 | expect_equal(x$Label, "[-Inf, 2] | [6, Inf]") 44 | }) 45 | 46 | test_that(".percent errors if window is not valid", { 47 | expect_error(brmsmargins:::.percent(1:10, window = c(2))) 48 | expect_error(brmsmargins:::.percent(1:10, window = c("b", "c"))) 49 | }) 50 | -------------------------------------------------------------------------------- 
/tests/testthat/test-fixedeffects-bernoulli-margins.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | suppressWarnings( 15 | m.bayes <- brms::brm( 16 | formula = am ~ mpg, family = "bernoulli", 17 | data = mtcars, iter = 1000, warmup = 500, seed = 1234, 18 | chains = 2, backend = backend, save_pars = save_pars(all = TRUE), 19 | silent = 2, refresh = 0) 20 | ) 21 | 22 | h <- .001 23 | margins.bayes <- brmsmargins(m.bayes, 24 | add = data.frame(mpg = c(0, h)), 25 | CI = 0.95, 26 | contrasts = cbind(AME = c(-1 / h, 1 / h))) 27 | ame.bayes <- margins.bayes$ContrastSummary 28 | 29 | test_that("brmsmargins runs for a fixed effects logistic model", { 30 | expect_type(margins.bayes, "list") 31 | expect_equal(nrow(margins.bayes$Posterior), 32 | ndraws(m.bayes)) 33 | 34 | expect_equal(nrow(margins.bayes$Contrasts), 35 | ndraws(m.bayes)) 36 | 37 | expect_true(all(margins.bayes$M >= 0 & margins.bayes$M <= 1)) 38 | expect_true(all(margins.bayes$Mdn >= 0 & margins.bayes$Mdn <= 1)) 39 | expect_true(all(margins.bayes$LL >= 0 & margins.bayes$LL <= 1)) 40 | expect_true(all(margins.bayes$UL >= 0 & margins.bayes$UL <= 1)) 41 | 42 | expect_true(ame.bayes$M >= 0 && ame.bayes$M <= 1) 43 | expect_true(ame.bayes$Mdn >= 0 && ame.bayes$Mdn <= 1) 44 | expect_true(ame.bayes$LL >= 0 && ame.bayes$LL <= 1) 45 | expect_true(ame.bayes$UL >= 0 && ame.bayes$UL <= 1) 46 | }) 47 | 48 | skip_if_not_installed("margins") 49 | 50 | m.freq <- glm(am ~ mpg, data = mtcars, family = binomial()) 51 | ame.freq <- summary(margins::margins(m.freq)) 52 | 53 | test_that("brmsmargins roughly matches margins on a frequentist fixed effects logistic model", { 54 | 55 | expect_true(abs(ame.freq$AME - ame.bayes$M) < .01) 56 | 57 | expect_true(abs(ame.freq$lower - ame.bayes$LL) < .01) 58 | 59 | expect_true(abs(ame.freq$upper - ame.bayes$UL) < .01) 60 | }) 61 | -------------------------------------------------------------------------------- /tests/testthat/test-fixedeffects-beta-margins.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | dbeta <- withr::with_seed( 15 | seed = 12345, code = { 16 | nGroups <- 2L 17 | nObs <- 200L 18 | d <- data.table(x = rep(0:1, each = nObs)) 19 | d[, y := rbeta(n = nObs * nGroups, shape1 = 2 + x, shape2 = 5)] 20 | copy(d) 21 | }) 22 | 23 | suppressWarnings( 24 | bayes.beta <- brms::brm( 25 | formula = y ~ x, family = "beta", 26 | data = dbeta, iter = 1000, warmup = 500, seed = 1234, 27 | chains = 2, backend = backend, save_pars = save_pars(all = TRUE), 28 | silent = 2, refresh = 0) 29 | ) 30 | 31 | h <- .001 32 | margins.bayes <- brmsmargins(bayes.beta, 33 | add = data.table(x = c(0, h)), 34 | CI = 0.95, 35 | contrasts = cbind(AME = c(-1 / h, 1 / h))) 36 | ame.bayes <- margins.bayes$ContrastSummary 37 | 
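## Note on the setup above: the small step h approximates a derivative
## numerically; predictions are made at x = 0 and x = h and contrasted with
## weights c(-1 / h, 1 / h), so the resulting contrast is an (approximate)
## instantaneous average marginal effect of x on the response scale.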
38 | test_that("brmsmargins runs for a fixed effects beta model", { 39 | expect_type(margins.bayes, "list") 40 | expect_equal(nrow(margins.bayes$Posterior), 41 | ndraws(bayes.beta)) 42 | 43 | expect_equal(nrow(margins.bayes$Contrasts), 44 | ndraws(bayes.beta)) 45 | 46 | expect_true(all(margins.bayes$M >= 0 & margins.bayes$M <= 1)) 47 | expect_true(all(margins.bayes$Mdn >= 0 & margins.bayes$Mdn <= 1)) 48 | expect_true(all(margins.bayes$LL >= 0 & margins.bayes$LL <= 1)) 49 | expect_true(all(margins.bayes$UL >= 0 & margins.bayes$UL <= 1)) 50 | 51 | expect_true(ame.bayes$M >= 0 && ame.bayes$M <= 1) 52 | expect_true(ame.bayes$Mdn >= 0 && ame.bayes$Mdn <= 1) 53 | expect_true(ame.bayes$LL >= 0 && ame.bayes$LL <= 1) 54 | expect_true(ame.bayes$UL >= 0 && ame.bayes$UL <= 1) 55 | }) 56 | 57 | skip_if_not_installed("margins") 58 | skip_if_not_installed("betareg") 59 | 60 | m.freq <- betareg::betareg(y ~ x, data = dbeta) 61 | ame.freq <- summary(margins::margins(m.freq)) 62 | 63 | test_that("brmsmargins roughly matches margins on a frequentist fixed effects beta regression model", { 64 | 65 | expect_true(abs(ame.freq$AME - ame.bayes$M) < .01) 66 | 67 | expect_true(abs(ame.freq$lower - ame.bayes$LL) < .01) 68 | 69 | expect_true(abs(ame.freq$upper - ame.bayes$UL) < .01) 70 | }) 71 | -------------------------------------------------------------------------------- /tests/testthat/test-fixedeffects-gamma-margins.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | dgamma <- withr::with_seed( 15 | seed = 12345, code = { 16 | nGroups <- 2L 17 | nObs <- 200L 18 | d <- data.table(x = rep(0:1, each = nObs)) 19 | d[, y := rgamma(n = nObs * nGroups, shape = 2 + x, rate = 5)] 20 | copy(d) 21 | }) 22 | 23 | suppressWarnings( 24 | bayes.gamma <- brms::brm( 25 | formula = y ~ x, family = "Gamma", 26 | data = dgamma, iter = 1000, warmup = 500, seed = 1234, 27 | chains = 2, backend = backend, save_pars = save_pars(all = TRUE), 28 | silent = 2, refresh = 0) 29 | ) 30 | 31 | h <- .001 32 | margins.bayes <- brmsmargins(bayes.gamma, 33 | add = data.table(x = c(0, h)), 34 | CI = 0.95, 35 | contrasts = cbind(AME = c(-1 / h, 1 / h))) 36 | ame.bayes <- margins.bayes$ContrastSummary 37 | 38 | test_that("brmsmargins runs for a fixed effects gamma model", { 39 | expect_type(margins.bayes, "list") 40 | expect_equal(nrow(margins.bayes$Posterior), 41 | ndraws(bayes.gamma)) 42 | 43 | expect_equal(nrow(margins.bayes$Contrasts), 44 | ndraws(bayes.gamma)) 45 | 46 | expect_true(all(margins.bayes$M >= 0 & margins.bayes$M <= 1)) 47 | expect_true(all(margins.bayes$Mdn >= 0 & margins.bayes$Mdn <= 1)) 48 | expect_true(all(margins.bayes$LL >= 0 & margins.bayes$LL <= 1)) 49 | expect_true(all(margins.bayes$UL >= 0 & margins.bayes$UL <= 1)) 50 | 51 | expect_true(ame.bayes$M >= 0 && ame.bayes$M <= 1) 52 | expect_true(ame.bayes$Mdn >= 0 && ame.bayes$Mdn <= 1) 53 | expect_true(ame.bayes$LL >= 0 && ame.bayes$LL <= 1) 54 | expect_true(ame.bayes$UL >= 0 && ame.bayes$UL <= 1) 55 | }) 56 | 57 | test_that("brmsmargins CI include true difference for a gamma regression model", { 58 | 59 | expect_true(ame.bayes$LL <= 1 / 5) 60 | 61 | 
expect_true(ame.bayes$UL >= 1 / 5) 62 | }) 63 | 64 | 65 | skip_if_not_installed("margins") 66 | 67 | m.freq <- stats::glm(y ~ x, data = dgamma, family = Gamma()) 68 | ame.freq <- summary(margins::margins(m.freq)) 69 | 70 | test_that("brmsmargins roughly matches margins on a frequentist fixed effects gamma regression model", { 71 | 72 | expect_true(abs(ame.freq$AME - ame.bayes$M) < .05) 73 | 74 | expect_true(abs(ame.freq$lower - ame.bayes$LL) < .05) 75 | 76 | expect_true(abs(ame.freq$upper - ame.bayes$UL) < .05) 77 | }) 78 | -------------------------------------------------------------------------------- /tests/testthat/test-integratemvn.R: -------------------------------------------------------------------------------- 1 | test_that("integratemvn works with a seed", { 2 | d <- matrix(1, 1, 2) 3 | sd <- c(10, 5) 4 | L <- chol(matrix(c(1, .5, .5, 1), 2)) 5 | 6 | res1 <- withr::with_seed( 7 | seed = 1234, 8 | code = integratemvn(d, 5L, sd, L)) 9 | 10 | res2 <- withr::with_seed( 11 | seed = 1234, 12 | code = integratemvn(d, 5L, sd, L)) 13 | 14 | res3 <- withr::with_seed( 15 | seed = 4321, 16 | code = integratemvn(d, 5L, sd, L)) 17 | 18 | expect_equal(res1, res2) 19 | expect_false(isTRUE(all.equal(res1, res3))) 20 | }) 21 | 22 | test_that("integratemvn works with 0 values", { 23 | d <- matrix(0, 1, 1) 24 | sd <- c(1) 25 | L <- chol(matrix(c(1))) 26 | 27 | res <- withr::with_seed( 28 | seed = 1234, 29 | code = integratemvn(d, 5L, sd, L)) 30 | 31 | expect_equal(res, matrix(0, 1, 5L)) 32 | }) 33 | 34 | test_that("integratemvn works", { 35 | d <- matrix(1, 1, 1) 36 | sd <- c(1) 37 | L <- chol(matrix(c(1))) 38 | 39 | res <- withr::with_seed( 40 | seed = 1234, 41 | code = integratemvn(d, 5000L, sd, L)) 42 | 43 | expect_true(abs(rowMeans(res)) < .15) 44 | expect_true(abs(sd(res[1, ]) - 1) < .15) 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test-marginalcoef-mixedlogit.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | dlogit <- withr::with_seed( 15 | seed = 12345, code = { 16 | nGroups <- 100 17 | nObs <- 20 18 | theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) 19 | theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) 20 | theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) 21 | theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) 22 | theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) 23 | theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) 24 | theta.location[, 1] <- theta.location[, 1] - 2.5 25 | theta.location[, 2] <- theta.location[, 2] + 1 26 | d <- data.table( 27 | x = rep(rep(0:1, each = nObs / 2), times = nGroups)) 28 | d[, ID := rep(seq_len(nGroups), each = nObs)] 29 | 30 | for (i in seq_len(nGroups)) { 31 | d[ID == i, y := rbinom( 32 | n = nObs, 33 | size = 1, 34 | prob = plogis(theta.location[i, 1] + theta.location[i, 2] * x)) 35 | ] 36 | } 37 | copy(d) 38 | }) 39 | 40 | res.samp <- dlogit[, .(M = mean(y)), by = .(ID, x)][, .(M = mean(M)), by = x] 41 | res.samp <- res.samp[, .( 42 | Label = 
c("Intercept", "x"), 43 | Est = c(qlogis(M[x == 0]), 44 | log( 45 | (M[x == 1] / (1 - M[x == 1])) / 46 | (M[x == 0] / (1 - M[x == 0])))))] 47 | 48 | suppressWarnings( 49 | mlogit <- brms::brm( 50 | y ~ 1 + x + (1 + x | ID), family = "bernoulli", 51 | data = dlogit, iter = 1000, warmup = 500, seed = 1234, 52 | chains = 2, backend = backend, save_pars = save_pars(all = TRUE), 53 | silent = 2, refresh = 0) 54 | ) 55 | 56 | mc <- withr::with_seed( 57 | seed = 1234, { 58 | marginalcoef(mlogit, CI = 0.95) 59 | }) 60 | 61 | test_that("marginalcoef works to integrate out random effects for marginal coefficients in multilevel logistic models", { 62 | expect_type(mc, "list") 63 | expect_true(abs(mc$Summary$M[1] - res.samp$Est[1]) < .05) 64 | expect_true(abs(mc$Summary$M[2] - res.samp$Est[2]) < .05) 65 | }) 66 | -------------------------------------------------------------------------------- /tests/testthat/test-marginalcoef-mixedpoisson.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | dpois <- withr::with_seed( 15 | seed = 12345, code = { 16 | nGroups <- 100 17 | nObs <- 20 18 | theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) 19 | theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) 20 | theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) 21 | theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) 22 | theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) 23 | theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) 24 | theta.location[, 1] <- theta.location[, 1] - 2.5 25 | theta.location[, 2] <- theta.location[, 2] + 1 26 | d <- data.table( 27 | x = rep(rep(0:1, each = nObs / 2), times = nGroups)) 28 | d[, ID := rep(seq_len(nGroups), each = nObs)] 29 | 30 | for (i in seq_len(nGroups)) { 31 | d[ID == i, y := rpois( 32 | n = nObs, 33 | lambda = exp(1 + theta.location[i, 1] + theta.location[i, 2] * x)) 34 | ] 35 | } 36 | copy(d) 37 | }) 38 | 39 | res.samp <- dpois[, .(M = mean(y)), by = .(ID, x)][, .(M = mean(M)), by = x] 40 | res.samp <- res.samp[, .( 41 | Label = c("Intercept", "x"), 42 | Est = c(log(M[x == 0]), 43 | log( (M[x == 1] ) / (M[x == 0]) )))] 44 | 45 | suppressWarnings( 46 | mpois <- brms::brm( 47 | y ~ 1 + x + (1 + x | ID), family = "poisson", 48 | data = dpois, iter = 1000, warmup = 500, seed = 1234, 49 | chains = 2, backend = backend, 50 | save_pars = brms::save_pars(all = TRUE), 51 | silent = 2, refresh = 0) 52 | ) 53 | 54 | mc <- withr::with_seed( 55 | seed = 1234, { 56 | marginalcoef(mpois, CI = 0.95) 57 | }) 58 | 59 | test_that("marginalcoef works to integrate out random effects for marginal coefficients in multilevel poisson models", { 60 | expect_type(mc, "list") 61 | expect_true(abs(mc$Summary$M[1] - res.samp$Est[1]) < .15) 62 | expect_true(abs(mc$Summary$M[2] - res.samp$Est[2]) < .05) 63 | }) 64 | -------------------------------------------------------------------------------- /tests/testthat/test-predict-mixedlogit.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", 
quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | dlogit <- withr::with_seed( 15 | seed = 12345, code = { 16 | nGroups <- 100 17 | nObs <- 20 18 | theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) 19 | theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) 20 | theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) 21 | theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) 22 | theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) 23 | theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) 24 | theta.location[, 1] <- theta.location[, 1] - 2.5 25 | theta.location[, 2] <- theta.location[, 2] + 1 26 | d <- data.table( 27 | x = rep(rep(0:1, each = nObs / 2), times = nGroups)) 28 | d[, ID := rep(seq_len(nGroups), each = nObs)] 29 | 30 | for (i in seq_len(nGroups)) { 31 | d[ID == i, y := rbinom( 32 | n = nObs, 33 | size = 1, 34 | prob = plogis(theta.location[i, 1] + theta.location[i, 2] * x)) 35 | ] 36 | } 37 | copy(d) 38 | }) 39 | 40 | res.samp <- dlogit[, .(M = mean(y)), by = .(ID, x)][, .(M = mean(M)), by = x] 41 | 42 | suppressWarnings( 43 | mlogit <- brms::brm( 44 | y ~ 1 + x + (1 + x | ID), family = "bernoulli", 45 | data = dlogit, iter = 1000, warmup = 500, seed = 1234, 46 | chains = 2, backend = backend, save_pars = save_pars(all = TRUE), 47 | silent = 2, refresh = 0) 48 | ) 49 | 50 | preddat <- data.frame(y = c(0, 0), x = c(0, 1), ID = 999) 51 | 52 | res.integrate <- withr::with_seed( 53 | seed = 1234, { 54 | test0 <- prediction(object = mlogit, data = preddat[1, ], posterior = TRUE, 55 | effects = "integrateoutRE", k = 100L, CI = 0.95, CIType = "ETI") 56 | test1 <- prediction(object = mlogit, data = preddat[2, ], posterior = TRUE, 57 | effects = "integrateoutRE", k = 100L, CI = 0.95, CIType = "ETI") 58 | ame <- list(Summary = NULL, Posterior = test1$Posterior - test0$Posterior) 59 | ame$Summary <- bsummary(ame$Posterior, CI = 0.95, CIType = "ETI") 60 | 61 | list( 62 | Summary = rbind( 63 | test0$Summary, test1$Summary, ame$Summary), 64 | Posterior = cbind( 65 | test0$Posterior, test1$Posterior, ame$Posterior)) 66 | }) 67 | 68 | res.fixedonly <- withr::with_seed( 69 | seed = 1234, { 70 | test0 <- prediction(object = mlogit, data = preddat[1, ], posterior = TRUE, 71 | effects = "fixedonly", CI = 0.95, CIType = "ETI") 72 | test1 <- prediction(object = mlogit, data = preddat[2, ], posterior = TRUE, 73 | effects = "fixedonly", CI = 0.95, CIType = "ETI") 74 | ame <- list(Summary = NULL, Posterior = test1$Posterior - test0$Posterior) 75 | ame$Summary <- bsummary(ame$Posterior, CI = 0.95, CIType = "ETI") 76 | 77 | list( 78 | Summary = rbind( 79 | test0$Summary, test1$Summary, ame$Summary), 80 | Posterior = cbind( 81 | test0$Posterior, test1$Posterior, ame$Posterior)) 82 | }) 83 | 84 | test_that("prediction works to integrate out random effects in multilevel logistic models", { 85 | expect_type(res.integrate, "list") 86 | expect_equal( 87 | c(ndraws(mlogit), 3L), 88 | dim(res.integrate$Posterior)) 89 | expect_true(all( 90 | res.integrate$Posterior[, 1:2] >= 0 & 91 | res.integrate$Posterior[, 1:2] <= 1)) 92 | expect_true(all( 93 | res.integrate$Summary$M >= 0 & 94 | res.integrate$Summary$M <= 1)) 95 | 96 | 
expect_true(abs(res.integrate$Summary$M[1] - res.samp$M[1]) < .01) 97 | expect_true(abs(res.integrate$Summary$M[2] - res.samp$M[2]) < .01) 98 | expect_true(abs(res.integrate$Summary$M[3] - 99 | (res.samp$M[2] - res.samp$M[1])) < .01) 100 | }) 101 | 102 | test_that("prediction works with fixed effects only in multilevel logistic models", { 103 | expect_type(res.fixedonly, "list") 104 | expect_equal( 105 | c(ndraws(mlogit), 3L), 106 | dim(res.fixedonly$Posterior)) 107 | expect_true(all( 108 | res.fixedonly$Posterior[, 1:2] >= 0 & 109 | res.fixedonly$Posterior[, 1:2] <= 1)) 110 | expect_true(all( 111 | res.fixedonly$Summary$M >= 0 & 112 | res.fixedonly$Summary$M <= 1)) 113 | expect_true(res.fixedonly$Summary$M[1] < res.integrate$Summary$M[1]) 114 | expect_true(res.fixedonly$Summary$M[2] < res.integrate$Summary$M[2]) 115 | }) 116 | 117 | h <- .001 118 | ames <- brmsmargins( 119 | object = mlogit, 120 | add = data.frame(x = c(0, h)), 121 | contrasts = cbind("AME time" = c(-1 / h, 1 / h)), 122 | effects = "integrateoutRE", 123 | k = 100L, 124 | seed = 1234 125 | ) 126 | 127 | test_that("brmsmargins works with random slope logit models", { 128 | expect_type(ames, "list") 129 | expect_equal( 130 | ndraws(mlogit), 131 | nrow(ames$Posterior)) 132 | expect_true(all( 133 | ames$Posterior[, 1:2] >= 0 & 134 | ames$Posterior[, 1:2] <= 1)) 135 | expect_true(all( 136 | ames$ContrastSummary$M >= 0 & 137 | ames$ContrastSummary$M <= 1)) 138 | expect_true(abs(ames$ContrastSummary$M - 0.11) < .02) 139 | }) 140 | 141 | suppressWarnings( 142 | mlogit.intonly <- brms::brm( 143 | y ~ 1 + x + (1 | ID), family = "bernoulli", 144 | data = dlogit, seed = 1234, 145 | chains = 2, backend = backend, save_pars = save_pars(all = TRUE), 146 | silent = 2, refresh = 0) 147 | ) 148 | 149 | h <- .001 150 | ames <- brmsmargins( 151 | object = mlogit.intonly, 152 | add = data.frame(x = c(0, h)), 153 | contrasts = cbind("AME time" = c(-1 / h, 1 / h)), 154 | effects = "integrateoutRE", 155 | k = 100L, 156 | seed = 1234 157 | ) 158 | 159 | test_that("brmsmargins works with intercept only logit models", { 160 | expect_type(ames, "list") 161 | expect_equal( 162 | ndraws(mlogit.intonly), 163 | nrow(ames$Posterior)) 164 | expect_true(all( 165 | ames$Posterior[, 1:2] >= 0 & 166 | ames$Posterior[, 1:2] <= 1)) 167 | expect_true(all( 168 | ames$ContrastSummary$M >= 0 & 169 | ames$ContrastSummary$M <= 1)) 170 | expect_true(abs(ames$ContrastSummary$M - 0.11) < .02) 171 | }) 172 | -------------------------------------------------------------------------------- /tests/testthat/test-predict-mixedlogtrans.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | dlog <- withr::with_seed( 15 | seed = 12345, code = { 16 | nGroups <- 100 17 | nObs <- 20 18 | theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) 19 | theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) 20 | theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) 21 | theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) 22 | theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) 23 | theta.location <- 
theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) 24 | theta.location[, 1] <- theta.location[, 1] - 2.5 25 | theta.location[, 2] <- theta.location[, 2] + 1 26 | d <- data.table( 27 | x = rep(rep(0:1, each = nObs / 2), times = nGroups)) 28 | d[, ID := rep(seq_len(nGroups), each = nObs)] 29 | 30 | for (i in seq_len(nGroups)) { 31 | d[ID == i, y := exp(rnorm( 32 | n = nObs, 33 | mean = theta.location[i, 1] + theta.location[i, 2] * x, 34 | sd = 1)) 35 | ] 36 | } 37 | copy(d) 38 | }) 39 | 40 | res.samp <- dlog[, .(M = mean(log(y))), by = .(ID, x)][, .(M = mean(exp(M))), by = x] 41 | 42 | suppressWarnings( 43 | mlog <- brms::brm( 44 | log(y) ~ 1 + x + (1 + x | ID), family = "gaussian", 45 | data = dlog, iter = 1000, warmup = 500, seed = 1234, 46 | chains = 2, backend = backend, save_pars = save_pars(all = TRUE), 47 | silent = 2, refresh = 0) 48 | ) 49 | 50 | preddat <- data.frame(y = c(0, 0), x = c(0, 1), ID = 999) 51 | 52 | res.integrate <- withr::with_seed( 53 | seed = 1234, { 54 | test0 <- prediction(object = mlog, data = preddat[1, ], posterior = TRUE, 55 | effects = "integrateoutRE", backtrans = "exp", 56 | k = 100L, CI = 0.95, CIType = "ETI") 57 | test1 <- prediction(object = mlog, data = preddat[2, ], posterior = TRUE, 58 | effects = "integrateoutRE", backtrans = "exp", 59 | k = 100L, CI = 0.95, CIType = "ETI") 60 | ame <- list(Summary = NULL, Posterior = test1$Posterior - test0$Posterior) 61 | ame$Summary <- bsummary(ame$Posterior, CI = 0.95, CIType = "ETI") 62 | 63 | list( 64 | Summary = rbind( 65 | test0$Summary, test1$Summary, ame$Summary), 66 | Posterior = cbind( 67 | test0$Posterior, test1$Posterior, ame$Posterior)) 68 | }) 69 | 70 | res.fixedonly <- withr::with_seed( 71 | seed = 1234, { 72 | test0 <- prediction(object = mlog, data = preddat[1, ], posterior = TRUE, 73 | effects = "fixedonly", backtrans = "exp", 74 | CI = 0.95, CIType = "ETI") 75 | test1 <- prediction(object = mlog, data = preddat[2, ], posterior = TRUE, 76 | effects = "fixedonly", backtrans = "exp", 77 | CI = 0.95, CIType = "ETI") 78 | ame <- list(Summary = NULL, Posterior = test1$Posterior - test0$Posterior) 79 | ame$Summary <- bsummary(ame$Posterior, CI = 0.95, CIType = "ETI") 80 | 81 | list( 82 | Summary = rbind( 83 | test0$Summary, test1$Summary, ame$Summary), 84 | Posterior = cbind( 85 | test0$Posterior, test1$Posterior, ame$Posterior)) 86 | }) 87 | 88 | test_that("prediction works to integrate out random effects in multilevel log transformed models", { 89 | expect_type(res.integrate, "list") 90 | expect_equal( 91 | c(ndraws(mlog), 3L), 92 | dim(res.integrate$Posterior)) 93 | expect_true(all( 94 | res.integrate$Posterior[, 1:2] >= 0)) 95 | expect_true(all( 96 | res.integrate$Summary$M >= 0)) 97 | 98 | expect_true(abs(res.integrate$Summary$M[1] - res.samp$M[1]) < .05) 99 | expect_true(abs(res.integrate$Summary$M[2] - res.samp$M[2]) < .05) 100 | expect_true(abs(res.integrate$Summary$M[3] - 101 | (res.samp$M[2] - res.samp$M[1])) < .05) 102 | }) 103 | 104 | test_that("prediction works with fixed effects only in multilevel log transformed models", { 105 | expect_type(res.fixedonly, "list") 106 | expect_equal( 107 | c(ndraws(mlog), 3L), 108 | dim(res.fixedonly$Posterior)) 109 | expect_true(all( 110 | res.fixedonly$Posterior[, 1:2] >= 0)) 111 | expect_true(all( 112 | res.fixedonly$Summary$M >= 0)) 113 | expect_true(res.fixedonly$Summary$M[1] < res.integrate$Summary$M[1]) 114 | expect_true(res.fixedonly$Summary$M[2] < res.integrate$Summary$M[2]) 115 | }) 116 | 
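## Note: res.samp exponentiates the within-ID mean of log(y) and then averages
## across IDs, so it approximates the marginal quantity targeted by the
## "integrateoutRE" predictions with backtrans = "exp". Predictions using the
## fixed effects only omit the random intercept and slope variation and are
## therefore expected to be smaller here, which the last test checks.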
-------------------------------------------------------------------------------- /tests/testthat/test-predict-mixedpoisson.R: -------------------------------------------------------------------------------- 1 | skip_on_cran() 2 | 3 | if (!requireNamespace("cmdstanr", quietly = TRUE)) { 4 | backend <- "rstan" 5 | ## if using rstan backend, models can crash on Windows 6 | ## so skip if on windows and cannot use cmdstanr 7 | skip_on_os("windows") 8 | } else { 9 | if (isFALSE(is.null(cmdstanr::cmdstan_version(error_on_NA = FALSE)))) { 10 | backend <- "cmdstanr" 11 | } 12 | } 13 | 14 | dpois <- withr::with_seed( 15 | seed = 12345, code = { 16 | nGroups <- 100 17 | nObs <- 20 18 | theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) 19 | theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) 20 | theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) 21 | theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) 22 | theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) 23 | theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) 24 | theta.location[, 1] <- theta.location[, 1] - 2.5 25 | theta.location[, 2] <- theta.location[, 2] + 1 26 | d <- data.table( 27 | x = rep(rep(0:1, each = nObs / 2), times = nGroups)) 28 | d[, ID := rep(seq_len(nGroups), each = nObs)] 29 | 30 | for (i in seq_len(nGroups)) { 31 | d[ID == i, y := rpois( 32 | n = nObs, 33 | lambda = exp(1 + theta.location[i, 1] + theta.location[i, 2] * x)) 34 | ] 35 | } 36 | copy(d) 37 | }) 38 | 39 | res.samp <- dpois[, .(M = mean(y)), by = .(ID, x)][, .(M = mean(M)), by = x] 40 | 41 | suppressWarnings( 42 | mpois <- brms::brm( 43 | y ~ 1 + x + (1 + x | ID), family = "poisson", 44 | data = dpois, iter = 1000, warmup = 500, seed = 1234, 45 | chains = 2, backend = backend, 46 | save_pars = brms::save_pars(all = TRUE), 47 | silent = 2, refresh = 0) 48 | ) 49 | 50 | preddat <- data.frame(y = c(0, 0), x = c(0, 1), ID = 999) 51 | 52 | res.integrate <- withr::with_seed( 53 | seed = 1234, { 54 | test0 <- prediction(object = mpois, data = preddat[1, ], posterior = TRUE, 55 | effects = "integrateoutRE", k = 100L, CI = 0.95, CIType = "ETI") 56 | test1 <- prediction(object = mpois, data = preddat[2, ], posterior = TRUE, 57 | effects = "integrateoutRE", k = 100L, CI = 0.95, CIType = "ETI") 58 | ame <- list(Summary = NULL, Posterior = test1$Posterior - test0$Posterior) 59 | ame$Summary <- bsummary(ame$Posterior, CI = 0.95, CIType = "ETI") 60 | 61 | list( 62 | Summary = rbind( 63 | test0$Summary, test1$Summary, ame$Summary), 64 | Posterior = cbind( 65 | test0$Posterior, test1$Posterior, ame$Posterior)) 66 | }) 67 | 68 | res.fixedonly <- withr::with_seed( 69 | seed = 1234, { 70 | test0 <- prediction(object = mpois, data = preddat[1, ], posterior = TRUE, 71 | effects = "fixedonly", CI = 0.95, CIType = "ETI") 72 | test1 <- prediction(object = mpois, data = preddat[2, ], posterior = TRUE, 73 | effects = "fixedonly", CI = 0.95, CIType = "ETI") 74 | ame <- list(Summary = NULL, Posterior = test1$Posterior - test0$Posterior) 75 | ame$Summary <- bsummary(ame$Posterior, CI = 0.95, CIType = "ETI") 76 | 77 | list( 78 | Summary = rbind( 79 | test0$Summary, test1$Summary, ame$Summary), 80 | Posterior = cbind( 81 | test0$Posterior, test1$Posterior, ame$Posterior)) 82 | }) 83 | 84 | test_that("prediction works to integrate out random effects in multilevel poisson models", { 85 | expect_type(res.integrate, "list") 86 | expect_equal( 87 | c(ndraws(mpois), 3L), 88 | 
dim(res.integrate$Posterior)) 89 | expect_true(all( 90 | res.integrate$Posterior[, 1:2] >= 0)) 91 | expect_true(all( 92 | res.integrate$Summary$M >= 0)) 93 | 94 | expect_true(abs(res.integrate$Summary$M[1] - res.samp$M[1]) < .15) 95 | expect_true(abs(res.integrate$Summary$M[2] - res.samp$M[2]) < .20) 96 | expect_true(abs(res.integrate$Summary$M[3] - 97 | (res.samp$M[2] - res.samp$M[1])) < .15) 98 | }) 99 | 100 | test_that("prediction works with fixed effects only in multilevel poisson models", { 101 | expect_type(res.fixedonly, "list") 102 | expect_equal( 103 | c(ndraws(mpois), 3L), 104 | dim(res.fixedonly$Posterior)) 105 | expect_true(all( 106 | res.fixedonly$Posterior[, 1:2] >= 0)) 107 | expect_true(all( 108 | res.fixedonly$Summary$M >= 0)) 109 | expect_true(res.fixedonly$Summary$M[1] < res.integrate$Summary$M[1]) 110 | expect_true(res.fixedonly$Summary$M[2] < res.integrate$Summary$M[2]) 111 | }) 112 | -------------------------------------------------------------------------------- /tests/testthat/test-rowbootmeans.R: -------------------------------------------------------------------------------- 1 | test_that("rowBootMeans differs from rowMeans and bootstraps in an expected way", { 2 | x <- matrix(1:9, 3) 3 | res <- withr::with_seed( 4 | seed = 1234, 5 | rowBootMeans(x)) 6 | expect_equal(res, c(3, 6, 4)) 7 | expect_false(all(res == rowMeans(x))) 8 | }) 9 | -------------------------------------------------------------------------------- /tests/testthat/test-tab2mat.R: -------------------------------------------------------------------------------- 1 | test_that("tab2mat converts row vector to matrix", { 2 | x <- matrix(1:9, 1) 3 | res <- tab2mat(x) 4 | expect_equal(res, matrix(1:9, 3, byrow = TRUE)) 5 | }) 6 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/fixed-effects-marginaleffects.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Marginal Effects for Fixed Effects Models" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: false 8 | smooth_scroll: true 9 | toc_depth: 3 10 | vignette: > 11 | %\VignetteIndexEntry{Marginal Effects for Fixed Effects Models} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | 17 | 18 | 19 | ``` r 20 | library(knitr) 21 | library(data.table) 22 | #> data.table 1.16.4 using 16 threads (see ?getDTthreads). Latest news: r-datatable.com 23 | library(brms) 24 | #> Loading required package: Rcpp 25 | #> Loading 'brms' package (version 2.22.0). Useful instructions 26 | #> can be found by typing help('brms'). A more detailed introduction 27 | #> to the package is available through vignette('brms_overview'). 28 | #> 29 | #> Attaching package: 'brms' 30 | #> 31 | #> The following object is masked from 'package:stats': 32 | #> 33 | #> ar 34 | library(brmsmargins) 35 | ``` 36 | 37 | This vignette provides a brief overview of how to calculate 38 | marginal effects for Bayesian regression models involving 39 | only fixed effects and fit using the `brms` package. 40 | 41 | ## What are marginal effects? 42 | 43 | Marginal effects can be used to describe how an outcome is 44 | predicted to change with a change in a predictor (or predictors). 45 | It is a derivative. 
For convenience, typically calculated numerically 46 | rather than analytically. 47 | 48 | To motivate marginal effects, we can look at some regression 49 | models fit in a frequentist framework for simplicity and speed. 50 | Here we use the `mtcars` dataset built into `R`. First, we 51 | can look at a linear regression model of the association between 52 | `mpg` and `hp`. Here we can see the estimated regression coefficient 53 | for `mpg`. 54 | 55 | 56 | ``` r 57 | m.linear <- lm(hp ~ am + mpg, data = mtcars) 58 | 59 | coef(m.linear)["mpg"] 60 | #> mpg 61 | #> -11.19988 62 | ``` 63 | 64 | In linear models with no interactions, no (non linear) transformations, 65 | and a linear link function, the regression coefficient is the 66 | predicted change in the outcome for a one unit change in the predictor, 67 | regardless of any other values. For example, here we can look at the 68 | predicted difference in the outcome for a one unit difference in `mpg` 69 | from 0 to 1, holding `am = 0`. 70 | 71 | 72 | ``` r 73 | yhat <- predict( 74 | m.linear, 75 | newdata = data.frame(am = 0, mpg = c(0, 1)), 76 | type = "response") 77 | 78 | diff(yhat) 79 | #> 2 80 | #> -11.19988 81 | ``` 82 | 83 | We can look at the same estimate but moving `mpg` from 84 | 10 to 11 instead 0 to 1, holding `am = 1`. 85 | 86 | 87 | ``` r 88 | yhat <- predict( 89 | m.linear, 90 | newdata = data.frame(am = 1, mpg = c(10, 11)), 91 | type = "response") 92 | 93 | diff(yhat) 94 | #> 2 95 | #> -11.19988 96 | ``` 97 | 98 | All of these quantities are identical. In this case, the regression 99 | coefficient can be interpreted as a marginal effect: the expected change 100 | in the outcome for a one unit shift in `mpg`, regardless of the 101 | value of `am` and regardless of the values where `mpg` is evaluated. 102 | 103 | This convenient property does not hold for many types of models. 104 | Next consider a logistic regression model. The regression 105 | coefficient, shown below, is on the log odds scale, not the 106 | probability scale. This is not convenient for interpretation, 107 | as the log odds scale is not the same scale as our outcome. 108 | 109 | 110 | ``` r 111 | m.logistic <- glm(vs ~ am + mpg, data = mtcars, family = binomial()) 112 | 113 | coef(m.logistic)["mpg"] 114 | #> mpg 115 | #> 0.6809205 116 | ``` 117 | 118 | We can find predicted differences on the probability scale. 119 | Here moving `mpg` from 10 to 11 holding `am = 0`. 120 | 121 | 122 | ``` r 123 | yhat <- predict( 124 | m.logistic, 125 | newdata = data.frame(am = 0, mpg = c(10, 11)), 126 | type = "response") 127 | 128 | diff(yhat) 129 | #> 2 130 | #> 0.002661989 131 | ``` 132 | 133 | We can look at the same estimate but moving `mpg` from 134 | 20 to 21 instead 10 to 11 again holding `am = 0`. 135 | 136 | 137 | ``` r 138 | yhat <- predict( 139 | m.logistic, 140 | newdata = data.frame(am = 0, mpg = c(20, 21)), 141 | type = "response") 142 | 143 | diff(yhat) 144 | #> 2 145 | #> 0.1175344 146 | ``` 147 | 148 | We can look at the same estimate moving `mpg` from 149 | 20 to 21 as before, but this time holding `am = 1`. 150 | 151 | 152 | ``` r 153 | yhat <- predict( 154 | m.logistic, 155 | newdata = data.frame(am = 1, mpg = c(20, 21)), 156 | type = "response") 157 | 158 | diff(yhat) 159 | #> 2 160 | #> 0.08606869 161 | ``` 162 | 163 | All the estimates in this case differ. The association between `mpg` and 164 | **probability** of `vs` is not linear. 165 | Marginal effects provide a way to get results on the response scale, 166 | which can aid interpretation. 
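One way to see why these estimates vary is that, for a logistic model, the
slope on the probability scale at a particular point equals the coefficient
multiplied by `p * (1 - p)`, where `p` is the predicted probability at that
point. The short sketch below is an illustrative aside (the evaluation point
`mpg = 20, am = 0` and the step `h` are arbitrary choices): it compares this
analytic slope with a small numeric difference at the same point, and the two
should agree closely.


``` r
## analytic slope of the inverse logit at mpg = 20, am = 0:
## beta_mpg * p * (1 - p)
b <- coef(m.logistic)["mpg"]
p <- predict(
  m.logistic,
  newdata = data.frame(am = 0, mpg = 20),
  type = "response")

b * p * (1 - p)

## numeric approximation of the same slope using a small step h
h <- .001
yhat <- predict(
  m.logistic,
  newdata = data.frame(am = 0, mpg = c(20, 20 + h)),
  type = "response")

diff(yhat) / h
```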
167 | 168 | A common type of marginal effect is an average marginal effect (AME). 169 | To calculate an AME numerically, we can get predicted probabilities 170 | from a model for every observation in the dataset. For continuous variables, 171 | we might use a very small difference to approximate the derivative. 172 | For categorical variables, we might calculate a discrete difference. 173 | 174 | ### Average Marginal Effect (AME) 175 | 176 | Here is an example of a continuous AME. 177 | `h` is a value near to zero used for the numerical 178 | derivative. We take all the values observed in the dataset 179 | for the first set of predicted probabilities. Then we take the 180 | observed values + `h` and calculate new predicted probabilities. 181 | The difference, divided by `h` is the "instantaneous" (i.e., derivative) 182 | on the probability scale for a one unit shift in the predictor, `mpg`, 183 | for each person. When we average all of these, we get the AME. 184 | 185 | 186 | ``` r 187 | h <- .001 188 | 189 | nd.1 <- nd.0 <- model.frame(m.logistic) 190 | nd.1$mpg <- nd.1$mpg + h 191 | 192 | yhat.0 <- predict( 193 | m.logistic, 194 | newdata = nd.0, 195 | type = "response") 196 | 197 | yhat.1 <- predict( 198 | m.logistic, 199 | newdata = nd.1, 200 | type = "response") 201 | 202 | mean((yhat.1 - yhat.0) / h) 203 | #> [1] 0.06922997 204 | ``` 205 | 206 | Here is an example of a discrete AME. The variable, 207 | `am` only takes two values: 0 or 1. So we calculate 208 | predicted probabilities if everyone had `am = 0` and then 209 | again if everyone had `am = 1`. 210 | 211 | 212 | ``` r 213 | nd.1 <- nd.0 <- model.frame(m.logistic) 214 | nd.0$am <- 0 215 | nd.1$am <- 1 216 | 217 | yhat.0 <- predict( 218 | m.logistic, 219 | newdata = nd.0, 220 | type = "response") 221 | 222 | yhat.1 <- predict( 223 | m.logistic, 224 | newdata = nd.1, 225 | type = "response") 226 | 227 | mean((yhat.1 - yhat.0)) 228 | #> [1] -0.2618203 229 | ``` 230 | 231 | In both these examples, we are averaging across the different values 232 | observed in the dataset. In a frequentist framework, additional details 233 | are needed to calculate uncertainty intervals. In a Bayesian framework, 234 | uncertainty intervals can be calculated readily by summarizing the 235 | posterior. 236 | 237 | ## AMEs for Logistic Regression 238 | 239 | The main function for users to use is `brmsmargins()`. Here is an 240 | example calculating AMEs for `mpg` and `am`. First we will fit the same 241 | logistic regression model using `brms`. 242 | 243 | 244 | ``` r 245 | bayes.logistic <- brm( 246 | vs ~ am + mpg, data = mtcars, 247 | family = "bernoulli", seed = 1234, 248 | silent = 2, refresh = 0, 249 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 250 | chains = 4L, cores = 4L, backend = "cmdstanr") 251 | #> Loading required package: rstan 252 | #> Loading required package: StanHeaders 253 | #> 254 | #> rstan version 2.32.6 (Stan version 2.32.2) 255 | #> For execution on a local, multicore CPU with excess RAM we recommend calling 256 | #> options(mc.cores = parallel::detectCores()). 
257 | #> To avoid recompilation of unchanged Stan programs, we recommend calling 258 | #> rstan_options(auto_write = TRUE) 259 | #> For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions, 260 | #> change `threads_per_chain` option: 261 | #> rstan_options(threads_per_chain = 1) 262 | #> Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file 263 | ``` 264 | 265 | 266 | ``` r 267 | summary(bayes.logistic) 268 | #> Family: bernoulli 269 | #> Links: mu = logit 270 | #> Formula: vs ~ am + mpg 271 | #> Data: mtcars (Number of observations: 32) 272 | #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; 273 | #> total post-warmup draws = 4000 274 | #> 275 | #> Regression Coefficients: 276 | #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS 277 | #> Intercept -16.12 5.67 -28.84 -7.42 1.00 1379 1959 278 | #> am -3.83 1.86 -7.97 -0.70 1.00 1568 1772 279 | #> mpg 0.87 0.31 0.40 1.56 1.00 1328 1711 280 | #> 281 | #> Draws were sampled using sample(hmc). For each parameter, Bulk_ESS 282 | #> and Tail_ESS are effective sample size measures, and Rhat is the potential 283 | #> scale reduction factor on split chains (at convergence, Rhat = 1). 284 | ``` 285 | 286 | Now we can use `brmsmargins()`. We give it the model object, 287 | a `data.frame` of the values to be added, first 0, then (0 + h), 288 | and a contrast matrix. The default is a 99 percent credible interval, 289 | which we override here to 0.95. We use highest density intervals, 290 | which are the default. We also could have selected "ETI" for 291 | equal tail intervals. `brmsmargins()` will return a list 292 | with the posterior of each prediction, a summary of the posterior 293 | for the predictions, the posterior for the contrasts, and a 294 | summary of the posterior for the contrasts. Here we just have the 295 | one contrast, but multiple could have been specified. 296 | 297 | 298 | ``` r 299 | h <- .001 300 | ame1 <- brmsmargins( 301 | bayes.logistic, 302 | add = data.frame(mpg = c(0, 0 + h)), 303 | contrasts = cbind("AME MPG" = c(-1 / h, 1 / h)), 304 | CI = 0.95, CIType = "HDI") 305 | 306 | kable(ame1$ContrastSummary, digits = 3) 307 | ``` 308 | 309 | 310 | 311 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 312 | |-----:|----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---|:-------| 313 | | 0.071| 0.07| 0.054| 0.092| NA| NA| 0.95|HDI |NA |NA |AME MPG | 314 | 315 | 316 | 317 | Now we can look at how we could calculate a discrete AME. 318 | This time we use the `at` argument instead of the `add` 319 | argument as we want to hold `am` at specific values, 320 | not add 0 and 1 to the observed `am` values. 321 | Because 0 and 1 are meaningful values of `am`, 322 | we also look at the summary of the posterior for the predictions. 323 | These predictions average across all values of `mpg`. 
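As an aside, the contrast vector used in the call below, `c(-1, 1)`, simply
takes the second averaged prediction (at `am = 1`) minus the first
(at `am = 0`): draw by draw, the contrast posterior is the product of the
averaged predictions with the contrast matrix. Here is a toy illustration of
that product, using arbitrary values rather than anything taken from the model.


``` r
## for one hypothetical draw with averaged predictions of 0.5 (am = 0)
## and 0.3 (am = 1), the contrast c(-1, 1) returns their difference
c(0.5, 0.3) %*% cbind("AME am" = c(-1, 1))
#>      AME am
#> [1,]   -0.2
```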
324 | 325 | 326 | ``` r 327 | ame2 <- brmsmargins( 328 | bayes.logistic, 329 | at = data.frame(am = c(0, 1)), 330 | contrasts = cbind("AME am" = c(-1, 1)), 331 | CI = 0.95, CIType = "HDI") 332 | 333 | kable(ame2$Summary, digits = 3) 334 | ``` 335 | 336 | 337 | 338 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID | 339 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---| 340 | | 0.544| 0.547| 0.430| 0.662| NA| NA| 0.95|HDI |NA |NA | 341 | | 0.283| 0.277| 0.175| 0.408| NA| NA| 0.95|HDI |NA |NA | 342 | 343 | 344 | 345 | 346 | ``` r 347 | kable(ame2$ContrastSummary) 348 | ``` 349 | 350 | 351 | 352 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 353 | |----------:|----------:|----------:|----------:|-----------:|----------:|----:|:------|:----|:---|:------| 354 | | -0.2607899| -0.2649507| -0.4289198| -0.0891496| NA| NA| 0.95|HDI |NA |NA |AME am | 355 | 356 | 357 | 358 | Note that by default, `brmsmargins()` uses the model frame 359 | from the model object as the dataset. This, however, can be overridden. 360 | You can give it any (valid) dataset and it will add or override the chosen 361 | values and average across the predictions from the different rows of 362 | the dataset. 363 | 364 | 365 | ## AMEs for Poisson Regression 366 | 367 | Here is a short example for Poisson regression used for 368 | count outcomes. We use a dataset drawn from: 369 | https://stats.oarc.ucla.edu/r/dae/poisson-regression/ 370 | 371 | 372 | ``` r 373 | 374 | d <- fread("https://stats.oarc.ucla.edu/stat/data/poisson_sim.csv") 375 | d[, prog := factor(prog, levels = 1:3, labels = c("General", "Academic", "Vocational"))] 376 | 377 | bayes.poisson <- brm( 378 | num_awards ~ prog + math, data = d, 379 | family = "poisson", seed = 1234, 380 | silent = 2, refresh = 0, 381 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 382 | chains = 4L, cores = 4L, backend = "cmdstanr") 383 | ``` 384 | 385 | 386 | ``` r 387 | summary(bayes.poisson) 388 | #> Family: poisson 389 | #> Links: mu = log 390 | #> Formula: num_awards ~ prog + math 391 | #> Data: d (Number of observations: 200) 392 | #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; 393 | #> total post-warmup draws = 4000 394 | #> 395 | #> Regression Coefficients: 396 | #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS 397 | #> Intercept -5.32 0.68 -6.68 -4.01 1.00 2444 2292 398 | #> progAcademic 1.15 0.38 0.48 1.92 1.00 1949 1571 399 | #> progVocational 0.39 0.47 -0.48 1.34 1.00 1808 1427 400 | #> math 0.07 0.01 0.05 0.09 1.00 2729 2349 401 | #> 402 | #> Draws were sampled using sample(hmc). For each parameter, Bulk_ESS 403 | #> and Tail_ESS are effective sample size measures, and Rhat is the potential 404 | #> scale reduction factor on split chains (at convergence, Rhat = 1). 405 | ``` 406 | 407 | AME for a continuous variable, using default CI interval and type. 408 | 409 | 410 | ``` r 411 | h <- .001 412 | ame1.p <- brmsmargins( 413 | bayes.poisson, 414 | add = data.frame(math = c(0, 0 + h)), 415 | contrasts = cbind("AME math" = c(-1 / h, 1 / h))) 416 | 417 | kable(ame1.p$ContrastSummary, digits = 3) 418 | ``` 419 | 420 | 421 | 422 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 423 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---|:--------| 424 | | 0.044| 0.044| 0.025| 0.065| NA| NA| 0.99|HDI |NA |NA |AME math | 425 | 426 | 427 | 428 | AME for a categorical variable. 
Here we calculate pairwise contrasts 429 | for all three program types. These are the predicted number of awards. 430 | 431 | 432 | ``` r 433 | ame2.p <- brmsmargins( 434 | bayes.poisson, 435 | at = data.frame( 436 | prog = factor(1:3, 437 | labels = c("General", "Academic", "Vocational"))), 438 | contrasts = cbind( 439 | "AME General v Academic" = c(1, -1, 0), 440 | "AME General v Vocational" = c(1, 0, -1), 441 | "AME Academic v Vocational" = c(0, 1, -1))) 442 | 443 | kable(ame2.p$Summary, digits = 3) 444 | ``` 445 | 446 | 447 | 448 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID | 449 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---| 450 | | 0.262| 0.253| 0.077| 0.522| NA| NA| 0.99|HDI |NA |NA | 451 | | 0.780| 0.776| 0.575| 0.992| NA| NA| 0.99|HDI |NA |NA | 452 | | 0.380| 0.368| 0.158| 0.707| NA| NA| 0.99|HDI |NA |NA | 453 | 454 | 455 | 456 | 457 | ``` r 458 | kable(ame2.p$ContrastSummary, digits = 3) 459 | ``` 460 | 461 | 462 | 463 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 464 | |------:|------:|------:|------:|-----------:|----------:|----:|:------|:----|:---|:-------------------------| 465 | | -0.518| -0.522| -0.806| -0.135| NA| NA| 0.99|HDI |NA |NA |AME General v Academic | 466 | | -0.118| -0.115| -0.510| 0.248| NA| NA| 0.99|HDI |NA |NA |AME General v Vocational | 467 | | 0.400| 0.409| 0.011| 0.759| NA| NA| 0.99|HDI |NA |NA |AME Academic v Vocational | 468 | 469 | 470 | 471 | 472 | ## AMEs for Negative Binomial Regression 473 | 474 | Here is a short example for Negative Binomial regression used for 475 | count outcomes. 476 | 477 | 478 | ``` r 479 | d <- fread("https://stats.oarc.ucla.edu/stat/data/fish.csv") 480 | d[, nofish := factor(nofish, levels = 0:1, labels = c("no fish", "fish"))] 481 | 482 | bayes.nb <- brm( 483 | count ~ nofish + xb, data = d, 484 | family = "negbinomial", seed = 1234, 485 | silent = 2, refresh = 0, 486 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 487 | chains = 4L, cores = 4L, backend = "cmdstanr") 488 | ``` 489 | 490 | 491 | ``` r 492 | summary(bayes.nb) 493 | #> Family: negbinomial 494 | #> Links: mu = log; shape = identity 495 | #> Formula: count ~ nofish + xb 496 | #> Data: d (Number of observations: 250) 497 | #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; 498 | #> total post-warmup draws = 4000 499 | #> 500 | #> Regression Coefficients: 501 | #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS 502 | #> Intercept -1.38 0.21 -1.78 -0.97 1.00 2936 2491 503 | #> nofishfish -0.08 0.25 -0.56 0.42 1.00 3667 2776 504 | #> xb 1.24 0.10 1.06 1.43 1.00 3223 3165 505 | #> 506 | #> Further Distributional Parameters: 507 | #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS 508 | #> shape 0.72 0.12 0.52 0.99 1.00 3464 2956 509 | #> 510 | #> Draws were sampled using sample(hmc). For each parameter, Bulk_ESS 511 | #> and Tail_ESS are effective sample size measures, and Rhat is the potential 512 | #> scale reduction factor on split chains (at convergence, Rhat = 1). 513 | ``` 514 | 515 | AME for a continuous variable, using default CI interval and type. 
516 | 517 | 518 | ``` r 519 | h <- .001 520 | ame1.nb <- brmsmargins( 521 | bayes.nb, 522 | add = data.frame(xb = c(0, 0 + h)), 523 | contrasts = cbind("AME xb" = c(-1 / h, 1 / h))) 524 | 525 | kable(ame1.nb$ContrastSummary, digits = 3) 526 | ``` 527 | 528 | 529 | 530 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 531 | |-----:|-----:|-----:|------:|-----------:|----------:|----:|:------|:----|:---|:------| 532 | | 5.623| 5.296| 2.654| 11.735| NA| NA| 0.99|HDI |NA |NA |AME xb | 533 | 534 | 535 | 536 | AME for a categorical variable. Here we calculate pairwise contrasts 537 | for no fish versus fish. The estimate is how many are caught. 538 | 539 | 540 | ``` r 541 | ame2.nb <- brmsmargins( 542 | bayes.nb, 543 | at = data.frame( 544 | nofish = factor(0:1, 545 | labels = c("no fish", "fish"))), 546 | contrasts = cbind( 547 | "AME No Fish v Fish" = c(1, -1))) 548 | 549 | kable(ame2.nb$Summary, digits = 3) 550 | ``` 551 | 552 | 553 | 554 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID | 555 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---| 556 | | 4.523| 4.367| 2.485| 7.975| NA| NA| 0.99|HDI |NA |NA | 557 | | 4.274| 4.043| 1.695| 8.816| NA| NA| 0.99|HDI |NA |NA | 558 | 559 | 560 | 561 | 562 | ``` r 563 | kable(ame2.nb$ContrastSummary, digits = 3) 564 | ``` 565 | 566 | 567 | 568 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 569 | |----:|-----:|------:|-----:|-----------:|----------:|----:|:------|:----|:---|:------------------| 570 | | 0.25| 0.343| -3.065| 3.026| NA| NA| 0.99|HDI |NA |NA |AME No Fish v Fish | 571 | 572 | 573 | 574 | ## References 575 | 576 | These references may be useful. 577 | 578 | - Norton, E. C., Dowd, B. E., & Maciejewski, M. L. (2019). Marginal effects—quantifying the effect of changes in risk factors in logistic regression models. *JAMA, 321*(13), 1304-1305. 579 | - Mize, T. D., Doan, L., & Long, J. S. (2019). A general framework for comparing predictions and marginal effects across models. *Sociological Methodology, 49*(1), 152-189. 580 | -------------------------------------------------------------------------------- /vignettes/fixed-effects-marginaleffects.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Marginal Effects for Fixed Effects Models" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: false 8 | smooth_scroll: true 9 | toc_depth: 3 10 | vignette: > 11 | %\VignetteIndexEntry{Marginal Effects for Fixed Effects Models} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r, include = FALSE} 17 | knitr::opts_chunk$set( 18 | collapse = TRUE, 19 | comment = "#>" 20 | ) 21 | ``` 22 | 23 | ```{r setup} 24 | library(knitr) 25 | library(data.table) 26 | library(brms) 27 | library(brmsmargins) 28 | ``` 29 | 30 | This vignette provides a brief overview of how to calculate 31 | marginal effects for Bayesian regression models involving 32 | only fixed effects and fit using the `brms` package. 33 | 34 | ## What are marginal effects? 35 | 36 | Marginal effects can be used to describe how an outcome is 37 | predicted to change with a change in a predictor (or predictors). 38 | It is a derivative. For convenience, typically calculated numerically 39 | rather than analytically. 40 | 41 | To motivate marginal effects, we can look at some regression 42 | models fit in a frequentist framework for simplicity and speed. 
43 | Here we use the `mtcars` dataset built into `R`. First, we 44 | can look at a linear regression model of the association between 45 | `mpg` and `hp`. Here we can see the estimated regression coefficient 46 | for `mpg`. 47 | 48 | ```{r} 49 | m.linear <- lm(hp ~ am + mpg, data = mtcars) 50 | 51 | coef(m.linear)["mpg"] 52 | ``` 53 | 54 | In linear models with no interactions, no (non linear) transformations, 55 | and a linear link function, the regression coefficient is the 56 | predicted change in the outcome for a one unit change in the predictor, 57 | regardless of any other values. For example, here we can look at the 58 | predicted difference in the outcome for a one unit difference in `mpg` 59 | from 0 to 1, holding `am = 0`. 60 | 61 | ```{r} 62 | yhat <- predict( 63 | m.linear, 64 | newdata = data.frame(am = 0, mpg = c(0, 1)), 65 | type = "response") 66 | 67 | diff(yhat) 68 | ``` 69 | 70 | We can look at the same estimate but moving `mpg` from 71 | 10 to 11 instead 0 to 1, holding `am = 1`. 72 | 73 | ```{r} 74 | yhat <- predict( 75 | m.linear, 76 | newdata = data.frame(am = 1, mpg = c(10, 11)), 77 | type = "response") 78 | 79 | diff(yhat) 80 | ``` 81 | 82 | All of these quantities are identical. In this case, the regression 83 | coefficient can be interpreted as a marginal effect: the expected change 84 | in the outcome for a one unit shift in `mpg`, regardless of the 85 | value of `am` and regardless of the values where `mpg` is evaluated. 86 | 87 | This convenient property does not hold for many types of models. 88 | Next consider a logistic regression model. The regression 89 | coefficient, shown below, is on the log odds scale, not the 90 | probability scale. This is not convenient for interpretation, 91 | as the log odds scale is not the same scale as our outcome. 92 | 93 | ```{r} 94 | m.logistic <- glm(vs ~ am + mpg, data = mtcars, family = binomial()) 95 | 96 | coef(m.logistic)["mpg"] 97 | ``` 98 | 99 | We can find predicted differences on the probability scale. 100 | Here moving `mpg` from 10 to 11 holding `am = 0`. 101 | 102 | ```{r} 103 | yhat <- predict( 104 | m.logistic, 105 | newdata = data.frame(am = 0, mpg = c(10, 11)), 106 | type = "response") 107 | 108 | diff(yhat) 109 | ``` 110 | 111 | We can look at the same estimate but moving `mpg` from 112 | 20 to 21 instead 10 to 11 again holding `am = 0`. 113 | 114 | ```{r} 115 | yhat <- predict( 116 | m.logistic, 117 | newdata = data.frame(am = 0, mpg = c(20, 21)), 118 | type = "response") 119 | 120 | diff(yhat) 121 | ``` 122 | 123 | We can look at the same estimate moving `mpg` from 124 | 20 to 21 as before, but this time holding `am = 1`. 125 | 126 | ```{r} 127 | yhat <- predict( 128 | m.logistic, 129 | newdata = data.frame(am = 1, mpg = c(20, 21)), 130 | type = "response") 131 | 132 | diff(yhat) 133 | ``` 134 | 135 | All the estimates in this case differ. The association between `mpg` and 136 | **probability** of `vs` is not linear. 137 | Marginal effects provide a way to get results on the response scale, 138 | which can aid interpretation. 139 | 140 | A common type of marginal effect is an average marginal effect (AME). 141 | To calculate an AME numerically, we can get predicted probabilities 142 | from a model for every observation in the dataset. For continuous variables, 143 | we might use a very small difference to approximate the derivative. 144 | For categorical variables, we might calculate a discrete difference. 145 | 146 | ### Average Marginal Effect (AME) 147 | 148 | Here is an example of a continuous AME. 
149 | `h` is a value near to zero used for the numerical 150 | derivative. We take all the values observed in the dataset 151 | for the first set of predicted probabilities. Then we take the 152 | observed values + `h` and calculate new predicted probabilities. 153 | The difference, divided by `h` is the "instantaneous" (i.e., derivative) 154 | on the probability scale for a one unit shift in the predictor, `mpg`, 155 | for each person. When we average all of these, we get the AME. 156 | 157 | ```{r} 158 | h <- .001 159 | 160 | nd.1 <- nd.0 <- model.frame(m.logistic) 161 | nd.1$mpg <- nd.1$mpg + h 162 | 163 | yhat.0 <- predict( 164 | m.logistic, 165 | newdata = nd.0, 166 | type = "response") 167 | 168 | yhat.1 <- predict( 169 | m.logistic, 170 | newdata = nd.1, 171 | type = "response") 172 | 173 | mean((yhat.1 - yhat.0) / h) 174 | ``` 175 | 176 | Here is an example of a discrete AME. The variable, 177 | `am` only takes two values: 0 or 1. So we calculate 178 | predicted probabilities if everyone had `am = 0` and then 179 | again if everyone had `am = 1`. 180 | 181 | ```{r} 182 | nd.1 <- nd.0 <- model.frame(m.logistic) 183 | nd.0$am <- 0 184 | nd.1$am <- 1 185 | 186 | yhat.0 <- predict( 187 | m.logistic, 188 | newdata = nd.0, 189 | type = "response") 190 | 191 | yhat.1 <- predict( 192 | m.logistic, 193 | newdata = nd.1, 194 | type = "response") 195 | 196 | mean((yhat.1 - yhat.0)) 197 | ``` 198 | 199 | In both these examples, we are averaging across the different values 200 | observed in the dataset. In a frequentist framework, additional details 201 | are needed to calculate uncertainty intervals. In a Bayesian framework, 202 | uncertainty intervals can be calculated readily by summarizing the 203 | posterior. 204 | 205 | ## AMEs for Logistic Regression 206 | 207 | The main function for users to use is `brmsmargins()`. Here is an 208 | example calculating AMEs for `mpg` and `am`. First we will fit the same 209 | logistic regression model using `brms`. 210 | 211 | ```{r, results = 'hide'} 212 | bayes.logistic <- brm( 213 | vs ~ am + mpg, data = mtcars, 214 | family = "bernoulli", seed = 1234, 215 | silent = 2, refresh = 0, 216 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 217 | chains = 4L, cores = 4L, backend = "cmdstanr") 218 | ``` 219 | 220 | ```{r} 221 | summary(bayes.logistic) 222 | ``` 223 | 224 | Now we can use `brmsmargins()`. We give it the model object, 225 | a `data.frame` of the values to be added, first 0, then (0 + h), 226 | and a contrast matrix. The default is a 99 percent credible interval, 227 | which we override here to 0.95. We use highest density intervals, 228 | which are the default. We also could have selected "ETI" for 229 | equal tail intervals. `brmsmargins()` will return a list 230 | with the posterior of each prediction, a summary of the posterior 231 | for the predictions, the posterior for the contrasts, and a 232 | summary of the posterior for the contrasts. Here we just have the 233 | one contrast, but multiple could have been specified. 234 | 235 | ```{r, results = 'asis'} 236 | h <- .001 237 | ame1 <- brmsmargins( 238 | bayes.logistic, 239 | add = data.frame(mpg = c(0, 0 + h)), 240 | contrasts = cbind("AME MPG" = c(-1 / h, 1 / h)), 241 | CI = 0.95, CIType = "HDI") 242 | 243 | kable(ame1$ContrastSummary, digits = 3) 244 | ``` 245 | 246 | Now we can look at how we could calculate a discrete AME. 
247 | This time we use the `at` argument instead of the `add` 248 | argument as we want to hold `am` at specific values, 249 | not add 0 and 1 to the observed `am` values. 250 | Because 0 and 1 are meaningful values of `am`, 251 | we also look at the summary of the posterior for the predictions. 252 | These predictions average across all values of `mpg`. 253 | 254 | ```{r, results = 'asis'} 255 | ame2 <- brmsmargins( 256 | bayes.logistic, 257 | at = data.frame(am = c(0, 1)), 258 | contrasts = cbind("AME am" = c(-1, 1)), 259 | CI = 0.95, CIType = "HDI") 260 | 261 | kable(ame2$Summary, digits = 3) 262 | ``` 263 | 264 | ```{r, results = 'asis'} 265 | kable(ame2$ContrastSummary) 266 | ``` 267 | 268 | Note that by default, `brmsmargins()` uses the model frame 269 | from the model object as the dataset. This, however, can be overridden. 270 | You can give it any (valid) dataset and it will add or override the chosen 271 | values and average across the predictions from the different rows of 272 | the dataset. 273 | 274 | 275 | ## AMEs for Poisson Regression 276 | 277 | Here is a short example for Poisson regression used for 278 | count outcomes. We use a dataset drawn from: 279 | https://stats.oarc.ucla.edu/r/dae/poisson-regression/ 280 | 281 | ```{r, results = 'hide'} 282 | 283 | d <- fread("https://stats.oarc.ucla.edu/stat/data/poisson_sim.csv") 284 | d[, prog := factor(prog, levels = 1:3, labels = c("General", "Academic", "Vocational"))] 285 | 286 | bayes.poisson <- brm( 287 | num_awards ~ prog + math, data = d, 288 | family = "poisson", seed = 1234, 289 | silent = 2, refresh = 0, 290 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 291 | chains = 4L, cores = 4L, backend = "cmdstanr") 292 | ``` 293 | 294 | ```{r} 295 | summary(bayes.poisson) 296 | ``` 297 | 298 | AME for a continuous variable, using default CI interval and type. 299 | 300 | ```{r, results = 'asis'} 301 | h <- .001 302 | ame1.p <- brmsmargins( 303 | bayes.poisson, 304 | add = data.frame(math = c(0, 0 + h)), 305 | contrasts = cbind("AME math" = c(-1 / h, 1 / h))) 306 | 307 | kable(ame1.p$ContrastSummary, digits = 3) 308 | ``` 309 | 310 | AME for a categorical variable. Here we calculate pairwise contrasts 311 | for all three program types. These are the predicted number of awards. 312 | 313 | ```{r, results = 'asis'} 314 | ame2.p <- brmsmargins( 315 | bayes.poisson, 316 | at = data.frame( 317 | prog = factor(1:3, 318 | labels = c("General", "Academic", "Vocational"))), 319 | contrasts = cbind( 320 | "AME General v Academic" = c(1, -1, 0), 321 | "AME General v Vocational" = c(1, 0, -1), 322 | "AME Academic v Vocational" = c(0, 1, -1))) 323 | 324 | kable(ame2.p$Summary, digits = 3) 325 | ``` 326 | 327 | ```{r, results = 'asis'} 328 | kable(ame2.p$ContrastSummary, digits = 3) 329 | ``` 330 | 331 | 332 | ## AMEs for Negative Binomial Regression 333 | 334 | Here is a short example for Negative Binomial regression used for 335 | count outcomes. 
336 | 337 | ```{r, results = 'hide'} 338 | d <- fread("https://stats.oarc.ucla.edu/stat/data/fish.csv") 339 | d[, nofish := factor(nofish, levels = 0:1, labels = c("no fish", "fish"))] 340 | 341 | bayes.nb <- brm( 342 | count ~ nofish + xb, data = d, 343 | family = "negbinomial", seed = 1234, 344 | silent = 2, refresh = 0, 345 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 346 | chains = 4L, cores = 4L, backend = "cmdstanr") 347 | ``` 348 | 349 | ```{r} 350 | summary(bayes.nb) 351 | ``` 352 | 353 | AME for a continuous variable, using default CI interval and type. 354 | 355 | ```{r, results = 'asis'} 356 | h <- .001 357 | ame1.nb <- brmsmargins( 358 | bayes.nb, 359 | add = data.frame(xb = c(0, 0 + h)), 360 | contrasts = cbind("AME xb" = c(-1 / h, 1 / h))) 361 | 362 | kable(ame1.nb$ContrastSummary, digits = 3) 363 | ``` 364 | 365 | AME for a categorical variable. Here we calculate pairwise contrasts 366 | for no fish versus fish. The estimate is how many are caught. 367 | 368 | ```{r, results = 'asis'} 369 | ame2.nb <- brmsmargins( 370 | bayes.nb, 371 | at = data.frame( 372 | nofish = factor(0:1, 373 | labels = c("no fish", "fish"))), 374 | contrasts = cbind( 375 | "AME No Fish v Fish" = c(1, -1))) 376 | 377 | kable(ame2.nb$Summary, digits = 3) 378 | ``` 379 | 380 | ```{r, results = 'asis'} 381 | kable(ame2.nb$ContrastSummary, digits = 3) 382 | ``` 383 | 384 | ## References 385 | 386 | These references may be useful. 387 | 388 | - Norton, E. C., Dowd, B. E., & Maciejewski, M. L. (2019). Marginal effects—quantifying the effect of changes in risk factors in logistic regression models. *JAMA, 321*(13), 1304-1305. 389 | - Mize, T. D., Doan, L., & Long, J. S. (2019). A general framework for comparing predictions and marginal effects across models. *Sociological Methodology, 49*(1), 152-189. 390 | -------------------------------------------------------------------------------- /vignettes/location-scale-marginaleffects.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Marginal Effects for Location Scale Models" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: false 8 | smooth_scroll: true 9 | toc_depth: 3 10 | vignette: > 11 | %\VignetteIndexEntry{Marginal Effects for Location Scale Models} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | 17 | 18 | 19 | ``` r 20 | library(knitr) 21 | library(data.table) 22 | #> data.table 1.16.4 using 16 threads (see ?getDTthreads). Latest news: r-datatable.com 23 | library(brms) 24 | #> Loading required package: Rcpp 25 | #> Loading 'brms' package (version 2.22.0). Useful instructions 26 | #> can be found by typing help('brms'). A more detailed introduction 27 | #> to the package is available through vignette('brms_overview'). 28 | #> 29 | #> Attaching package: 'brms' 30 | #> 31 | #> The following object is masked from 'package:stats': 32 | #> 33 | #> ar 34 | library(brmsmargins) 35 | ``` 36 | 37 | This vignette provides a brief overview of how to calculate 38 | marginal effects for Bayesian location scale regression models, 39 | involving fixed effects only or mixed effects 40 | (i.e., fixed and random) and fit using the `brms` package. 41 | 42 | A simpler introduction and very brief overview and motivation 43 | for marginal effects is available in the vignette for fixed 44 | effects only. 45 | 46 | This vignette will focus on Gaussian location scale models fit 47 | with `brms`. 
Gaussian location scale models in `brms` have two 48 | distributional parameters (dpar): 49 | 50 | - the mean or location (often labeled mu) of the distribution, 51 | which is the default parameter and has been examined in the 52 | other vignettes. 53 | - the variability or scale (often labeled sigma) of the distribution, 54 | which is not modeled as an outcome by default, but can be. 55 | 56 | Location scale models allow things like assumptions of homogeneity of 57 | variance to be relaxed. In repeated measures data, random effects 58 | for the scale allow calculating and predicting 59 | intraindividual variability (IIV). 60 | 61 | ## AMEs for Fixed Effects Location Scale Models 62 | 63 | To start with, we will look at a fixed effects only location scale model. 64 | We will simulate a dataset. 65 | 66 | 67 | ``` r 68 | d <- withr::with_seed( 69 | seed = 12345, code = { 70 | nObs <- 1000L 71 | d <- data.table( 72 | grp = rep(0:1, each = nObs / 2L), 73 | x = rnorm(nObs, mean = 0, sd = 0.25)) 74 | d[, y := rnorm(nObs, 75 | mean = x + grp, 76 | sd = exp(1 + x + grp))] 77 | copy(d) 78 | }) 79 | 80 | ls.fe <- brm(bf( 81 | y ~ 1 + x + grp, 82 | sigma ~ 1 + x + grp), 83 | family = "gaussian", 84 | data = d, seed = 1234, 85 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 86 | silent = 2, refresh = 0, 87 | chains = 4L, cores = 4L, backend = "cmdstanr") 88 | #> Loading required package: rstan 89 | #> Loading required package: StanHeaders 90 | #> 91 | #> rstan version 2.32.6 (Stan version 2.32.2) 92 | #> For execution on a local, multicore CPU with excess RAM we recommend calling 93 | #> options(mc.cores = parallel::detectCores()). 94 | #> To avoid recompilation of unchanged Stan programs, we recommend calling 95 | #> rstan_options(auto_write = TRUE) 96 | #> For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions, 97 | #> change `threads_per_chain` option: 98 | #> rstan_options(threads_per_chain = 1) 99 | #> Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file 100 | ``` 101 | 102 | 103 | 104 | ``` r 105 | summary(ls.fe) 106 | #> Family: gaussian 107 | #> Links: mu = identity; sigma = log 108 | #> Formula: y ~ 1 + x + grp 109 | #> sigma ~ 1 + x + grp 110 | #> Data: d (Number of observations: 1000) 111 | #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; 112 | #> total post-warmup draws = 4000 113 | #> 114 | #> Regression Coefficients: 115 | #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS 116 | #> Intercept -0.09 0.13 -0.34 0.16 1.00 5154 3686 117 | #> sigma_Intercept 1.01 0.03 0.95 1.07 1.00 4915 2872 118 | #> x 1.62 0.46 0.72 2.49 1.00 3944 2769 119 | #> grp 1.02 0.34 0.34 1.67 1.00 3055 2998 120 | #> sigma_x 0.85 0.09 0.67 1.02 1.00 4847 2766 121 | #> sigma_grp 1.01 0.04 0.92 1.09 1.00 4862 2775 122 | #> 123 | #> Draws were sampled using sample(hmc). For each parameter, Bulk_ESS 124 | #> and Tail_ESS are effective sample size measures, and Rhat is the potential 125 | #> scale reduction factor on split chains (at convergence, Rhat = 1). 126 | ``` 127 | 128 | Now we can use `brmsmargins()`. By default, it will 129 | be for the location parameter, the mean. As this is 130 | a Gaussian linear model with no transformations and 131 | not interactions, the AMEs are the same as the 132 | regression coefficients. 133 | 134 | Here is an example continuous AME. 
135 | 136 | 137 | ``` r 138 | h <- .001 139 | ame1 <- brmsmargins( 140 | ls.fe, 141 | add = data.frame(x = c(0, h)), 142 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 143 | CI = 0.95, CIType = "ETI", 144 | effects = "fixedonly") 145 | 146 | knitr::kable(ame1$ContrastSummary, digits = 3) 147 | ``` 148 | 149 | 150 | 151 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 152 | |-----:|-----:|-----:|----:|-----------:|----------:|----:|:------|:----|:---|:-----| 153 | | 1.619| 1.629| 0.717| 2.49| NA| NA| 0.95|ETI |NA |NA |AME x | 154 | 155 | 156 | 157 | Here is an AME for discrete / categorical predictors. 158 | 159 | 160 | ``` r 161 | ame2 <- brmsmargins( 162 | ls.fe, 163 | at = data.frame(grp = c(0, 1)), 164 | contrasts = cbind("AME grp" = c(-1, 1)), 165 | CI = 0.95, CIType = "ETI", 166 | effects = "fixedonly") 167 | 168 | knitr::kable(ame2$ContrastSummary, digits = 3) 169 | ``` 170 | 171 | 172 | 173 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 174 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---|:-------| 175 | | 1.023| 1.033| 0.338| 1.672| NA| NA| 0.95|ETI |NA |NA |AME grp | 176 | 177 | 178 | 179 | In `brms` the scale parameter for Gaussian models, 180 | `sigma` uses a log link function. Therefore when back 181 | transformed to the original scale, the AMEs will not 182 | be the same as the regression coefficients which are on 183 | the link scale (log transformed). 184 | 185 | We specify that we want AMEs for `sigma` by setting: 186 | `dpar = "sigma"`. Here is a continuous example. 187 | 188 | 189 | ``` r 190 | h <- .001 191 | ame3 <- brmsmargins( 192 | ls.fe, 193 | add = data.frame(x = c(0, h)), 194 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 195 | CI = 0.95, CIType = "ETI", dpar = "sigma", 196 | effects = "fixedonly") 197 | 198 | knitr::kable(ame3$ContrastSummary, digits = 3) 199 | ``` 200 | 201 | 202 | 203 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 204 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---|:-----| 205 | | 4.473| 4.468| 3.491| 5.495| NA| NA| 0.95|ETI |NA |NA |AME x | 206 | 207 | 208 | 209 | Here is a discrete / categorical example. 210 | 211 | 212 | ``` r 213 | ame4 <- brmsmargins( 214 | ls.fe, 215 | at = data.frame(grp = c(0, 1)), 216 | contrasts = cbind("AME grp" = c(-1, 1)), 217 | CI = 0.95, CIType = "ETI", dpar = "sigma", 218 | effects = "fixedonly") 219 | 220 | knitr::kable(ame4$ContrastSummary, digits = 3) 221 | ``` 222 | 223 | 224 | 225 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 226 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---|:-------| 227 | | 4.905| 4.898| 4.391| 5.433| NA| NA| 0.95|ETI |NA |NA |AME grp | 228 | 229 | 230 | 231 | These results are comparable to the mean difference in standard 232 | deviation by `grp`. Note that in general, these may not closely 233 | align. However, in this instance as `x` and `grp` were simulated 234 | to be uncorrelated, the simple unadjusted results match the 235 | regression results closely. 236 | 237 | 238 | ``` r 239 | d[, .(SD = sd(y)), by = grp][, diff(SD)] 240 | ``` 241 | 242 | [1] 4.976021 243 | 244 | ## AMEs for Mixed Effects Location Scale Models 245 | 246 | We will simulate some multilevel location scale data for model 247 | and fit the mixed effects location scale model. 
248 | 249 | 250 | ``` r 251 | dmixed <- withr::with_seed( 252 | seed = 12345, code = { 253 | nGroups <- 100 254 | nObs <- 20 255 | theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) 256 | theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) 257 | theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) 258 | theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) 259 | theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) 260 | theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) 261 | theta.location[, 1] <- theta.location[, 1] - 2.5 262 | theta.location[, 2] <- theta.location[, 2] + 1 263 | dmixed <- data.table( 264 | x = rep(rep(0:1, each = nObs / 2), times = nGroups)) 265 | dmixed[, ID := rep(seq_len(nGroups), each = nObs)] 266 | 267 | for (i in seq_len(nGroups)) { 268 | dmixed[ID == i, y := rnorm( 269 | n = nObs, 270 | mean = theta.location[i, 1] + theta.location[i, 2] * x, 271 | sd = exp(1 + theta.location[i, 1] + theta.location[i, 2] * x)) 272 | ] 273 | } 274 | copy(dmixed) 275 | }) 276 | 277 | ls.me <- brm(bf( 278 | y ~ 1 + x + (1 + x | ID), 279 | sigma ~ 1 + x + (1 + x | ID)), 280 | family = "gaussian", 281 | data = dmixed, seed = 1234, 282 | prior = prior(normal(-2.5, 1), class = "Intercept") + 283 | prior(normal(1, .5), class = "b") + 284 | prior(student_t(3, 1.25, 1), class = "sd", coef = "Intercept", group = "ID") + 285 | prior(student_t(3, .5, .5), class = "sd", coef = "x", group = "ID") + 286 | prior(normal(-1.5, 1), class = "Intercept", dpar = "sigma") + 287 | prior(normal(1, .5), class = "b", dpar = "sigma") + 288 | prior(student_t(3, 1.25, 1), class = "sd", coef = "Intercept", group = "ID", dpar = "sigma") + 289 | prior(student_t(3, .5, .5), class = "sd", coef = "x", group = "ID", dpar = "sigma"), 290 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 291 | silent = 2, refresh = 0, 292 | chains = 4L, cores = 4L, backend = "cmdstanr") 293 | #> Warning: 326 of 4000 (8.0%) transitions hit the maximum treedepth limit of 10. 294 | #> See https://mc-stan.org/misc/warnings for details. 295 | ``` 296 | 297 | Note that this model has not achieved good convergence, but 298 | as it already took about 6 minutes to run, 299 | for the sake of demonstration we continue. In practice, 300 | one would want to make adjustments to ensure good convergence 301 | and an adequate effective sample size. 
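If this were a substantive analysis rather than a demonstration, a natural
next step would be to refit with longer chains and a higher treedepth limit,
since the warning above concerned the maximum treedepth. The call below is
only a sketch and is not run here; the object name `ls.me2` and the specific
settings are illustrative choices, not recommendations.


``` r
## not run: a possible refit with more post-warmup draws, a higher
## treedepth limit, and a higher adapt_delta in case of divergences
ls.me2 <- update(
  ls.me,
  iter = 4000, warmup = 1000,
  control = list(adapt_delta = 0.95, max_treedepth = 15),
  chains = 4L, cores = 4L)
```

Raising `max_treedepth` targets the specific warning shown, while the
additional iterations help the effective sample size; neither changes the
model, only how it is sampled.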
302 | 303 | 304 | ``` r 305 | summary(ls.me) 306 | #> Family: gaussian 307 | #> Links: mu = identity; sigma = log 308 | #> Formula: y ~ 1 + x + (1 + x | ID) 309 | #> sigma ~ 1 + x + (1 + x | ID) 310 | #> Data: dmixed (Number of observations: 2000) 311 | #> Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1; 312 | #> total post-warmup draws = 4000 313 | #> 314 | #> Multilevel Hyperparameters: 315 | #> ~ID (Number of levels: 100) 316 | #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS 317 | #> sd(Intercept) 1.19 0.09 1.03 1.38 1.04 179 318 | #> sd(x) 0.43 0.04 0.35 0.52 1.00 653 319 | #> sd(sigma_Intercept) 1.27 0.10 1.10 1.47 1.01 326 320 | #> sd(sigma_x) 0.50 0.06 0.40 0.62 1.00 1008 321 | #> cor(Intercept,x) -0.39 0.12 -0.61 -0.16 1.01 532 322 | #> cor(sigma_Intercept,sigma_x) -0.36 0.11 -0.56 -0.13 1.00 1294 323 | #> Tail_ESS 324 | #> sd(Intercept) 564 325 | #> sd(x) 1330 326 | #> sd(sigma_Intercept) 890 327 | #> sd(sigma_x) 1703 328 | #> cor(Intercept,x) 1174 329 | #> cor(sigma_Intercept,sigma_x) 2028 330 | #> 331 | #> Regression Coefficients: 332 | #> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS 333 | #> Intercept -2.56 0.12 -2.80 -2.32 1.03 92 169 334 | #> sigma_Intercept -1.48 0.12 -1.74 -1.25 1.02 180 516 335 | #> x 0.94 0.06 0.83 1.05 1.00 512 1229 336 | #> sigma_x 0.97 0.06 0.85 1.09 1.00 1229 1765 337 | #> 338 | #> Draws were sampled using sample(hmc). For each parameter, Bulk_ESS 339 | #> and Tail_ESS are effective sample size measures, and Rhat is the potential 340 | #> scale reduction factor on split chains (at convergence, Rhat = 1). 341 | ``` 342 | 343 | We use `brmsmargins()` similar as for other mixed effects models. 344 | For more details see the vignette on marginal effects for 345 | mixed effects models. 346 | 347 | Here is an example treating `x` as continuous using only the 348 | fixed effects for the AME for the scale parameter, `sigma`. 349 | 350 | 351 | ``` r 352 | h <- .001 353 | ame1a.lsme <- brmsmargins( 354 | ls.me, 355 | add = data.frame(x = c(0, h)), 356 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 357 | dpar = "sigma", 358 | effects = "fixedonly") 359 | 360 | knitr::kable(ame1a.lsme$ContrastSummary, digits = 3) 361 | ``` 362 | 363 | 364 | 365 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 366 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---|:-----| 367 | | 0.405| 0.402| 0.284| 0.559| NA| NA| 0.99|HDI |NA |NA |AME x | 368 | 369 | 370 | 371 | Here is the example again, this time integrating out the random effects, 372 | which results in a considerable difference in the estimate of the AME. 373 | 374 | 375 | ``` r 376 | h <- .001 377 | ame1b.lsme <- brmsmargins( 378 | ls.me, 379 | add = data.frame(x = c(0, h)), 380 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 381 | dpar = "sigma", 382 | effects = "integrateoutRE", k = 100L, seed = 1234) 383 | 384 | knitr::kable(ame1b.lsme$ContrastSummary, digits = 3) 385 | ``` 386 | 387 | 388 | 389 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 390 | |-----:|-----:|-----:|-----:|-----------:|----------:|----:|:------|:----|:---|:-----| 391 | | 0.801| 0.766| 0.356| 1.582| NA| NA| 0.99|HDI |NA |NA |AME x | 392 | 393 | 394 | 395 | Here is an example treating `x` as discrete, using only 396 | the fixed effects. 
397 | 398 | 399 | 400 | ``` r 401 | ame2a.lsme <- brmsmargins( 402 | ls.me, 403 | at = data.frame(x = c(0, 1)), 404 | contrasts = cbind("AME x" = c(-1, 1)), 405 | dpar = "sigma", 406 | effects = "fixedonly") 407 | 408 | knitr::kable(ame2a.lsme$ContrastSummary) 409 | ``` 410 | 411 | 412 | 413 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 414 | |---------:|---------:|---------:|---------:|-----------:|----------:|----:|:------|:----|:---|:-----| 415 | | 0.3751833| 0.3732505| 0.2614079| 0.5074998| NA| NA| 0.99|HDI |NA |NA |AME x | 416 | 417 | 418 | 419 | Here is the example again, this time integrating out the random effects, 420 | likely the more appropriate estimate for most use cases. 421 | 422 | 423 | ``` r 424 | ame2b.lsme <- brmsmargins( 425 | ls.me, 426 | at = data.frame(x = c(0, 1)), 427 | contrasts = cbind("AME x" = c(-1, 1)), 428 | dpar = "sigma", 429 | effects = "integrateoutRE", k = 100L, seed = 1234) 430 | 431 | knitr::kable(ame2b.lsme$ContrastSummary) 432 | ``` 433 | 434 | 435 | 436 | | M| Mdn| LL| UL| PercentROPE| PercentMID| CI|CIType |ROPE |MID |Label | 437 | |---------:|---------:|---------:|--------:|-----------:|----------:|----:|:------|:----|:---|:-----| 438 | | 0.7096137| 0.6790394| 0.3412707| 1.410367| NA| NA| 0.99|HDI |NA |NA |AME x | 439 | 440 | 441 | 442 | This also is relatively close calculating all the 443 | individual standard deviations and taking their differences, 444 | then averaging. 445 | 446 | 447 | ``` r 448 | dmixed[, .(SD = sd(y)), by = .(ID, x) 449 | ][, .(SDdiff = diff(SD)), by = ID][, mean(SDdiff)] 450 | #> [1] 0.6281889 451 | ``` 452 | -------------------------------------------------------------------------------- /vignettes/location-scale-marginaleffects.Rmd.orig: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Marginal Effects for Location Scale Models" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: false 8 | smooth_scroll: true 9 | toc_depth: 3 10 | vignette: > 11 | %\VignetteIndexEntry{Marginal Effects for Location Scale Models} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r, include = FALSE} 17 | knitr::opts_chunk$set( 18 | collapse = TRUE, 19 | comment = "#>" 20 | ) 21 | ``` 22 | 23 | ```{r setup} 24 | library(knitr) 25 | library(data.table) 26 | library(brms) 27 | library(brmsmargins) 28 | ``` 29 | 30 | This vignette provides a brief overview of how to calculate 31 | marginal effects for Bayesian location scale regression models, 32 | involving fixed effects only or mixed effects 33 | (i.e., fixed and random) and fit using the `brms` package. 34 | 35 | A simpler introduction and very brief overview and motivation 36 | for marginal effects is available in the vignette for fixed 37 | effects only. 38 | 39 | This vignette will focus on Gaussian location scale models fit 40 | with `brms`. Gaussian location scale models in `brms` have two 41 | distributional parameters (dpar): 42 | 43 | - the mean or location (often labeled mu) of the distribution, 44 | which is the default parameter and has been examined in the 45 | other vignettes. 46 | - the variability or scale (often labeled sigma) of the distribution, 47 | which is not modeled as an outcome by default, but can be. 48 | 49 | Location scale models allow things like assumptions of homogeneity of 50 | variance to be relaxed. 
In repeated measures data, random effects 51 | for the scale allow calculating and predicting 52 | intraindividual variability (IIV). 53 | 54 | ## AMEs for Fixed Effects Location Scale Models 55 | 56 | To start with, we will look at a fixed effects only location scale model. 57 | We will simulate a dataset. 58 | 59 | ```{r, results = 'hide'} 60 | d <- withr::with_seed( 61 | seed = 12345, code = { 62 | nObs <- 1000L 63 | d <- data.table( 64 | grp = rep(0:1, each = nObs / 2L), 65 | x = rnorm(nObs, mean = 0, sd = 0.25)) 66 | d[, y := rnorm(nObs, 67 | mean = x + grp, 68 | sd = exp(1 + x + grp))] 69 | copy(d) 70 | }) 71 | 72 | ls.fe <- brm(bf( 73 | y ~ 1 + x + grp, 74 | sigma ~ 1 + x + grp), 75 | family = "gaussian", 76 | data = d, seed = 1234, 77 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 78 | silent = 2, refresh = 0, 79 | chains = 4L, cores = 4L, backend = "cmdstanr") 80 | ``` 81 | 82 | 83 | ```{r} 84 | summary(ls.fe) 85 | ``` 86 | 87 | Now we can use `brmsmargins()`. By default, it will 88 | be for the location parameter, the mean. As this is 89 | a Gaussian linear model with no transformations and 90 | not interactions, the AMEs are the same as the 91 | regression coefficients. 92 | 93 | Here is an example continuous AME. 94 | 95 | ```{r, results = 'asis'} 96 | h <- .001 97 | ame1 <- brmsmargins( 98 | ls.fe, 99 | add = data.frame(x = c(0, h)), 100 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 101 | CI = 0.95, CIType = "ETI", 102 | effects = "fixedonly") 103 | 104 | knitr::kable(ame1$ContrastSummary, digits = 3) 105 | ``` 106 | 107 | Here is an AME for discrete / categorical predictors. 108 | 109 | ```{r, results = 'asis'} 110 | ame2 <- brmsmargins( 111 | ls.fe, 112 | at = data.frame(grp = c(0, 1)), 113 | contrasts = cbind("AME grp" = c(-1, 1)), 114 | CI = 0.95, CIType = "ETI", 115 | effects = "fixedonly") 116 | 117 | knitr::kable(ame2$ContrastSummary, digits = 3) 118 | ``` 119 | 120 | In `brms` the scale parameter for Gaussian models, 121 | `sigma` uses a log link function. Therefore when back 122 | transformed to the original scale, the AMEs will not 123 | be the same as the regression coefficients which are on 124 | the link scale (log transformed). 125 | 126 | We specify that we want AMEs for `sigma` by setting: 127 | `dpar = "sigma"`. Here is a continuous example. 128 | 129 | ```{r, results = 'asis'} 130 | h <- .001 131 | ame3 <- brmsmargins( 132 | ls.fe, 133 | add = data.frame(x = c(0, h)), 134 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 135 | CI = 0.95, CIType = "ETI", dpar = "sigma", 136 | effects = "fixedonly") 137 | 138 | knitr::kable(ame3$ContrastSummary, digits = 3) 139 | ``` 140 | 141 | Here is a discrete / categorical example. 142 | 143 | ```{r, results = 'asis'} 144 | ame4 <- brmsmargins( 145 | ls.fe, 146 | at = data.frame(grp = c(0, 1)), 147 | contrasts = cbind("AME grp" = c(-1, 1)), 148 | CI = 0.95, CIType = "ETI", dpar = "sigma", 149 | effects = "fixedonly") 150 | 151 | knitr::kable(ame4$ContrastSummary, digits = 3) 152 | ``` 153 | 154 | These results are comparable to the mean difference in standard 155 | deviation by `grp`. Note that in general, these may not closely 156 | align. However, in this instance as `x` and `grp` were simulated 157 | to be uncorrelated, the simple unadjusted results match the 158 | regression results closely. 
159 | 160 | ```{r, results = 'asis'} 161 | d[, .(SD = sd(y)), by = grp][, diff(SD)] 162 | ``` 163 | 164 | ## AMEs for Mixed Effects Location Scale Models 165 | 166 | We will simulate some multilevel location scale data for model 167 | and fit the mixed effects location scale model. 168 | 169 | ```{r, results = 'hide'} 170 | dmixed <- withr::with_seed( 171 | seed = 12345, code = { 172 | nGroups <- 100 173 | nObs <- 20 174 | theta.location <- matrix(rnorm(nGroups * 2), nrow = nGroups, ncol = 2) 175 | theta.location[, 1] <- theta.location[, 1] - mean(theta.location[, 1]) 176 | theta.location[, 2] <- theta.location[, 2] - mean(theta.location[, 2]) 177 | theta.location[, 1] <- theta.location[, 1] / sd(theta.location[, 1]) 178 | theta.location[, 2] <- theta.location[, 2] / sd(theta.location[, 2]) 179 | theta.location <- theta.location %*% chol(matrix(c(1.5, -.25, -.25, .5^2), 2)) 180 | theta.location[, 1] <- theta.location[, 1] - 2.5 181 | theta.location[, 2] <- theta.location[, 2] + 1 182 | dmixed <- data.table( 183 | x = rep(rep(0:1, each = nObs / 2), times = nGroups)) 184 | dmixed[, ID := rep(seq_len(nGroups), each = nObs)] 185 | 186 | for (i in seq_len(nGroups)) { 187 | dmixed[ID == i, y := rnorm( 188 | n = nObs, 189 | mean = theta.location[i, 1] + theta.location[i, 2] * x, 190 | sd = exp(1 + theta.location[i, 1] + theta.location[i, 2] * x)) 191 | ] 192 | } 193 | copy(dmixed) 194 | }) 195 | 196 | ls.me <- brm(bf( 197 | y ~ 1 + x + (1 + x | ID), 198 | sigma ~ 1 + x + (1 + x | ID)), 199 | family = "gaussian", 200 | data = dmixed, seed = 1234, 201 | prior = prior(normal(-2.5, 1), class = "Intercept") + 202 | prior(normal(1, .5), class = "b") + 203 | prior(student_t(3, 1.25, 1), class = "sd", coef = "Intercept", group = "ID") + 204 | prior(student_t(3, .5, .5), class = "sd", coef = "x", group = "ID") + 205 | prior(normal(-1.5, 1), class = "Intercept", dpar = "sigma") + 206 | prior(normal(1, .5), class = "b", dpar = "sigma") + 207 | prior(student_t(3, 1.25, 1), class = "sd", coef = "Intercept", group = "ID", dpar = "sigma") + 208 | prior(student_t(3, .5, .5), class = "sd", coef = "x", group = "ID", dpar = "sigma"), 209 | save_pars = save_pars(group = TRUE, latent = FALSE, all = TRUE), 210 | silent = 2, refresh = 0, 211 | chains = 4L, cores = 4L, backend = "cmdstanr") 212 | ``` 213 | 214 | Note that this model has not achieved good convergence, but 215 | as it already took about 6 minutes to run, 216 | for the sake of demonstration we continue. In practice, 217 | one would want to make adjustments to ensure good convergence 218 | and an adequate effective sample size. 219 | 220 | ```{r} 221 | summary(ls.me) 222 | ``` 223 | 224 | We use `brmsmargins()` similar as for other mixed effects models. 225 | For more details see the vignette on marginal effects for 226 | mixed effects models. 227 | 228 | Here is an example treating `x` as continuous using only the 229 | fixed effects for the AME for the scale parameter, `sigma`. 230 | 231 | ```{r, results = 'asis'} 232 | h <- .001 233 | ame1a.lsme <- brmsmargins( 234 | ls.me, 235 | add = data.frame(x = c(0, h)), 236 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 237 | dpar = "sigma", 238 | effects = "fixedonly") 239 | 240 | knitr::kable(ame1a.lsme$ContrastSummary, digits = 3) 241 | ``` 242 | 243 | Here is the example again, this time integrating out the random effects, 244 | which results in a considerable difference in the estimate of the AME. 
245 | 246 | ```{r, results = 'asis'} 247 | h <- .001 248 | ame1b.lsme <- brmsmargins( 249 | ls.me, 250 | add = data.frame(x = c(0, h)), 251 | contrasts = cbind("AME x" = c(-1 / h, 1 / h)), 252 | dpar = "sigma", 253 | effects = "integrateoutRE", k = 100L, seed = 1234) 254 | 255 | knitr::kable(ame1b.lsme$ContrastSummary, digits = 3) 256 | ``` 257 | 258 | Here is an example treating `x` as discrete, using only 259 | the fixed effects. 260 | 261 | 262 | ```{r, results = 'asis'} 263 | ame2a.lsme <- brmsmargins( 264 | ls.me, 265 | at = data.frame(x = c(0, 1)), 266 | contrasts = cbind("AME x" = c(-1, 1)), 267 | dpar = "sigma", 268 | effects = "fixedonly") 269 | 270 | knitr::kable(ame2a.lsme$ContrastSummary) 271 | ``` 272 | 273 | Here is the example again, this time integrating out the random effects, 274 | likely the more appropriate estimate for most use cases. 275 | 276 | ```{r, results = 'asis'} 277 | ame2b.lsme <- brmsmargins( 278 | ls.me, 279 | at = data.frame(x = c(0, 1)), 280 | contrasts = cbind("AME x" = c(-1, 1)), 281 | dpar = "sigma", 282 | effects = "integrateoutRE", k = 100L, seed = 1234) 283 | 284 | knitr::kable(ame2b.lsme$ContrastSummary) 285 | ``` 286 | 287 | This also is relatively close calculating all the 288 | individual standard deviations and taking their differences, 289 | then averaging. 290 | 291 | ```{r} 292 | dmixed[, .(SD = sd(y)), by = .(ID, x) 293 | ][, .(SDdiff = diff(SD)), by = ID][, mean(SDdiff)] 294 | ``` 295 | --------------------------------------------------------------------------------