├── .Rbuildignore
├── .github
│   ├── .gitignore
│   └── workflows
│       └── pkgdown.yaml
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── checks.R
│   ├── clarify-package.R
│   ├── clarify_est_methods.R
│   ├── clarify_predict.R
│   ├── get_model_components.R
│   ├── misim.R
│   ├── plot.clarify_adrf.R
│   ├── plot.clarify_est.R
│   ├── plot.clarify_setx.R
│   ├── sim.R
│   ├── sim_adrf.R
│   ├── sim_ame.R
│   ├── sim_apply.R
│   ├── sim_setx.R
│   ├── summary.clarify_est.R
│   ├── transform.clarify_est.R
│   ├── utils.R
│   └── zzz.R
├── README.Rmd
├── README.md
├── _dev
│   └── sim_chain.R
├── _pkgdown.yml
├── clarify.Rproj
├── clarify
│   ├── Submission 1
│   │   ├── RJournal.sty
│   │   ├── RJreferences.bib
│   │   ├── RJwrapper.log
│   │   ├── RJwrapper.tex
│   │   ├── clarify.R
│   │   ├── clarify.Rmd
│   │   ├── clarify.html
│   │   ├── clarify.log
│   │   ├── clarify.pdf
│   │   ├── clarify.tex
│   │   ├── figures
│   │   │   ├── unnamed-chunk-10-1.pdf
│   │   │   ├── unnamed-chunk-10-1.png
│   │   │   ├── unnamed-chunk-14-1.pdf
│   │   │   ├── unnamed-chunk-14-1.png
│   │   │   ├── unnamed-chunk-16-1.pdf
│   │   │   ├── unnamed-chunk-16-1.png
│   │   │   ├── unnamed-chunk-25-1.pdf
│   │   │   ├── unnamed-chunk-25-1.png
│   │   │   ├── unnamed-chunk-28-1.pdf
│   │   │   ├── unnamed-chunk-28-1.png
│   │   │   ├── unnamed-chunk-8-1.pdf
│   │   │   └── unnamed-chunk-8-1.png
│   │   ├── initial_checks.log
│   │   └── motivation-letter.md
│   └── Submission 2
│       ├── 1-review-1.txt
│       ├── 1-review-2.txt
│       ├── RJournal.sty
│       ├── RJreferences.bib
│       ├── RJwrapper.log
│       ├── RJwrapper.tex
│       ├── clarify.R
│       ├── clarify.Rmd
│       ├── clarify.html
│       ├── clarify.log
│       ├── clarify.pdf
│       ├── clarify.tex
│       ├── figures
│       │   ├── plot1-1.png
│       │   ├── plot2-1.png
│       │   ├── plot3-1.png
│       │   ├── plot4-1.png
│       │   ├── plot8-1.png
│       │   ├── plot9-1.png
│       │   ├── unnamed-chunk-10-1.pdf
│       │   ├── unnamed-chunk-10-1.png
│       │   ├── unnamed-chunk-14-1.pdf
│       │   ├── unnamed-chunk-14-1.png
│       │   ├── unnamed-chunk-16-1.pdf
│       │   ├── unnamed-chunk-16-1.png
│       │   ├── unnamed-chunk-25-1.pdf
│       │   ├── unnamed-chunk-25-1.png
│       │   ├── unnamed-chunk-28-1.pdf
│       │   ├── unnamed-chunk-28-1.png
│       │   ├── unnamed-chunk-8-1.pdf
│       │   └── unnamed-chunk-8-1.png
│       ├── initial_checks.log
│       ├── motivation-letter.md
│       ├── response_to_reviewers.Rmd
│       └── response_to_reviewers.html
├── man
│   ├── clarify-package.Rd
│   ├── figures
│   │   ├── README-example-1.png
│   │   ├── README-example2-1.png
│   │   ├── README-unnamed-chunk-6-1.png
│   │   └── README-unnamed-chunk-7-1.png
│   ├── misim.Rd
│   ├── plot.clarify_adrf.Rd
│   ├── plot.clarify_setx.Rd
│   ├── sim.Rd
│   ├── sim_adrf.Rd
│   ├── sim_ame.Rd
│   ├── sim_apply.Rd
│   ├── sim_setx.Rd
│   ├── summary.clarify_est.Rd
│   └── transform.clarify_est.Rd
├── tests
│   ├── testthat.R
│   └── testthat
│       ├── fixtures
│       │   ├── make_mdata.R
│       │   ├── make_mira.R
│       │   ├── mdata.rds
│       │   ├── mimira.rds
│       │   ├── mira.rds
│       │   └── model_list.rds
│       ├── helper.R
│       ├── test-misim.R
│       ├── test-sim.R
│       ├── test-sim_ame.R
│       └── test-transform.R
└── vignettes
    ├── .gitignore
    ├── Zelig.Rmd
    ├── clarify.Rmd
    └── references.bib

/.Rbuildignore:
--------------------------------------------------------------------------------
^.*\.Rproj$
^\.Rproj\.user$
^LICENSE\.md$
^README\.Rmd$
^_pkgdown\.yml$
^docs$
^pkgdown$
^\.github$
^\_dev$
^CRAN-SUBMISSION$
^clarify$
^tests/testthat/fixtures/mimira\.rds$
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
*.html
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]
  release:
    types: [published]
  workflow_dispatch:

name: pkgdown

jobs:
  pkgdown:
    runs-on: ubuntu-latest
    # Only restrict concurrency for non-PR jobs
    concurrency:
      group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
    env:
      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
    steps:
      - uses: actions/checkout@v2

      - uses: r-lib/actions/setup-pandoc@v2

      - uses: r-lib/actions/setup-r@v2
        with:
          use-public-rspm: true

      - uses: r-lib/actions/setup-r-dependencies@v2
        with:
          extra-packages: any::pkgdown, local::.
          needs: website

      - name: Build site
        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
        shell: Rscript {0}

      - name: Deploy to GitHub pages 🚀
        if: github.event_name != 'pull_request'
        uses: JamesIves/github-pages-deploy-action@4.1.4
        with:
          clean: false
          branch: gh-pages
          folder: docs
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.Rproj.user
.Rhistory
.RData
.Ruserdata
docs
inst/doc
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
Package: clarify
Type: Package
Title: Simulation-Based Inference for Regression Models
Version: 0.2.1
Authors@R: c(
    person("Noah", "Greifer", role = c("aut", "cre"),
           email = "ngreifer@iq.harvard.edu",
           comment = c(ORCID="0000-0003-3067-7154")),
    person("Steven", "Worthington", role = c("aut"),
           email = "sworthington@iq.harvard.edu",
           comment = c(ORCID="0000-0001-9550-5797")),
    person("Stefano", "Iacus", role = c("aut"),
           email = "siacus@iq.harvard.edu",
           comment = c(ORCID="0000-0002-4884-0047")),
    person("Gary", "King", role = c("aut"),
           email = "king@harvard.edu",
           comment = c(ORCID="0000-0002-5327-7631"))
    )
Description: Performs simulation-based inference as an alternative to the delta method
    for obtaining valid confidence intervals and p-values for regression post-estimation
    quantities, such as average marginal effects and predictions at representative values.
    This framework for simulation-based inference is especially useful when the resulting
    quantity is not normally distributed and the delta method approximation fails. The
    methodology is described in King, Tomz, and Wittenberg (2000) <doi:10.2307/2669316>.
    'clarify' is meant to replace some of the functionality of the archived package
    'Zelig'; see the vignette "Translating Zelig to clarify" for replicating this
    functionality.
20 | License: GPL (>= 3) 21 | Encoding: UTF-8 22 | Depends: R (>= 3.5.0) 23 | Imports: 24 | ggplot2 (>= 3.4.0), 25 | pbapply (>= 1.7-0), 26 | chk (>= 0.9.0), 27 | rlang (>= 1.0.6), 28 | insight (>= 0.19.11), 29 | marginaleffects (>= 0.20.0), 30 | mvnfast (>= 0.2.6) 31 | Suggests: 32 | testthat (>= 3.0.0), 33 | MatchIt (>= 4.0.0), 34 | parallel, 35 | knitr, 36 | rmarkdown, 37 | Amelia, 38 | MASS, betareg, survey, estimatr, fixest, logistf, geepack, rms, 39 | robustbase, robust, AER, ivreg, mgcv, sandwich 40 | Config/testthat/edition: 3 41 | RoxygenNote: 7.3.1 42 | Roxygen: list(markdown = TRUE) 43 | URL: https://github.com/iqss/clarify, 44 | https://iqss.github.io/clarify/ 45 | BugReports: https://github.com/iqss/clarify/issues 46 | VignetteBuilder: knitr 47 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(Ops,clarify_est) 4 | S3method(`[`,clarify_est) 5 | S3method(`dimnames<-`,clarify_est) 6 | S3method(`names<-`,clarify_est) 7 | S3method(as.data.frame,clarify_est) 8 | S3method(as.matrix,clarify_est) 9 | S3method(cbind,clarify_est) 10 | S3method(coef,clarify_est) 11 | S3method(confint,clarify_est) 12 | S3method(dimnames,clarify_est) 13 | S3method(names,clarify_est) 14 | S3method(plot,clarify_adrf) 15 | S3method(plot,clarify_est) 16 | S3method(plot,clarify_setx) 17 | S3method(print,clarify_adrf) 18 | S3method(print,clarify_ame) 19 | S3method(print,clarify_est) 20 | S3method(print,clarify_misim) 21 | S3method(print,clarify_setx) 22 | S3method(print,clarify_sim) 23 | S3method(print,summary.clarify_est) 24 | S3method(str,clarify_est) 25 | S3method(summary,clarify_est) 26 | S3method(transform,clarify_est) 27 | S3method(vcov,clarify_est) 28 | export(misim) 29 | export(sim) 30 | export(sim_adrf) 31 | export(sim_ame) 32 | export(sim_apply) 33 | export(sim_setx) 34 | import(ggplot2) 35 | import(stats) 36 | importFrom(utils,str) 37 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # `clarify` 0.2.1 2 | 3 | * In `sim_ame()` and `sim_adrf()`, unit-level weights are no longer used to compute means, consistent with advice in [Gabriel et al. (2023)](https://doi.org/10.1002/sim.9969). For those using these functions after matching or weighting for the ATT or ATC, this will not change results. For matching or weighting for the ATE, this improves robustness against misspecified weights. 4 | 5 | * In `sim_ame()`, more than one variable can be supplied to `var` to generate average adjusted predictions or compute average marginal effects with other variables set to supplied values. The help page for `sim_ame()` has been retooled to reflect this. 6 | 7 | * In `transform()`, values can now be indicated by positional shortcuts of the form `.b{#}`, e.g., `.b1 - .b2`, to facilitate specifying transformations of the desired quantities without using the names of the quantities, which can be frustrating to use. 8 | 9 | * When `reference = TRUE` with `plot()`, a blue line at the median of the simulated estimates is also included on the plot; when this value does not align with the estimate, quantile confidence intervals may be invalid. 
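As a minimal sketch of the new positional shortcut and the new `reference` behavior (assuming `est` is a `clarify_est` object holding two estimated quantities; the name `RD` is hypothetical):

```r
# .b1 and .b2 refer to the first and second estimated quantities by
# position, so names containing brackets don't need to be typed out
est <- transform(est, RD = .b2 - .b1)

# With reference = TRUE, the plot now also marks the median of the
# simulated estimates with a blue line
plot(est, reference = TRUE)
```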

# `clarify` 0.2.0

* `sim_ame()` and `sim_adrf()` now have a `by` argument, which can be used to estimate quantities of interest within subsets of one or more variables.

* `sim_setx()` can now receive a data frame for its `x` and `x1` arguments.

* `sim_ame()` can accept new options for `contrast`: `"sr"` for the survival ratio and `"srr"` for the switch relative risk.

* Slight speed improvements in `sim_ame()` with continuous `var` and `sim_adrf()` with `contrast = "amef"`.

* Typo fixes in vignettes.

# `clarify` 0.1.3

* Documentation updates incorporating the work of Rainey (2023). `clarify` already implemented the recommendations in Rainey (2023), so no functionality has changed.

# `clarify` 0.1.2

* Added the argument `reference` to `plot.clarify_est()`, which adds a reference normal distribution to the density of the estimates.

* Fixed an error in the `sim()` documentation about how degrees of freedom are computed. Thanks to @wviechtb. (#8)

* Fixed a warning that can occur about recovering model data, from `insight`.

# `clarify` 0.1.1

* In `summary.clarify_est()`, `null` can now be supplied as a named vector to specify the quantities for which p-values should be computed.

* Fixes in anticipation of breaking changes from `marginaleffects` to ensure compatibility (including with older versions).

* Updates to the README and vignettes.

# `clarify` 0.1.0

* First release!
--------------------------------------------------------------------------------
/R/clarify-package.R:
--------------------------------------------------------------------------------
#' @keywords internal
"_PACKAGE"

## usethis namespace: start
#' @import stats
#' @import ggplot2
#' @importFrom utils str
## usethis namespace: end
NULL
--------------------------------------------------------------------------------
/R/clarify_est_methods.R:
--------------------------------------------------------------------------------
#' @exportS3Method names clarify_est
names.clarify_est <- function(x) {
  names(attr(x, "original"))
}

#' @exportS3Method `names<-` clarify_est
`names<-.clarify_est` <- function(x, value) {
  original_names <- names(x)
  original_class <- class(x)
  x <- drop_sim_class(x)
  colnames(x) <- value
  names(attr(x, "original")) <- value
  for (i in names(attributes(x))) {
    if (identical(names(attr(x, i)), original_names)) {
      names(attr(x, i)) <- value
    }
    if (identical(rownames(attr(x, i)), original_names)) {
      rownames(attr(x, i)) <- value
    }
    if (identical(colnames(attr(x, i)), original_names)) {
      colnames(attr(x, i)) <- value
    }
  }
  class(x) <- original_class
  x
}

#' @export
Ops.clarify_est <- function(e1, e2 = NULL) {
  unary <- nargs() == 1L
  FUN <- get(.Generic, envir = parent.frame(), mode = "function")

  if (!.Generic %in% c("+", "-", "*", "^", "%%", "%/%", "/")) {
    .err("only mathematical operations can be applied to `clarify_est` objects")
  }

  if (unary) {
    f <- quote(FUN(left))
    left <- drop_sim_class(e1)
    e1[] <- eval(f)

    left <- attr(e1, "original")
    attr(e1, "original")[] <- eval(f)
    return(e1)
  }

  f <- quote(FUN(left, right))

  e1_clarify_est <- inherits(e1, "clarify_est")
  e2_clarify_est <-
    inherits(e2, "clarify_est")

  if (e1_clarify_est && e2_clarify_est) {
    if (!identical(class(e1), class(e2))) {
      .wrn(sprintf("`%s` should only be used on `clarify_est` objects produced from the same function",
                   .Generic))
    }

    if (!identical(attr(e1, "hash"), attr(e2, "hash"))) {
      .err(sprintf("`%s` can only be used on `clarify_est` objects originating from calls applied to the same `clarify_sim` object",
                   .Generic))
    }

    if (any(dim(e2) != dim(e1))) {
      .err(sprintf("`%s` can only be used on `clarify_est` objects with an equal number of estimated quantities",
                   .Generic))
    }

    if (!identical(attr(e1, "at"), attr(e2, "at"))) {
      .err(sprintf("`%s` can only be used on `clarify_adrf` objects with the same values of `at`",
                   .Generic))
    }
  }

  left <- drop_sim_class(e1)
  right <- drop_sim_class(e2)

  if (e1_clarify_est)
    e1[] <- eval(f)
  else
    e2[] <- eval(f)

  if (e1_clarify_est)
    left <- attr(e1, "original")
  if (e2_clarify_est)
    right <- attr(e2, "original")

  if (e1_clarify_est) {
    attr(e1, "original")[] <- eval(f)
    attr(e1, "contrast") <- NULL
    # class(e1) <- "clarify_est"
    return(e1)
  }

  attr(e2, "original")[] <- eval(f)
  attr(e2, "contrast") <- NULL
  # class(e2) <- "clarify_est"
  e2
}

#' @exportS3Method `[` clarify_est
`[.clarify_est` <- function(x, i, ...) {

  Narg <- nargs()

  if (Narg == 1) return(x)
  if (Narg > 2) {
    .err("`clarify_est` objects can only be subset as obj[.], not obj[., .]")
  }

  attrs <- attributes(x)
  cl <- class(x)

  x <- as.matrix(x)[, i, drop = FALSE]

  for (z in setdiff(names(attrs), c("names", "dimnames", "dim"))) {
    attr(x, z) <- attrs[[z]]
  }
  attr(x, "original") <- attr(x, "original")[i]

  if ("at" %in% names(attrs)) {
    attr(x, "at") <- unname(setNames(attrs[["at"]], names(attrs[["original"]]))[i])
  }
  if ("setx" %in% names(attrs)) {
    attr(x, "setx") <- attrs[["setx"]][i, , drop = FALSE]
  }

  class(x) <- cl
  x
}

#' @exportS3Method as.matrix clarify_est
as.matrix.clarify_est <- function(x, ...) {
  x <- drop_sim_class(x)
  for (i in setdiff(names(attributes(x)), c("dimnames", "dim"))) {
    attr(x, i) <- NULL
  }
  x
}

#' @exportS3Method as.data.frame clarify_est
as.data.frame.clarify_est <- function(x, ...) {
  as.data.frame(as.matrix(x), ...)
}

#' @exportS3Method dimnames clarify_est
dimnames.clarify_est <- function(x) {
  .err("do not use `colnames()`, `rownames()`, or `dimnames()` with a `clarify_est` object. Use `names()` instead")
}

#' @exportS3Method `dimnames<-` clarify_est
`dimnames<-.clarify_est` <- function(x, value) {
  .err("do not use `colnames()`, `rownames()`, or `dimnames()` with a `clarify_est` object.
Use `names()` instead") 153 | } 154 | 155 | #' @exportS3Method str clarify_est 156 | str.clarify_est <- function(object, 157 | max.level = NA, vec.len = getOption("str")$vec.len, digits.d = getOption("str")$digits.d, 158 | nchar.max = 128, give.attr = TRUE, drop.deparse.attr = getOption("str")$drop.deparse.attr, 159 | give.head = TRUE, give.length = give.head, width = getOption("width"), 160 | nest.lev = 0, indent.str = paste(rep.int(" ", max(0, nest.lev + 1)), collapse = ".."), 161 | comp.str = "$ ", no.list = FALSE, 162 | envir = baseenv(), strict.width = getOption("str")$strict.width, formatNum = getOption("str")$formatNum, 163 | list.len = getOption("str")$list.len, deparse.lines = getOption("str")$deparse.lines, 164 | ...) { 165 | 166 | oDefs <- c("vec.len", "digits.d", "strict.width", "formatNum", 167 | "drop.deparse.attr", "list.len", "deparse.lines") 168 | strO <- getOption("str") 169 | if (!is.list(strO)) { 170 | warning("invalid options(\"str\") -- using defaults instead") 171 | strO <- utils::strOptions() 172 | } 173 | else { 174 | if (!all(names(strO) %in% oDefs)) 175 | warning(gettextf("invalid components in options(\"str\"): %s", 176 | paste(setdiff(names(strO), oDefs), collapse = ", ")), 177 | domain = NA) 178 | strO <- utils::modifyList(utils::strOptions(), strO) 179 | } 180 | 181 | oo <- options(digits = digits.d) 182 | on.exit(options(oo)) 183 | le <- length(object) 184 | 185 | nchar.w <- function(x) nchar(x, type = "w", allowNA = TRUE) 186 | 187 | maybe_truncate <- function(x, nx = nchar.w(x), S = "\"", 188 | ch = "| __truncated__") { 189 | ok <- { 190 | if (anyNA(nx)) !is.na(nx) 191 | else TRUE 192 | } 193 | 194 | if (any(lrg <- ok & nx > nchar.max)) { 195 | nc <- nchar(ch <- paste0(S, ch)) 196 | if (nchar.max <= nc) 197 | stop(gettextf("'nchar.max = %d' is too small", 198 | nchar.max), domain = NA) 199 | x.lrg <- x[lrg] 200 | tr.x <- strtrim(x.lrg, nchar.max - nc) 201 | if (any(ii <- tr.x != x.lrg & paste0(tr.x, S) != 202 | x.lrg)) { 203 | x[lrg][ii] <- paste0(tr.x[ii], ch) 204 | } 205 | } 206 | x 207 | } 208 | 209 | nfS <- names(fStr <- formals()) 210 | strSub <- function(obj, ...) { 211 | nf <- setdiff(nfS, c("object", "give.length", "comp.str", 212 | "no.list", names(match.call())[-(1:2)], "...")) 213 | aList <- as.list(fStr)[nf] 214 | aList[] <- lapply(nf, function(n) eval(as.name(n))) 215 | do.call(function(...) str(obj, ...), c(aList, list(...)), 216 | quote = TRUE) 217 | } 218 | 219 | le.str <- { 220 | if (give.length) paste0("[1:", paste(le), "]") 221 | else "" 222 | } 223 | 224 | v.len <- vec.len 225 | std.attr <- "names" 226 | cl <- oldClass(object) 227 | 228 | if (give.attr) 229 | a <- attributes(object) 230 | dCtrl <- eval(formals(deparse)$control) 231 | 232 | if (drop.deparse.attr) 233 | dCtrl <- dCtrl[dCtrl != "showAttributes"] 234 | 235 | arrLenstr <- function(obj) { 236 | rnk <- length(di. <- dim(obj)) 237 | di <- paste0(ifelse(di. > 1, "1:", ""), di., ifelse(di. > 238 | 0, "", " ")) 239 | pDi <- function(...) 
paste(c("[", ..., "]"), collapse = "") 240 | if (rnk == 1) 241 | pDi(di[1L], "(1d)") 242 | else pDi(paste0(di[-rnk], ", "), di[rnk]) 243 | } 244 | 245 | mod <- "num" 246 | 247 | le.str <- arrLenstr(object) 248 | if (m <- match("AsIs", cl, 0L)) 249 | oldClass(object) <- cl[-m] 250 | std.attr <- "dim" 251 | 252 | cl <- cl[1L] 253 | if (cl != mod && substr(cl, 1L, nchar(mod)) != mod) 254 | mod <- paste0("'", cl, "' ", mod) 255 | std.attr <- c(std.attr, "class") 256 | 257 | str1 <- paste0(" ", mod, " ", le.str) 258 | 259 | iv.len <- round(2.5 * v.len) 260 | 261 | ob <- { 262 | if (le > iv.len) 263 | as.matrix(object)[seq_len(iv.len)] 264 | else as.matrix(object) 265 | } 266 | 267 | ao <- abs(ob <- unclass(ob[!is.na(ob)])) 268 | 269 | v.len <- { 270 | if ((all(ao > 1e-10 | ao == 0) && all(ao < 1e+10 | ao == 0) && all(abs(ob - signif(ob, digits.d)) <= 9e-16 * ao))) 271 | iv.len 272 | else 273 | round(1.25 * v.len) 274 | } 275 | 276 | format.fun <- formatNum 277 | 278 | if (!exists("format.fun")) { 279 | format.fun <- format 280 | } 281 | 282 | ile <- min(v.len, le) 283 | formObj <- function(x) maybe_truncate(paste(format.fun(x), collapse = " "), S = "") 284 | 285 | cat(if (give.head) paste0(str1, " "), 286 | formObj( 287 | if (ile >= 1 && mod != "...") as.matrix(object)[seq_len(ile)] 288 | else if (v.len > 0) object), 289 | if (le > v.len) " ...", "\n", sep = "") 290 | 291 | if (give.attr) { 292 | nam <- names(a) 293 | give.L <- give.length || identical(attr(give.length, "from"), "data.frame") 294 | for (i in seq_along(a)) if (all(nam[i] != std.attr)) { 295 | cat(indent.str, paste0("- attr(*, \"", nam[i], "\")="), 296 | sep = "") 297 | strSub(a[[i]], give.length = give.L, 298 | indent.str = paste(indent.str, ".."), 299 | nest.lev = nest.lev + 1) 300 | } 301 | } 302 | 303 | invisible() 304 | } 305 | 306 | -------------------------------------------------------------------------------- /R/clarify_predict.R: -------------------------------------------------------------------------------- 1 | clarify_predict <- function(x, newdata = NULL, group = NULL, type = NULL) { 2 | ord_mean <- identical(type, "mean") && isTRUE(insight::model_info(x)$is_ordinal) 3 | 4 | if (ord_mean) { 5 | type <- NULL 6 | group <- NULL 7 | } 8 | 9 | args <- list(model = x, newdata = newdata, vcov = FALSE) 10 | args$type <- type 11 | 12 | p <- try(do.call(marginaleffects::get_predict, args), silent = TRUE) 13 | 14 | if (length(p) == 0L || is_error(p)) { 15 | .err("predicted values could not be extracted from the model") 16 | } 17 | 18 | if (ord_mean) { 19 | p <- .get_ordinal_mean_preds(p) 20 | } 21 | else if (!is.null(group) && "group" %in% names(p)) { 22 | p <- .subset_group(p, group) 23 | } 24 | 25 | p 26 | } 27 | 28 | .subset_group <- function(pred, group = NULL) { 29 | if (is.null(group)) pred 30 | else pred[pred$group == group, , drop = FALSE] 31 | } 32 | 33 | .get_p <- function(pred) { 34 | if ("estimate" %in% names(pred)) pred[["estimate"]] 35 | else pred[["predicted"]] 36 | } 37 | 38 | .get_ordinal_mean_preds <- function(p) { 39 | ids <- unique(p$rowid) 40 | groups <- unique(p$group) 41 | m <- matrix(p$estimate, nrow = length(ids), ncol = length(groups)) 42 | 43 | if (anyNA(groups)) { 44 | nas <- is.na(groups) 45 | gn <- rep(NA_real_, length(groups)) 46 | 47 | if (!anyNA(suppressWarnings(g <- as.numeric(groups[!nas])))) { 48 | gn[!nas] <- g 49 | } 50 | else { 51 | gn[!nas] <- seq_along(g) 52 | } 53 | } 54 | else { 55 | if (!anyNA(suppressWarnings(g <- as.numeric(groups)))) { 56 | groups <- g 57 | } 58 | else { 59 | 
groups <- seq_along(g) 60 | } 61 | } 62 | 63 | data.frame(rowid = ids, 64 | estimate = drop(m %*% groups)) 65 | } 66 | -------------------------------------------------------------------------------- /R/get_model_components.R: -------------------------------------------------------------------------------- 1 | #Functions for extracting information from models 2 | 3 | # Get the coefficients from a model as a vector 4 | get_coefs <- function(fit) { 5 | 6 | b <- try(marginaleffects::get_coef(fit), silent = TRUE) 7 | 8 | if (!check_valid_coef(b)) { 9 | .err("`sim()` was unable to extract a valid set of coefficients from the model fit; please supply coefficients to the `coefs` argument and a covariance matrix to the `vcov` argument") 10 | } 11 | 12 | b 13 | } 14 | 15 | # Get the covariance from a model 16 | get_vcov <- function(fit, vcov = NULL) { 17 | v <- try(marginaleffects::get_vcov(fit, vcov), silent = TRUE) 18 | 19 | if (!check_valid_vcov(v)) { 20 | .err("`sim()` was unable to extract a valid covariance matrix from the model fit; please supply a covariance matrix to the `vcov` argument") 21 | } 22 | 23 | v 24 | } 25 | 26 | # Get the model degrees of freedom 27 | ## Assesses whether the model is linear and fit with OLS; if not, 28 | ## returns Inf. Linear models fit with MLE get Inf. 29 | get_df <- function(fit) { 30 | 31 | if (!insight::is_model_supported(fit)) { 32 | return(Inf) 33 | } 34 | 35 | statistic <- insight::find_statistic(fit) 36 | 37 | if (identical(statistic, "chi-squared statistic")) { 38 | return(Inf) 39 | } 40 | 41 | insight::get_df(fit, type = "wald", statistic = statistic) 42 | } 43 | -------------------------------------------------------------------------------- /R/misim.R: -------------------------------------------------------------------------------- 1 | #' @title Simulate model coefficients after multiple imputation 2 | #' 3 | #' @description `misim()` simulates model parameters from multivariate normal or t distributions after multiple imputation that are then used by [sim_apply()] to calculate quantities of interest. 4 | #' 5 | #' @param fitlist a list of model fits, one for each imputed dataset, or a `mira` object (the output of a call to `with()` applied to a `mids` object in `mice`). 6 | #' @param n the number of simulations to run for each imputed dataset; default is 1000. More is always better but resulting calculations will take longer. 7 | #' @param vcov a square covariance matrix of the coefficient covariance estimates, a function to use to extract it from `fit`, or a list thereof with an element for each imputed dataset. By default, uses [stats::vcov()] or [insight::get_varcov()] if that doesn't work. 8 | #' @param coefs a vector of coefficient estimates, a function to use to extract it from `fit`, or a list thereof with an element for each imputed dataset. By default, uses [stats::coef()] or [insight::get_parameters()] if that doesn't work. 9 | #' @param dist a character vector containing the name of the multivariate distribution(s) to use to draw simulated coefficients. Should be one of `"normal"` (multivariate normal distribution) or `"t_{#}"` (multivariate t distribution), where `{#}` corresponds to the desired degrees of freedom (e.g., `"t_100"`). If `NULL`, the right distributions to use will be determined based on heuristics; see [sim()] for details. 
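#'   As a sketch of valid values (assuming two imputed datasets), `dist = "t_100"`
#'   requests a multivariate t distribution with 100 degrees of freedom for every
#'   imputation, while `dist = c("normal", "t_100")` requests a different
#'   distribution for each one.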
#'
#' @return
#' A `clarify_misim` object, which inherits from `clarify_sim` and has the following components:
#' \item{sim.coefs}{a matrix containing the simulated coefficients with a column for each coefficient and a row for each simulation for each imputation}
#' \item{coefs}{a matrix containing the original coefficients extracted from `fitlist` or supplied to `coefs`, with a row per imputation.}
#' \item{fit}{the list of model fits supplied to `fitlist`}
#' \item{imp}{an identifier of which imputed dataset each set of simulated coefficients corresponds to.}
#' The `"dist"` attribute contains `"normal"` if the coefficients were sampled from a multivariate normal distribution and `"t({df})"` if sampled from a multivariate t distribution. The `"clarify_hash"` attribute contains a unique hash generated by [rlang::hash()].
#'
#' @details
#' `misim()` essentially combines multiple `sim()` calls applied to a list of model fits, each fit in an imputed dataset, into a single combined pool of simulated coefficients. When simulation-based inference is to be used with multiply imputed data, many imputations are required; see Zhou and Reiter (2010).
#'
#' @references
#'
#' Zhou, X., & Reiter, J. P. (2010). A Note on Bayesian Inference After Multiple Imputation. *The American Statistician*, 64(2), 159–163. \doi{10.1198/tast.2010.09109}
#'
#' @examplesIf requireNamespace("Amelia", quietly = TRUE)
#' data("africa", package = "Amelia")
#'
#' # Multiple imputation using Amelia
#' a.out <- Amelia::amelia(x = africa, m = 10,
#'                         cs = "country",
#'                         ts = "year", logs = "gdp_pc",
#'                         p2s = 0)
#'
#' fits <- with(a.out, lm(gdp_pc ~ infl * trade))
#'
#' # Simulate coefficients
#' s <- misim(fits)
#' s
#'
#' @seealso
#' * [sim()] for simulating model coefficients for a single dataset
#' * [sim_apply()] for applying a function to each set of simulated coefficients
#' * [sim_ame()] for computing average marginal effects in each simulation draw
#' * [sim_setx()] for computing marginal predictions and first differences at typical values in each simulation draw
#' @export
#'
misim <- function(fitlist,
                  n = 1e3,
                  vcov = NULL,
                  coefs = NULL,
                  dist = NULL) {

  if (missing(fitlist)) fitlist <- NULL

  if (inherits(fitlist, "mira")) {
    fitlist <- fitlist$analyses
  }

  if (is.null(fitlist)) {
    if (is.null(coefs) || is.null(vcov)) {
      .err("when `fitlist` is not supplied, arguments must be supplied to both `coefs` and `vcov`")
    }
    if (!is.list(coefs) && !is.list(vcov)) {
      .err("when `fitlist` is not supplied, at least one of `coefs` or `vcov` must be a list")
    }
    nimp <- if (!is.list(coefs)) length(vcov) else length(coefs)
  }
  else {
    check_fitlist(fitlist)
    nimp <- length(fitlist)
  }

  chk::chk_count(n)

  if (!is.list(coefs)) {
    coefs <- lapply(seq_len(nimp), function(i) coefs)
  }
  else if (length(coefs) != nimp) {
    if (is.null(fitlist)) {
      .err("when `fitlist` is not supplied and `coefs` is supplied as a list, `coefs` must have as many entries as there are entries in `vcov`")
    }
    else {
      .err("when supplied as a list, `coefs` must have as many entries as there are models in `fitlist`")
    }
  }

  coef_supplied <- {
    if (all(vapply(coefs, is.null, logical(1L)))) "null"
    else
      if (all(vapply(coefs, is.function, logical(1L)))) "fun"
    else if (all(vapply(coefs, check_valid_coef, logical(1L)))) "num"
    else {
      .err("`coefs` must be a vector of coefficients, a function that extracts one from each model in `fitlist`, or a list thereof")
    }
  }

  if (!is.list(vcov)) {
    vcov <- lapply(seq_len(nimp), function(i) vcov)
  }
  else if (length(vcov) != nimp) {
    if (is.null(fitlist)) {
      .err("when `fitlist` is not supplied and `vcov` is supplied as a list, `vcov` must have as many entries as there are entries in `coefs`")
    }
    else {
      .err("when supplied as a list, `vcov` must have as many entries as there are models in `fitlist`")
    }
  }

  vcov_supplied <- {
    if (all(vapply(vcov, is.null, logical(1L)))) "null"
    else if (all(vapply(vcov, is.matrix, logical(1L)))) "num"
    else "marginaleffects_code"
  }

  for (i in seq_len(nimp)) {
    coefs[[i]] <- process_coefs(coefs[[i]], fitlist[[i]], coef_supplied)
  }

  for (i in seq_len(nimp)) {
    vcov[[i]] <- process_vcov(vcov[[i]], fitlist[[i]], vcov_supplied)
  }

  check_coefs_vcov_length_mi(vcov, coefs, vcov_supplied, coef_supplied)

  if (!is.null(dist)) {
    if (length(dist) == 1) {
      dist <- lapply(seq_len(nimp), function(i) dist)
    }
    else if (length(dist) != nimp) {
      .err("when supplied as a vector, `dist` must have as many values as there are imputations")
    }
    else {
      dist <- as.list(dist)
    }
  }

  samplers <- lapply(seq_len(nimp), function(i) {
    get_sampling_dist(fitlist[[i]], dist[[i]])
  })

  sim.coefs <- do.call("rbind", lapply(seq_len(nimp), function(i) {
    samplers[[i]](n, coefs[[i]], vcov[[i]])
  }))

  out <- list(sim.coefs = sim.coefs,
              coefs = do.call("rbind", coefs),
              fit = fitlist,
              imp = rep(seq_len(nimp), each = n))

  dists <- unlist(lapply(samplers, attr, "dist"))
  if (all_the_same(dists)) dists <- dists[1]

  attr(out, "dist") <- dists
  attr(out, "use_fit") <- !is.null(fitlist)
  attr(out, "sim_hash") <- rlang::hash(out$sim.coefs)
  class(out) <- c("clarify_misim", "clarify_sim")

  out
}

#' @export
print.clarify_misim <- function(x, ...) {
  obj <- deparse1(substitute(x))
  cat("A `clarify_misim` object\n")
  cat(sprintf(" - %s coefficients, %s imputations with %s simulated values each\n",
              ncol(x$sim.coefs), nrow(x$coefs), nrow(x$sim.coefs) / nrow(x$coefs)))
  cat(" - sampled distributions: ")
  if (length(attr(x, "dist")) == 1) {
    cat(sprintf("multivariate %s\n", attr(x, "dist")))
  }
  else {
    cat("multiple different multivariate distributions")
    if (exists(obj)) {
      cat(sprintf(" (use `attr(%s, \"dist\")` to view them)\n", obj))
    }
    else {
      cat("\n")
    }
  }

  invisible(x)
}
--------------------------------------------------------------------------------
/R/plot.clarify_adrf.R:
--------------------------------------------------------------------------------
#' Plot marginal predictions from `sim_adrf()`
#'
#' `plot.clarify_adrf()` plots the output of [sim_adrf()].
For the average dose-response function (ADRF, requested with `contrast = "adrf"` in `sim_adrf()`), this is a plot of the average marginal mean of the outcome against the requested values of the focal predictor; for the average marginal effects function (AMEF, requested with `contrast = "amef"` in `sim_adrf()`), this is a plot of the instantaneous average marginal effect of the focal predictor on the outcome against the requested values of the focal predictor. 4 | #' 5 | #' @inheritParams plot.clarify_est 6 | #' @param x a `clarify_adrf` object resulting from a call to [sim_adrf()]. 7 | #' @param ci `logical`; whether to display confidence bands for the estimates. Default is `TRUE`. 8 | #' @param method the method used to compute confidence bands. Can be `"wald"` to use a Normal approximation or `"quantile"` to use the simulated sampling distribution (default). See [summary.clarify_est()] for details. Abbreviations allowed. 9 | #' @param baseline `logical`; whether to include a horizontal line at `y = 0` on the plot. Default is `FALSE` for the ADRF (since 0 might not be in the range of the outcome) and `TRUE` for the AMEF. 10 | #' @param color the color of the line and confidence band in the plot. 11 | #' 12 | #' @return A `ggplot` object. 13 | #' 14 | #' @details These plots are produced using [ggplot2::geom_line()] and [ggplot2::geom_ribbon()]. The confidence bands should be interpreted pointwise (i.e., they do not account for simultaneous inference). 15 | #' 16 | #' @seealso [summary.clarify_est()] for computing p-values and confidence intervals for the estimated quantities. 17 | #' 18 | #' @examples 19 | #' ## See help("sim_adrf") for examples 20 | #' 21 | #' @exportS3Method plot clarify_adrf 22 | plot.clarify_adrf <- function(x, 23 | ci = TRUE, 24 | level = .95, 25 | method = "quantile", 26 | baseline, 27 | color = "black", 28 | ...) 
{

  at <- attr(x, "at")
  var <- attr(x, "var")
  contrast <- attr(x, "contrast")
  by <- attr(x, "by")

  if (missing(baseline)) {
    baseline <- !is.null(contrast) && contrast == "amef"
  }
  else {
    chk::chk_flag(baseline)
  }

  s <- {
    if (ci)
      as.data.frame(summary.clarify_est(x, level = level, method = method))
    else
      data.frame(Estimate = coef(x))
  }

  if (!is.null(by)) {
    s$by_var <- factor(.extract_by_values(x))
    if (nlevels(s$by_var) == 1)
      by <- NULL
  }

  p <- ggplot(mapping = aes(x = at))

  if (baseline) {
    p <- p + geom_hline(yintercept = 0)
  }

  if (is.null(by)) {
    p <- p + geom_line(aes(y = s$Estimate),
                       color = color) +
      labs(x = var, y = "E[Y|X]")
  }
  else {
    p <- p + geom_line(aes(y = s$Estimate, color = s$by_var)) +
      labs(x = var, y = "E[Y|X]", color = paste(by, collapse = ", "))
  }

  if (ci) {
    if (is.null(by)) {
      p <- p +
        geom_ribbon(aes(ymin = s[[2]], ymax = s[[3]]),
                    alpha = .3, fill = color)
    }
    else {
      p <- p +
        geom_ribbon(aes(ymin = s[[2]], ymax = s[[3]],
                        fill = s$by_var),
                    alpha = .3) +
        labs(fill = paste(by, collapse = ", "))
    }
  }

  p + labs(x = var,
           y = if (!is.null(attr(x, "contrast")))
             switch(attr(x, "contrast"),
                    "adrf" = sprintf("E[Y(%s)]", var),
                    "amef" = sprintf("E[dY/d(%s)]", var))) +
    theme_bw()
}

.extract_by_values <- function(obj) {
  x <- names(obj)

  if (identical(attr(obj, "contrast"), "amef"))
    pattern <- "\\,([^]]+)\\]"
  else
    pattern <- "\\|([^]]+)\\]"

  matches <- regexpr(pattern, x, perl = TRUE)
  out <- regmatches(x, matches)

  substr(out, 2, nchar(out) - 1)
}
--------------------------------------------------------------------------------
/R/plot.clarify_est.R:
--------------------------------------------------------------------------------
#' @exportS3Method plot clarify_est
#' @rdname summary.clarify_est
plot.clarify_est <- function(x,
                             parm,
                             ci = TRUE,
                             level = .95,
                             method = "quantile",
                             reference = FALSE,
                             ncol = 3,
                             ...) {

  chk::chk_flag(ci)
  chk::chk_flag(reference)

  original_est <- coef(x)
  est_names <- names(x)

  parm <- process_parm(x, parm)
  if (anyNA(parm)) {
    .err("`parm` must be a numeric or character vector identifying the estimates to plot")
  }

  est_names <- est_names[parm]

  est_long <- setNames(utils::stack(as.data.frame(as.matrix(x))[est_names]),
                       c("val", "est"))
  original_est_long <- setNames(utils::stack(original_est[est_names]),
                                c("val", "est"))

  p <- ggplot() +
    geom_density(data = est_long, mapping = aes(x = .data$val),
                 color = "black", fill = "gray90",
                 ...)
+ 34 | geom_hline(yintercept = 0) + 35 | geom_vline(data = original_est_long, mapping = aes(xintercept = .data$val)) + 36 | facet_wrap(vars(.data$est), scales = "free", ncol = min(ncol, nlevels(original_est_long$est))) 37 | 38 | if (ci) { 39 | ci <- confint(x, parm = parm, level = level, 40 | method = method) 41 | 42 | ci_long <- setNames(utils::stack(as.data.frame(t(ci))), c("val", "est")) 43 | p <- p + geom_vline(data = ci_long, mapping = aes(xintercept = .data$val), 44 | linetype = 2) 45 | } 46 | 47 | if (reference) { 48 | #Add normal density and mean line 49 | ref_means_and_medians <- data.frame( 50 | est = factor(levels(est_long$est), levels = levels(est_long$est)), 51 | mean = tapply(est_long$val, est_long$est, mean), 52 | height = dnorm(0, 0, tapply(est_long$val, est_long$est, sd)), 53 | median = tapply(est_long$val, est_long$est, median)) 54 | 55 | p <- p + geom_density(data = est_long, mapping = aes(x = .data$val), 56 | stat = StatNormal, color = "red") + 57 | geom_segment(aes(x = .data$mean, xend = .data$mean, 58 | y = 0, yend = .data$height), 59 | data = ref_means_and_medians, color = "red") + 60 | geom_segment(aes(x = .data$median, xend = .data$median, 61 | y = 0, yend = .2 * .data$height), 62 | data = ref_means_and_medians, color = "blue") 63 | } 64 | 65 | p + 66 | labs(x = "Estimate", y = "Density") + 67 | theme_bw() + 68 | theme(panel.grid = element_blank()) 69 | } 70 | 71 | #Stat for normal reference density 72 | StatNormal <- ggplot2::ggproto("StatNormal", ggplot2::Stat, 73 | required_aes = "x|y", 74 | default_aes = aes(x = ggplot2::after_stat(density), 75 | y = ggplot2::after_stat(density), 76 | fill = NA, weight = NULL), 77 | setup_params = function(data, params) { 78 | params$flipped_aes <- ggplot2::has_flipped_aes(data, params, main_is_orthogonal = FALSE, main_is_continuous = TRUE) 79 | 80 | has_x <- !(is.null(data$x) && is.null(params$x)) 81 | has_y <- !(is.null(data$y) && is.null(params$y)) 82 | if (!has_x && !has_y) { 83 | rlang::abort("stat_normal() requires an x or y aesthetic.") 84 | } 85 | 86 | params 87 | }, 88 | extra_params = c("na.rm", "orientation"), 89 | compute_group = function(data, scales, n = 512, trim = FALSE, 90 | na.rm = FALSE, flipped_aes = FALSE) { 91 | data <- ggplot2::flip_data(data, flipped_aes) 92 | if (trim) { 93 | range <- range(data$x, na.rm = TRUE) 94 | } else { 95 | range <- scales[[flipped_names(flipped_aes)$x]]$dimension() 96 | } 97 | 98 | density <- compute_norm_dens(data$x, w = data$weight, from = range[1], 99 | to = range[2], n = n) 100 | density$flipped_aes <- flipped_aes 101 | ggplot2::flip_data(density, flipped_aes) 102 | } 103 | ) 104 | 105 | compute_norm_dens <- function(x, w, from, to, n = 512) { 106 | nx <- length(x) 107 | if (is.null(w)) { 108 | w <- rep(1, nx) 109 | } 110 | 111 | nax <- is.na(x) 112 | naw <- is.na(w) 113 | 114 | x <- x[!nax & !naw] 115 | w <- w[!nax & !naw] 116 | 117 | # if less than 2 points return data frame of NAs and a warning 118 | if (nx < 2) { 119 | rlang::warn("Groups with fewer than two data points have been dropped.") 120 | return(data.frame( 121 | x = NA_real_, 122 | density = NA_real_, 123 | scaled = NA_real_, 124 | ndensity = NA_real_, 125 | count = NA_real_, 126 | n = NA_integer_ 127 | )) 128 | } 129 | 130 | covw <- cov.wt(as.matrix(x), w) 131 | s <- sqrt(covw$cov) 132 | m <- covw$center 133 | 134 | x <- seq(from, to, length.out = n) 135 | y <- dnorm(x, m, s) 136 | 137 | data.frame( 138 | x = x, 139 | density = y, 140 | scaled = y / max(y, na.rm = TRUE), 141 | ndensity = y / max(y, na.rm = 
TRUE),
    count = y * nx,
    n = nx
  )
}
--------------------------------------------------------------------------------
/R/plot.clarify_setx.R:
--------------------------------------------------------------------------------
#' Plot marginal predictions from `sim_setx()`
#'
#' `plot.clarify_setx()` plots the output of [sim_setx()], providing graphics similar to those of [plot.clarify_est()] but with features specifically for plotting marginal predictions. For continuous predictors, this is a plot of the marginal predictions and their confidence bands across levels of the predictor. Otherwise, this is a plot of the simulated sampling distribution of the marginal predictions.
#'
#' @inheritParams plot.clarify_est
#' @param x a `clarify_est` object resulting from a call to [sim_setx()].
#' @param var the name of the focal varying predictor, i.e., the variable to be on the x-axis of the plot. All other variables with varying set values will be used to color the resulting plot. See Details. Ignored if no predictors vary, if only one predictor varies in the reference grid, or if `x1` was specified in `sim_setx()`. If not set, will use the predictor with the greatest number of unique values specified in the reference grid.
#' @param ci `logical`; whether to display confidence intervals or bands for the estimates. Default is `TRUE`.
#' @param method the method used to compute confidence intervals or bands. Can be `"wald"` to use a Normal approximation or `"quantile"` to use the simulated sampling distribution (default). See [summary.clarify_est()] for details. Abbreviations allowed.
#' @param reference `logical`; whether to overlay a normal density reference distribution over the plots. Default is `FALSE`. Ignored when variables other than the focal varying predictor vary.
#'
#' @return A `ggplot` object.
#'
#' @details `plot()` creates one of two kinds of plots depending on how the reference grid was specified in the call to `sim_setx()` and what `var` is set to. When the focal varying predictor (i.e., the one set in `var`) is numeric and takes on three or more unique values in the reference grid, the produced plot is a line graph displaying the value of the marginal prediction (denoted as `E[Y|X]`) across values of the focal varying predictor, with confidence bands displayed when `ci = TRUE`. If other predictors also vary, lines for different values will be displayed in different colors. These plots are produced using [ggplot2::geom_line()] and [ggplot2::geom_ribbon()].
#'
#' When the focal varying predictor is a factor or character or only takes on two or fewer values in the reference grid, the produced plot is a density plot of the simulated predictions, similar to the plot resulting from [plot.clarify_est()]. When other variables vary, densities for different values will be displayed in different colors. These plots are produced using [ggplot2::geom_density()].
#'
#' Marginal predictions are identified by the corresponding levels of the predictors that vary. The user should keep track of whether the non-varying predictors are set at specified or automatically set "typical" levels.
#'
#' @seealso [summary.clarify_est()] for computing p-values and confidence intervals for the estimated quantities.
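#'
#' A minimal usage sketch (the model, data, and variable names here are hypothetical):
#' \preformatted{
#' fit <- lm(y ~ x + g, data = dat)
#' s <- sim(fit)
#' est <- sim_setx(s, x = list(x = 0:10, g = c("a", "b")))
#'
#' # Line plot of the marginal predictions across `x`, colored by `g`
#' plot(est, var = "x", ci = TRUE)
#' }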
21 | #' 22 | #' @examples 23 | #' ## See help("sim_setx") for examples 24 | #' 25 | #' @export 26 | plot.clarify_setx <- function(x, 27 | var = NULL, 28 | ci = TRUE, 29 | level = .95, 30 | method = "quantile", 31 | reference = FALSE, 32 | ...) { 33 | 34 | newdata <- attr(x, "setx") 35 | 36 | if (nrow(newdata) == 1) { 37 | if (!is.null(var)) { 38 | .wrn("ignoring `var` because no variables vary over predictions") 39 | } 40 | return(plot.clarify_est(x, parm = 1, ci = ci, level = level, 41 | method = method, reference = reference, ...)) 42 | } 43 | 44 | if (isTRUE(attr(x, "fd"))) { 45 | if (!is.null(var)) { 46 | .wrn("ignoring `var`") 47 | } 48 | return(plot.clarify_est(x, parm = 1:3, ci = ci, level = level, 49 | method = method, reference = reference, ...)) 50 | } 51 | 52 | len_unique_newdata <- vapply(newdata, function(v) length(unique(v)), integer(1L)) 53 | varying <- names(newdata)[len_unique_newdata > 1] 54 | 55 | if (length(varying) == 1) { 56 | if (!is.null(var) && !identical(var, varying)) { 57 | .wrn("ignoring `var` because only one variable varies over predictions") 58 | } 59 | var <- varying 60 | } 61 | else if (is.null(var)) { 62 | if (any(len_unique_newdata[varying] > 2)) { 63 | var <- attr(newdata, "set_preds")[which.max(len_unique_newdata[attr(newdata, "set_preds")])] 64 | } 65 | else { 66 | var <- attr(newdata, "set_preds")[attr(newdata, "set_preds") %in% varying][1] 67 | } 68 | } 69 | else { 70 | chk::chk_string(var) 71 | if (!var %in% varying) { 72 | .err("`var` must be the name of a predictor set to be varying. Allowable options include ", word_list(varying, quotes = TRUE)) 73 | } 74 | } 75 | 76 | non_var_varying <- setdiff(varying, var) 77 | 78 | p <- { 79 | if (len_unique_newdata[var] == 2 || chk::vld_character_or_factor(newdata[[var]])) 80 | setx_sim_plot(x, var, non_var_varying, ci = ci, 81 | level = level, method = method, ...) 82 | else 83 | setx_reg_plot(x, var, non_var_varying, ci = ci, 84 | level = level, method = method) 85 | } 86 | 87 | p + theme_bw() + scale_fill_brewer(palette = "Set1") 88 | } 89 | 90 | #sim_plot, but with grouping by non_var_varying if present 91 | setx_sim_plot <- function(x, var, non_var_varying = NULL, ci = TRUE, level = .95, 92 | method = "quantile", ...) 
{ 93 | 94 | chk::chk_flag(ci) 95 | 96 | newdata <- attr(x, "setx") 97 | original_est <- coef(x) 98 | est_names <- rownames(newdata) 99 | 100 | est_long <- setNames(utils::stack(as.data.frame(x)[est_names]), 101 | c("val", "est")) 102 | est_long <- merge(est_long, 103 | newdata[c(var, non_var_varying)], 104 | by.x = "est", by.y = 0) 105 | est_long[[var]] <- paste0(var, " = ", add_quotes(est_long[[var]], chk::vld_character_or_factor(est_long[[var]]))) 106 | 107 | original_est_long <- setNames(utils::stack(original_est[est_names]), 108 | c("val", "est")) 109 | original_est_long <- merge(original_est_long, 110 | newdata[c(var, non_var_varying)], 111 | by.x = "est", by.y = 0) 112 | original_est_long[[var]] <- paste0(var, " = ", add_quotes(original_est_long[[var]], chk::vld_character_or_factor(original_est_long[[var]]))) 113 | 114 | if (length(non_var_varying) > 0) { 115 | non_var_varying_f <- do.call("paste", c(lapply(non_var_varying, function(i) { 116 | paste0(i, " = ", add_quotes(est_long[[i]], chk::vld_character_or_factor(est_long[[i]]))) 117 | }), list(sep = ", "))) 118 | non_var_varying_f <- factor(non_var_varying_f, levels = unique(non_var_varying_f)) 119 | 120 | non_var_varying_f_o <- do.call("paste", c(lapply(non_var_varying, function(i) { 121 | paste0(i, " = ", add_quotes(original_est_long[[i]], chk::vld_character_or_factor(original_est_long[[i]]))) 122 | }), list(sep = ", "))) 123 | non_var_varying_f_o <- factor(non_var_varying_f_o, levels = unique(non_var_varying_f_o)) 124 | } 125 | else { 126 | non_var_varying_f <- non_var_varying_f_o <- NULL 127 | } 128 | 129 | p <- ggplot() + 130 | geom_density(data = est_long, mapping = aes(x = .data$val, color = non_var_varying_f, 131 | fill = non_var_varying_f), 132 | alpha = .3, ...) + 133 | geom_hline(yintercept = 0) + 134 | geom_vline(data = original_est_long, mapping = aes(xintercept = .data$val, 135 | color = non_var_varying_f_o)) + 136 | facet_wrap(vars(.data[[var]]), scales = "free") 137 | 138 | 139 | if (ci) { 140 | ci <- confint(x, level = level, method = method) 141 | ci_long <- setNames(utils::stack(as.data.frame(t(ci))), c("val", "est")) 142 | 143 | ci_long <- merge(ci_long, 144 | newdata[c(var, non_var_varying)], 145 | by.x = "est", by.y = 0) 146 | ci_long[[var]] <- paste0(var, " = ", add_quotes(ci_long[[var]], chk::vld_character_or_factor(ci_long[[var]]))) 147 | 148 | if (length(non_var_varying) > 0) { 149 | non_var_varying_f_ci <- do.call("paste", c(lapply(non_var_varying, function(i) { 150 | paste0(i, " = ", add_quotes(ci_long[[i]], chk::vld_character_or_factor(ci_long[[i]]))) 151 | }), list(sep = ", "))) 152 | non_var_varying_f_ci <- factor(non_var_varying_f_ci, levels = unique(non_var_varying_f_ci)) 153 | } 154 | else { 155 | non_var_varying_f_ci <- NULL 156 | } 157 | 158 | p <- p + geom_vline(data = ci_long, mapping = aes(xintercept = .data$val, 159 | color = non_var_varying_f_ci), 160 | linetype = 2) 161 | } 162 | 163 | p + 164 | scale_color_brewer(palette = "Set1") + 165 | labs(x = "Estimate", y = "Density", color = NULL, fill = NULL) + 166 | theme(panel.background = element_rect(fill = "white", color = "black"), 167 | panel.border = element_rect(color = "black", fill = NA)) 168 | } 169 | 170 | #Line plot with confidence bands 171 | setx_reg_plot <- function(x, var, non_var_varying = NULL, ci = TRUE, level = .95, method = "quantile") { 172 | 173 | newdata <- attr(x, "setx") 174 | 175 | if (length(non_var_varying)) { 176 | non_var_varying_f <- do.call("paste", c(lapply(non_var_varying, function(i) { 177 | paste0(i, " = ", 
add_quotes(newdata[[i]], chk::vld_character_or_factor(newdata[[i]]))) 178 | }), list(sep = ", "))) 179 | non_var_varying_f <- factor(non_var_varying_f, levels = unique(non_var_varying_f)) 180 | } 181 | else { 182 | non_var_varying_f <- NULL 183 | } 184 | 185 | s <- { 186 | if (ci) 187 | summary.clarify_est(x, level = level, method = method)[rownames(newdata), , drop = FALSE] 188 | else 189 | matrix(coef(x)[rownames(newdata)], ncol = 1, 190 | dimnames = list(rownames(newdata), "Estimate")) 191 | } 192 | 193 | s <- cbind(s, newdata) 194 | 195 | p <- ggplot(s, aes(x = .data[[var]], color = non_var_varying_f, 196 | fill = non_var_varying_f)) + 197 | geom_line(aes(y = .data$Estimate)) + 198 | scale_color_brewer(palette = "Set1") + 199 | labs(x = var, y = sprintf("E[Y|%s]", var), color = NULL, fill = NULL) 200 | 201 | if (ci) { 202 | p <- p + 203 | geom_ribbon(aes(ymin = .data[[colnames(s)[2]]], ymax = .data[[colnames(s)[3]]], 204 | color = NULL), 205 | alpha = .3) 206 | } 207 | 208 | p 209 | } 210 | -------------------------------------------------------------------------------- /R/sim.R: -------------------------------------------------------------------------------- 1 | #' Simulate model parameters 2 | #' 3 | #' @description `sim()` simulates model parameters from a multivariate normal or t distribution that are then used by [sim_apply()] to calculate quantities of interest. 4 | #' 5 | #' @param fit a model fit, such as the output of a call to [lm()] or [glm()]. Can be left unspecified if `coefs` and `vcov` are not functions. 6 | #' @param n the number of simulations to run; default is 1000. More is always better but resulting calculations will take longer. 7 | #' @param vcov either a square covariance matrix of the coefficient covariance estimates or a function to use to extract it from `fit`. By default, uses [stats::vcov()] or [insight::get_varcov()] if that doesn't work. 8 | #' @param coefs either a vector of coefficient estimates or a function to use to extract it from `fit`. By default, uses [stats::coef()] or [insight::get_parameters()] if that doesn't work. 9 | #' @param dist a string containing the name of the multivariate distribution to use to draw simulated coefficients. Should be one of `"normal"` (multivariate normal distribution) or `"t({#})"` (multivariate t distribution), where `{#}` corresponds to the desired degrees of freedom (e.g., `"t(100)"`). If `NULL`, the right distribution to use will be determined based on heuristics; see Details. 10 | #' 11 | #' @return 12 | #' A `clarify_sim` object, which has the following components: 13 | #' \item{sim.coefs}{a matrix containing the simulated coefficients with a column for each coefficient and a row for each simulation} 14 | #' \item{coefs}{the original coefficients extracted from `fit` or supplied to `coefs`.} 15 | #' \item{vcov}{the covariance matrix of the coefficients extracted from `fit` or supplied to `vcov`} 16 | #' \item{fit}{the original model fit supplied to `fit`} 17 | #' The `"dist"` attribute contains `"normal"` if the coefficients were sampled from a multivariate normal distribution and `"t(df)"` if sampled from a multivariate t distribution. The `"clarify_hash"` attribute contains a unique hash generated by [rlang::hash()]. 18 | #' 19 | #' @details When `dist` is `NULL`, `sim()` samples from a multivariate normal or t distribution depending on the degrees of freedom extracted from `insight::get_df(., type = "wald")`. 
If `Inf`, a normal distribution will be used; otherwise, a t-distribution with the returned degrees of freedom will be used. Models not supported by `insight` will use a normal distribution. 20 | #' 21 | #' When a multivariate normal is used, it is sampled from with means equal to the estimated coefficients and the parameter covariance matrix as the covariance matrix using [mvnfast::rmvn()]. When a multivariate t distribution is used, it is sampled from with means equal to the estimated coefficients and scaling matrix equal to `cov*(df - 2)/df`, where `cov` is the parameter covariance matrix and `df` is the residual degrees of freedom for the model, using [mvnfast::rmvt()]. 22 | #' 23 | #' @seealso 24 | #' * [misim()] for simulating model coefficients after multiple imputation 25 | #' * [sim_apply()] for applying a function to each set of simulated coefficients 26 | #' * [sim_ame()] for computing average marginal effects in each simulation draw 27 | #' * [sim_setx()] for computing marginal predictions and first differences at typical values in each simulation draw 28 | #' * [sim_adrf()] for computing average dose-response functions in each simulation draw 29 | #' 30 | #' @examples 31 | #' 32 | #' data("lalonde", package = "MatchIt") 33 | #' fit <- lm(re78 ~ treat * (age + race + nodegree + re74), data = lalonde) 34 | #' 35 | #' # Simulate coefficients 36 | #' s <- sim(fit) 37 | #' s 38 | #' 39 | #' ## Could also use a robust covariance matrix, e.g., 40 | #' s <- sim(fit, vcov = "HC3") 41 | #' 42 | #' # Simulated coefficients assuming a normal distribution 43 | #' # for coefficients; default for `lm` objects is a t- 44 | #' # distribution 45 | #' s <- sim(fit, dist = "normal") 46 | #' s 47 | #' 48 | #' @export 49 | sim <- function(fit, 50 | n = 1e3, 51 | vcov = NULL, 52 | coefs = NULL, 53 | dist = NULL) { 54 | 55 | if (missing(fit)) fit <- NULL 56 | 57 | if (!is.null(fit)) { 58 | if (!insight::is_regression_model(fit)) { 59 | .wrn("`fit` was not detected to be a regression model; proceed with caution") 60 | } 61 | # if (insight::is_mixed_model(fit)) { 62 | # .wrn("`sim()` may not fully support models with random effects; proceed with caution") 63 | # } 64 | } 65 | 66 | chk::chk_count(n) 67 | 68 | coef_supplied <- { 69 | if (is.null(coefs)) "null" 70 | else if (is.function(coefs)) "fun" 71 | else if (check_valid_coef(coefs)) "num" 72 | else { 73 | .err("`coefs` must be a vector of coefficients or a function that extracts one from `fit`") 74 | } 75 | } 76 | 77 | vcov_supplied <- { 78 | if (is.null(vcov)) "null" 79 | else if (is.matrix(vcov)) "num" 80 | else "marginaleffects_code" 81 | } 82 | 83 | coefs <- process_coefs(coefs, fit, coef_supplied) 84 | 85 | vcov <- process_vcov(vcov, fit, vcov_supplied) 86 | 87 | check_coefs_vcov_length(vcov, coefs, vcov_supplied, coef_supplied) 88 | 89 | sampler <- get_sampling_dist(fit, dist) 90 | 91 | out <- list(sim.coefs = sampler(n, coefs, vcov), 92 | coefs = coefs, 93 | vcov = vcov, 94 | fit = fit) 95 | 96 | attr(out, "dist") <- attr(sampler, "dist") 97 | attr(out, "use_fit") <- !is.null(fit) 98 | attr(out, "sim_hash") <- rlang::hash(out$sim.coefs) 99 | class(out) <- "clarify_sim" 100 | 101 | out 102 | } 103 | 104 | #' @export 105 | print.clarify_sim <- function(x, ...) 
{ 106 | cat("A `clarify_sim` object\n") 107 | cat(sprintf(" - %s coefficients, %s simulated values\n", ncol(x$sim.coefs), nrow(x$sim.coefs))) 108 | cat(sprintf(" - sampled distribution: multivariate %s\n", attr(x, "dist"))) 109 | if (!is.null(x$fit)) { 110 | cat(" - original fitting function call:\n\n") 111 | print(insight::get_call(x$fit)) 112 | } 113 | 114 | invisible(x) 115 | } 116 | 117 | #Returns a function that generates random variates, with arguments 118 | #`n`, `mu`, and `cov`; name of distribution is stored in attr(., "dist") 119 | get_sampling_dist <- function(fit = NULL, dist = NULL) { 120 | 121 | if (!is.null(dist)) { 122 | chk::chk_string(dist) 123 | dist <- tolower(dist) 124 | if (startsWith(dist, "t(") && endsWith(dist, ")")) { 125 | df <- substr(dist, 3, nchar(dist) - 1) 126 | if (nchar(df) == 0 || anyNA(suppressWarnings(df <- as.numeric(df))) || !chk::vld_number(df)) { 127 | .err("when `dist` is supplied as t({#}), `{#}` must be a number") 128 | } 129 | df <- as.numeric(df) 130 | dist <- "t" 131 | } 132 | else if (!anyNA(pmatch(dist, "normal"))) { 133 | dist <- "normal" 134 | } 135 | else { 136 | .err("`dist` must be \"normal\" or \"t({#})\", where `{#}` corresponds to the desired degrees of freedom") 137 | } 138 | } 139 | else if (is.null(fit)) { 140 | dist <- "normal" 141 | } 142 | else { 143 | df <- get_df(fit) 144 | 145 | if (any(is.finite(df)) && all(df > 0)) dist <- "t" 146 | else dist <- "normal" 147 | } 148 | 149 | f <- { 150 | if (dist == "t") 151 | function(n, mu, cov) { 152 | sigma <- cov * (df - 2) / df 153 | #Need pivoted cholesky for when cov isn't PSD (sometimes true for fixed effects models) 154 | ch <- suppressWarnings(chol(sigma, pivot = TRUE)) 155 | piv <- attr(ch, "pivot") 156 | x <- mvnfast::rmvt(n, mu = mu[piv], sigma = ch, isChol = TRUE, df = df, kpnames = TRUE) 157 | x[, order(piv), drop = FALSE] 158 | } 159 | else 160 | function(n, mu, cov) { 161 | #Need pivoted cholesky for when cov isn't PSD (sometimes true for fixed effects models) 162 | ch <- suppressWarnings(chol(cov, pivot = TRUE)) 163 | piv <- attr(ch, "pivot") 164 | x <- mvnfast::rmvn(n, mu = mu[piv], sigma = ch, isChol = TRUE, kpnames = TRUE) 165 | x[, order(piv), drop = FALSE] 166 | } 167 | } 168 | 169 | attr(f, "dist") <- if (dist == "t") sprintf("t(%s)", df) else dist 170 | 171 | f 172 | } 173 | 174 | #Extracts coefs based on given inputs 175 | process_coefs <- function(coefs, fit = NULL, coef_supplied) { 176 | if (coef_supplied == "null") { 177 | if (is.null(fit)) { 178 | .err("`coefs` must be supplied when `fit` is not specified") 179 | } 180 | coefs <- marginaleffects::get_coef(fit) 181 | if (!check_valid_coef(coefs)) { 182 | .err("a valid set of coefficients could not be extracted automatically; please supply coefficients to the `coefs` argument and a covariance matrix to the `vcov` argument") 183 | } 184 | } 185 | if (coef_supplied == "fun") { 186 | if (is.null(fit)) { 187 | .err("`fit` must be supplied when `coefs` is a function") 188 | } 189 | 190 | coefs <- try_chk(coefs(fit)) 191 | if (!check_valid_coef(coefs)) { 192 | .err("the output of the function supplied to `coefs` must be a numeric vector") 193 | } 194 | } 195 | else if (coef_supplied == "num") { 196 | #do nothing 197 | } 198 | 199 | if (anyNA(coefs) || any(!is.finite(coefs))) { 200 | .err("the coefficients cannot contain `NA` or non-finite values. 
This can occur with rank-deficient fits")
201 | }
202 |
203 | coefs
204 | }
205 |
206 | #Extracts vcov based on given inputs
207 | process_vcov <- function(vcov, fit = NULL, vcov_supplied) {
208 | if (vcov_supplied == "null") {
209 | if (is.null(fit)) {
210 | .err("`vcov` must be supplied when `fit` is not specified")
211 | }
212 | vcov <- marginaleffects::get_vcov(fit)
213 | if (!check_valid_vcov(vcov)) {
214 | .err("a valid covariance matrix could not be extracted automatically; please supply an argument to `vcov`")
215 | }
216 | }
217 | else if (vcov_supplied == "num") {
218 | if (!check_valid_vcov(vcov)) {
219 | .err("when supplied as a matrix, `vcov` must be a square, symmetric, numeric matrix")
220 | }
221 | }
222 | else {
223 | if (is.null(fit)) {
224 | .err("`fit` must be supplied when `vcov` is not supplied as a matrix")
225 | }
226 |
227 | vcov <- marginaleffects::get_vcov(fit, vcov)
228 | if (!check_valid_vcov(vcov)) {
229 | .err("a valid covariance matrix could not be extracted using the argument supplied to `vcov`")
230 | }
231 | }
232 |
233 | if (anyNA(vcov) || any(!is.finite(vcov))) {
234 | .err("the covariance matrix cannot contain `NA` or non-finite values. This can occur with rank-deficient fits")
235 | }
236 |
237 | vcov
238 | }
239 |
--------------------------------------------------------------------------------
/R/transform.clarify_est.R:
--------------------------------------------------------------------------------
1 | #' Transform and combine `clarify_est` objects
2 | #'
3 | #' @description
4 | #' `transform()` modifies a `clarify_est` object by allowing for the calculation of new quantities from the existing quantities without re-simulating them. `cbind()` binds two or more `clarify_est` objects together.
5 | #'
6 | #' @param _data the `clarify_est` object to be transformed.
7 | #' @param ... for `transform()`, arguments in the form `name = value`, where `name` is the name of a new quantity to be computed and `value` is an expression that computes the new quantity as a function of the existing quantities. See Details. For `cbind()`, `clarify_est` objects to be combined.
8 | #' @param deparse.level ignored.
9 | #'
10 | #' @details
11 | #' For `transform()`, the expression on the right side of the `=` should use the names of the existing quantities (e.g., `` `E[Y(1)]` - `E[Y(0)]` ``), with `` ` `` appropriately included when a quantity name includes parentheses or brackets. Alternatively, it can use indexes prefixed by `.b`, e.g., `.b2 - .b1`, to refer to the corresponding quantity by position. This can aid in computing derived quantities from quantities with complicated names. (Note that if a quantity is named something like `.b1`, it will need to be referred to by position rather than name, as the position-based label takes precedence.) See examples. Setting an existing quantity to `NULL` will remove that quantity from the object.
12 | #'
13 | #' `cbind()` does not rename the quantities or check for uniqueness of the names, so it is important to rename them yourself prior to combining the objects.
14 | #'
15 | #' @return
16 | #' A `clarify_est` object, either with new columns added (when using `transform()`) or with the supplied `clarify_est` objects combined (when using `cbind()`). Note that any type attributes corresponding to the `sim_apply()` wrapper used (e.g., `sim_ame()`) are lost when using either function. This can affect any helper functions (e.g., `plot()`) designed to work with the output of specific wrappers.
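#'
#' As a minimal illustration of the positional syntax described above (the object `est` and its quantity names here are hypothetical): if `est` contains quantities named `E[Y(0)]`, `E[Y(1)]`, and `RR`, in that order, then `transform(est, RD = .b2 - .b1)` computes the same new quantity as ``transform(est, RD = `E[Y(1)]` - `E[Y(0)]`)``, without the need to backtick-quote the bracketed names.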
17 | #' 18 | #' @seealso [transform()], [cbind()], [sim()] 19 | #' 20 | #' @examples 21 | #' data("lalonde", package = "MatchIt") 22 | #' 23 | #' # Fit the model 24 | #' fit <- lm(re78 ~ treat * (age + educ + race + 25 | #' married + re74 + re75), 26 | #' data = lalonde) 27 | #' 28 | #' # Simulate coefficients 29 | #' set.seed(123) 30 | #' s <- sim(fit, n = 100) 31 | #' 32 | #' # Average adjusted predictions for `treat` within 33 | #' # subsets of `race` 34 | #' est_b <- sim_ame(s, var = "treat", verbose = FALSE, 35 | #' subset = race == "black") 36 | #' est_b 37 | #' 38 | #' est_h <- sim_ame(s, var = "treat", verbose = FALSE, 39 | #' subset = race == "hispan") 40 | #' est_h 41 | #' 42 | #' # Compute differences between adjusted predictions 43 | #' est_b <- transform(est_b, 44 | #' diff = `E[Y(1)]` - `E[Y(0)]`) 45 | #' est_b 46 | #' 47 | #' est_h <- transform(est_h, 48 | #' diff = `E[Y(1)]` - `E[Y(0)]`) 49 | #' est_h 50 | #' 51 | #' # Bind estimates together after renaming 52 | #' names(est_b) <- paste0(names(est_b), "_b") 53 | #' names(est_h) <- paste0(names(est_h), "_h") 54 | #' 55 | #' est <- cbind(est_b, est_h) 56 | #' est 57 | #' 58 | #' # Compute difference in race-specific differences 59 | #' est <- transform(est, 60 | #' `diff-diff` = .b6 - .b3) 61 | #' 62 | #' summary(est, 63 | #' parm = c("diff_b", "diff_h", "diff-diff")) 64 | #' 65 | #' # Remove last quantity by using `NULL` 66 | #' transform(est, `diff-diff` = NULL) 67 | 68 | #' @exportS3Method transform clarify_est 69 | #' @name transform.clarify_est 70 | transform.clarify_est <- function(`_data`, ...) { 71 | 72 | # Process dots to substitute .b{#} for corresponding value 73 | dots <- substitute(list(...)) 74 | 75 | available_b <- sprintf(".b%s", seq_along(names(`_data`))) 76 | 77 | names_list <- setNames(lapply(add_quotes(names(`_data`), "`"), str2lang), 78 | available_b) 79 | 80 | for (i in seq_along(dots)[-1]) { 81 | if (!is.null(dots[[i]])) 82 | dots[[i]] <- do.call("substitute", list(dots[[i]], names_list)) 83 | } 84 | 85 | e <- try(eval(dots, as.data.frame(`_data`), parent.frame()), silent = TRUE) 86 | 87 | if (is_error(e)) .err(conditionMessage(attr(e, "condition")), tidy = FALSE) 88 | 89 | n <- nrow(`_data`) 90 | if (!all(vapply(e, function(e.) length(e.) == 0 || (length(e.) 
) == n && is.numeric(e.)), logical(1L)))) {
91 | .err("all transformations must be vectorized operations on the quantities in the original `clarify_est` object")
92 | }
93 |
94 | e_original <- eval(dots, as.list(attr(`_data`, "original")), parent.frame())
95 |
96 | inx <- match(names(e), names(`_data`))
97 | matched <- !is.na(inx)
98 |
99 | if (any(matched)) {
100 | nulls <- lengths(e[matched]) == 0
101 |
102 | if (any(!nulls)) {
103 | for (i in seq_along(e)[matched][!nulls]) {
104 | `_data`[, inx[i]] <- e[[i]]
105 | attr(`_data`, "original")[inx[i]] <- as.numeric(e_original[i])
106 | }
107 | }
108 |
109 | if (any(nulls)) {
110 | `_data` <- `_data`[-inx[matched][nulls]]
111 | }
112 | }
113 |
114 | if (!all(matched)) {
115 | nulls <- lengths(e[!matched]) == 0
116 | if (any(!nulls)) {
117 | new_e <- as.matrix(do.call("cbind", e[!matched][!nulls]))
118 | attr(new_e, "original") <- do.call("c", e_original[!matched][!nulls])
119 | attr(new_e, "sim_hash") <- attr(`_data`, "sim_hash")
120 | class(new_e) <- c("clarify_est", class(new_e))
121 | return(cbind.clarify_est(`_data`, new_e))
122 | }
123 | }
124 |
125 | `_data`
126 | }
127 |
128 | #' @exportS3Method cbind clarify_est
129 | #' @rdname transform.clarify_est
130 | cbind.clarify_est <- function(..., deparse.level = 1) {
131 | if (...length() == 0) return(NULL)
132 |
133 | for (i in seq_len(...length())) {
134 | if (!inherits(...elt(i), "clarify_est")) {
135 | .err("all supplied objects must be `clarify_est` objects, the outputs of calls to `sim_apply()` or its wrappers")
136 | }
137 | }
138 |
139 | obj <- list(...)
140 | hashes <- lapply(obj, attr, "sim_hash")
141 |
142 | if (any(lengths(hashes) == 0) || any(!vapply(hashes, chk::vld_string, logical(1L)))) {
143 | .err("all supplied objects must be unmodified `clarify_est` objects")
144 | }
145 | if (!all_the_same(unlist(hashes)) || !all_the_same(unlist(lapply(obj, nrow)))) {
146 | .err("all supplied objects must be the outputs of calls to `sim_apply()` or its wrappers applied to the same `clarify_sim` object")
147 | }
148 |
149 | out <- do.call("cbind", lapply(obj, drop_sim_class))
150 |
151 | attr(out, "original") <- do.call("c", lapply(obj, attr, "original"))
152 | attr(out, "sim_hash") <- hashes[[1]]
153 | class(out) <- c("clarify_est", class(out))
154 |
155 | out
156 | }
157 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | #Utilities
2 | word_list <- function(word.list = NULL, and.or = c("and", "or"), is.are = FALSE, quotes = FALSE) {
3 | #When given a vector of strings, creates a string of the form "a and b"
4 | #or "a, b, and c"
5 | #If is.are, adds "is" or "are" appropriately
6 | L <- length(word.list)
7 | word.list <- add_quotes(word.list, quotes)
8 |
9 | if (L == 0) {
10 | out <- ""
11 | attr(out, "plural") <- FALSE
12 | }
13 | else {
14 | word.list <- word.list[!word.list %in% c(NA_character_, "")]
15 | L <- length(word.list)
16 | if (L == 0) {
17 | out <- ""
18 | attr(out, "plural") <- FALSE
19 | }
20 | else if (L == 1) {
21 | out <- word.list
22 | if (is.are) out <- paste(out, "is")
23 | attr(out, "plural") <- FALSE
24 | }
25 | else {
26 | and.or <- match_arg(and.or)
27 | if (L == 2) {
28 | out <- paste(word.list, collapse = paste0(" ", and.or, " "))
29 | }
30 | else {
31 | out <- paste(paste(word.list[seq_len(L - 1)], collapse = ", "),
32 | word.list[L], sep = paste0(", ", and.or, " "))
33 |
34 | }
35 | if (is.are) out <- paste(out, "are")
36 | attr(out, "plural") <- TRUE
37 | }
38 |
39 | }
40 |
41 | out
42 | }
43 |
44 | #Add quotation marks around a string.
45 | add_quotes <- function(x, quotes = 2L) {
46 | if (!isFALSE(quotes)) {
47 | if (isTRUE(quotes)) quotes <- 2
48 |
49 | if (chk::vld_string(quotes)) x <- paste0(quotes, x, quotes)
50 | else if (chk::vld_whole_number(quotes)) {
51 | if (as.integer(quotes) == 0) return(x)
52 | else if (as.integer(quotes) == 1) x <- paste0("\'", x, "\'")
53 | else if (as.integer(quotes) == 2) x <- paste0("\"", x, "\"")
54 | else stop("`quotes` must be boolean, 1, 2, or a string.")
55 | }
56 | else {
57 | stop("`quotes` must be boolean, 1, 2, or a string.")
58 | }
59 | }
60 | x
61 | }
62 |
63 | #More informative and cleaner version of base::match.arg. From WeightIt with edits.
64 | match_arg <- function(arg, choices, several.ok = FALSE) {
65 | #Replaces match.arg() but gives cleaner error message and processing
66 | #of arg.
67 | if (missing(arg))
68 | stop("No argument was supplied to match_arg().")
69 | arg.name <- deparse1(substitute(arg))
70 |
71 | if (missing(choices)) {
72 | formal.args <- formals(sys.function(sysP <- sys.parent()))
73 | choices <- eval(formal.args[[as.character(substitute(arg))]],
74 | envir = sys.frame(sysP))
75 | }
76 |
77 | if (is.null(arg)) return(choices[1L])
78 | else if (!is.character(arg))
79 | stop(sprintf("The argument to `%s` must be NULL or a character vector", arg.name), call. = FALSE)
80 |
81 | if (!several.ok) {
82 | if (identical(arg, choices)) return(arg[1L])
83 | if (length(arg) > 1L) {
84 | stop(sprintf("The argument to `%s` must be of length 1", arg.name), call. = FALSE)
85 | }
86 | }
87 | else if (length(arg) == 0) {
88 | stop(sprintf("The argument to `%s` must be of length >= 1", arg.name), call. = FALSE)
89 | }
90 |
91 | i <- pmatch(arg, choices, nomatch = 0L, duplicates.ok = TRUE)
92 | if (all(i == 0L))
93 | stop(sprintf("The argument to `%s` should be %s%s.",
94 | arg.name,
95 | ngettext(length(choices), "", if (several.ok) "at least one of " else "one of "),
96 | word_list(choices, and.or = "or", quotes = 2)),
97 | call.
= FALSE) 98 | 99 | i <- i[i > 0L] 100 | 101 | choices[i] 102 | } 103 | 104 | #Format percentage for CI labels 105 | fmt.prc <- function(probs, digits = 3) { 106 | paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), "%") 107 | } 108 | 109 | #Check if all values are the same 110 | all_the_same <- function(x) { 111 | if (is.list(x)) { 112 | for (i in x) if (!identical(i, x[[1]])) return(FALSE) 113 | return(TRUE) 114 | } 115 | 116 | if (is.numeric(x)) { 117 | return(abs(max(x) - min(x)) < 1e-9) 118 | } 119 | 120 | length(unique(x)) == 1 121 | } 122 | 123 | #Tidy tryCatching 124 | try_chk <- function(expr) { 125 | tryCatch(expr, 126 | error = function(e) .err(conditionMessage(e))) 127 | } 128 | 129 | #mode 130 | Mode <- function(v, na.rm = TRUE) { 131 | if (anyNA(v)) { 132 | if (na.rm) v <- v[!is.na(v)] 133 | else { 134 | #Return NA, keeping type of `v` 135 | v <- v[1] 136 | is.na(v) <- TRUE 137 | return(v) 138 | } 139 | } 140 | 141 | if (length(v) == 0) return(v) 142 | if (is.factor(v)) { 143 | if (nlevels(v) == 1) return(levels(v)[1]) 144 | mode <- levels(v)[which.max(tabulate(v, nbins = nlevels(v)))] 145 | mode <- factor(mode, levels = levels(v)) 146 | } 147 | else { 148 | uv <- unique(v) 149 | if (length(uv) == 1) return(uv) 150 | mode <- uv[which.max(tabulate(match(v, uv)))] 151 | } 152 | mode 153 | } 154 | 155 | #Recursively search a list for a value (key) and return location of value 156 | list.search <- function(x, key) { 157 | for (i in seq_along(x)) { 158 | if (identical(x[[i]], key)) { 159 | return(i) 160 | } 161 | 162 | if (is.list(x[[i]])) { 163 | l <- list.search(x[[i]], key) 164 | if (!is.null(l)) return(c(i, l)) 165 | } 166 | } 167 | 168 | NULL 169 | } 170 | 171 | #Checks if input is "try-error", i.e., failure of try() 172 | is_error <- function(x) { 173 | inherits(x, "try-error") 174 | } 175 | 176 | pkg_caller_call <- function(start = 1) { 177 | package.funs <- c(getNamespaceExports(utils::packageName()), 178 | .getNamespaceInfo(asNamespace(utils::packageName()), "S3methods")[, 3]) 179 | k <- start #skip checking pkg_caller_call() 180 | e_max <- start 181 | while (!is.null(e <- rlang::caller_call(k))) { 182 | if (!is.null(n <- rlang::call_name(e)) && 183 | n %in% package.funs) e_max <- k 184 | k <- k + 1 185 | } 186 | rlang::caller_call(e_max) 187 | } 188 | 189 | .err <- function(...) { 190 | chk::err(..., call = pkg_caller_call(start = 2)) 191 | } 192 | 193 | .wrn <- function(..., immediate = TRUE) { 194 | if (immediate && isTRUE(all.equal(getOption("warn"), 0))) { 195 | op <- options(warn = 1) 196 | on.exit(options(op)) 197 | } 198 | chk::wrn(...) 
199 | } 200 | 201 | drop_sim_class <- function(x) { 202 | class(x) <- class(x)[!startsWith(class(x), "clarify_")] 203 | x 204 | } 205 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | ###pkg load stuff 2 | utils::globalVariables(c(".b1", ".b2")) 3 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "80%" 13 | ) 14 | ``` 15 | 16 | # `clarify`: Simulation-Based Inference for Regression Models 17 | 18 | 19 | [![CRAN status](https://www.r-pkg.org/badges/version/clarify)](https://CRAN.R-project.org/package=clarify) 20 | 21 | 22 | `clarify` implements simulation-based inference for computing functions of model parameters, such as average marginal effects and predictions at representative values of the predictors. See the `clarify` [website](https://iqss.github.io/clarify/) for documentation and other examples. `clarify` was designed to replicate and expand on functionality previously provided by the `Zelig` package. 23 | 24 | ## Installation 25 | 26 | `clarify` can be installed from CRAN using 27 | 28 | ```{r, eval = F} 29 | install.packages("clarify") 30 | ``` 31 | 32 | You can install the development version of `clarify` from [GitHub](https://github.com/iqss/clarify) with 33 | 34 | ```{r, eval = F} 35 | install.packages("remotes") 36 | remotes::install_github("iqss/clarify") 37 | ``` 38 | 39 | ## Example 40 | 41 | Below is an example of performing g-computation for the average treatment effect on the treated (ATT) after logistic regression to compute the average causal risk ratio and its confidence interval. First we load the data (in this case the `lalonde` dataset from `MatchIt`) and fit a logistic regression using functions outside of `clarify`: 42 | 43 | ```{r, fig.width=7, fig.height=3} 44 | library(clarify) 45 | 46 | data("lalonde", package = "MatchIt") 47 | 48 | # Fit the model 49 | fit <- glm(I(re78 > 0) ~ treat + age + educ + race + married + 50 | nodegree + re74 + re75, 51 | data = lalonde, family = binomial) 52 | ``` 53 | 54 | Next, to estimate the ATT risk ratio, we simulate coefficients from their implied distribution and compute the effects of interest in each simulation, yielding a distribution of estimates that we can summarize and use for inference: 55 | 56 | ```{r example, fig.width=7, fig.height=3} 57 | # Simulate coefficients from a multivariate normal distribution 58 | set.seed(123) 59 | sim_coefs <- sim(fit) 60 | 61 | # Marginal risk ratio ATT, simulation-based 62 | sim_est <- sim_ame(sim_coefs, var = "treat", subset = treat == 1, 63 | contrast = "RR", verbose = FALSE) 64 | 65 | sim_est 66 | 67 | # View the estimates, confidence intervals, and p-values 68 | summary(sim_est, null = c(`RR` = 1)) 69 | 70 | # Plot the resulting sampling distributions 71 | plot(sim_est) 72 | ``` 73 | 74 | Below, we provide information on the framework `clarify` uses and some other examples. For a complete vignette, see `vignette("clarify")`. 75 | 76 | ## Introduction 77 | 78 | Simulation-based inference is an alternative to the delta method and bootstrapping for performing inference on quantities that are functions of model parameters. 
It involves simulating model coefficients from their multivariate distribution using their estimated values and covariance from a single model fit to the original data, computing the quantities of interest from each set of model coefficients, and then performing inference using the resulting distribution of the estimates as their sampling distribution. Confidence intervals can be computed using the percentiles of the resulting sampling distribution, and p-values can be computed by inverting the confidence intervals. Alternatively, if the resulting sampling distribution is normally distributed, its standard error can be estimated as the standard deviation of the estimates and normal-theory Wald confidence intervals and p-values can be computed. The methodology of simulation-based inference is explained in King, Tomz, and Wittenberg (2000).
79 |
80 | `clarify` was designed to provide a simple, general interface for simulation-based inference and includes a few convenience functions to perform common tasks like computing average marginal effects. The primary functions of `clarify` are `sim()`, `sim_apply()`, `summary()`, and `plot()`. These work together to create a simple workflow for simulation-based inference.
81 |
82 | * `sim()` simulates model parameters from a fitted model
83 | * `sim_apply()` applies an estimator to the simulated coefficients, or to the original object but with the new coefficients inserted
84 | * `summary()` produces confidence intervals and p-values for the resulting estimates
85 | * `plot()` produces plots of the simulated sampling distribution of the resulting estimates
86 |
87 | There are also several wrappers for `sim_apply()` that perform common operations: `sim_ame()` computes the average marginal effect of a variable, mirroring `marginaleffects::avg_predictions()` and `marginaleffects::avg_slopes()`; `sim_setx()` computes predictions at typical values of the covariates and differences between them, mirroring `Zelig::setx()` and `Zelig::setx1()`; and `sim_adrf()` computes average dose-response functions. `clarify` also offers support for models fit to multiply imputed data with the `misim()` function.
88 |
89 | In the example above, we used `sim_ame()` to compute the ATT, but we could have also done so manually using `sim_apply()`, as demonstrated below:
90 |
91 | ```{r example2, fig.width=7, fig.height=3}
92 | # Write a function that computes the g-computation estimate for the ATT
93 | ATT_fun <- function(fit) {
94 | d <- subset(lalonde, treat == 1)
95 | d$treat <- 1
96 | p1 <- mean(predict(fit, newdata = d, type = "response"))
97 | d$treat <- 0
98 | p0 <- mean(predict(fit, newdata = d, type = "response"))
99 | c(`E[Y(0)]` = p0, `E[Y(1)]` = p1, `RR` = p1 / p0)
100 | }
101 |
102 | # Apply that function to the simulated coefficients
103 | sim_est <- sim_apply(sim_coefs, ATT_fun, verbose = FALSE)
104 |
105 | sim_est
106 |
107 | # View the estimates, confidence intervals, and p-values;
108 | # they are the same as when using sim_ame() above
109 | summary(sim_est, null = c(`RR` = 1))
110 |
111 | # Plot the resulting sampling distributions
112 | plot(sim_est, reference = TRUE, ci = FALSE)
113 | ```
114 |
115 | The plot of the simulated sampling distribution indicates that the sampling distribution for the risk ratio is not normally distributed around the estimate, suggesting that the delta method may be a poor approximation and the asymmetric confidence intervals produced using the simulation may be more valid.
Note that the estimates are those computed from the original model coefficients; the distribution is used only for computing confidence intervals, in line with recommendations by Rainey (2023). 116 | 117 | If we want to compute the risk difference, we can do that using `transform()` on the already-produced output: 118 | 119 | ```{r} 120 | #Transform estimates into new quantities of interest 121 | sim_est <- transform(sim_est, `RD` = `E[Y(1)]` - `E[Y(0)]`) 122 | summary(sim_est, null = c(`RR` = 1, `RD` = 0)) 123 | ``` 124 | 125 | We can also use `clarify` to compute predictions and first differences at set and typical values of the predictors, mimicking the functionality of `Zelig`'s `setx()` and `setx1()` functions, using `sim_setx()`: 126 | 127 | ```{r, fig.width=7, fig.height=3} 128 | # Predictions across age and treat at typical values 129 | # of the other predictors 130 | sim_est <- sim_setx(sim_coefs, x = list(age = 20:50, treat = 0:1), 131 | verbose = FALSE) 132 | 133 | #Plot of predicted values across age for each value of treat 134 | plot(sim_est) 135 | ``` 136 | 137 | See `vignette("Zelig", package = "clarify")` for more examples of translating a `Zelig`-based workflow into one that uses `clarify` to estimate the same quantities of interest. 138 | 139 | `clarify` offers parallel processing for all estimation functions to speed up computation. Functionality is also available for the analysis of models fit to multiply imputed data. See `vignette("clarify")` for more details. 140 | 141 | ## References 142 | 143 | King, G., Tomz, M., & Wittenberg, J. (2000). Making the Most of Statistical Analyses: Improving Interpretation and Presentation. *American Journal of Political Science*, 44(2), 347–361. https://doi.org/10.2307/2669316 144 | 145 | Rainey, C. (2023). A careful consideration of CLARIFY: Simulation-induced bias in point estimates of quantities of interest. *Political Science Research and Methods*, 1–10. https://doi.org/10.1017/psrm.2023.8 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # `clarify`: Simulation-Based Inference for Regression Models 5 | 6 | 7 | 8 | [![CRAN 9 | status](https://www.r-pkg.org/badges/version/clarify)](https://CRAN.R-project.org/package=clarify) 10 | 11 | 12 | `clarify` implements simulation-based inference for computing functions 13 | of model parameters, such as average marginal effects and predictions at 14 | representative values of the predictors. See the `clarify` 15 | [website](https://iqss.github.io/clarify/) for documentation and other 16 | examples. `clarify` was designed to replicate and expand on 17 | functionality previously provided by the `Zelig` package. 18 | 19 | ## Installation 20 | 21 | `clarify` can be installed from CRAN using 22 | 23 | ``` r 24 | install.packages("clarify") 25 | ``` 26 | 27 | You can install the development version of `clarify` from 28 | [GitHub](https://github.com/iqss/clarify) with 29 | 30 | ``` r 31 | install.packages("remotes") 32 | remotes::install_github("iqss/clarify") 33 | ``` 34 | 35 | ## Example 36 | 37 | Below is an example of performing g-computation for the average 38 | treatment effect on the treated (ATT) after logistic regression to 39 | compute the average causal risk ratio and its confidence interval. 
First 40 | we load the data (in this case the `lalonde` dataset from `MatchIt`) and 41 | fit a logistic regression using functions outside of `clarify`: 42 | 43 | ``` r 44 | library(clarify) 45 | 46 | data("lalonde", package = "MatchIt") 47 | 48 | # Fit the model 49 | fit <- glm(I(re78 > 0) ~ treat + age + educ + race + married + 50 | nodegree + re74 + re75, 51 | data = lalonde, family = binomial) 52 | ``` 53 | 54 | Next, to estimate the ATT risk ratio, we simulate coefficients from 55 | their implied distribution and compute the effects of interest in each 56 | simulation, yielding a distribution of estimates that we can summarize 57 | and use for inference: 58 | 59 | ``` r 60 | # Simulate coefficients from a multivariate normal distribution 61 | set.seed(123) 62 | sim_coefs <- sim(fit) 63 | 64 | # Marginal risk ratio ATT, simulation-based 65 | sim_est <- sim_ame(sim_coefs, var = "treat", subset = treat == 1, 66 | contrast = "RR", verbose = FALSE) 67 | 68 | sim_est 69 | #> A `clarify_est` object (from `sim_ame()`) 70 | #> - Average adjusted predictions for `treat` 71 | #> - 1000 simulated values 72 | #> - 3 quantities estimated: 73 | #> E[Y(0)] 0.6830995 74 | #> E[Y(1)] 0.7567568 75 | #> RR 1.1078280 76 | 77 | # View the estimates, confidence intervals, and p-values 78 | summary(sim_est, null = c(`RR` = 1)) 79 | #> Estimate 2.5 % 97.5 % P-value 80 | #> E[Y(0)] 0.683 0.587 0.753 . 81 | #> E[Y(1)] 0.757 0.686 0.813 . 82 | #> RR 1.108 0.971 1.298 0.13 83 | 84 | # Plot the resulting sampling distributions 85 | plot(sim_est) 86 | ``` 87 | 88 | 89 | 90 | Below, we provide information on the framework `clarify` uses and some 91 | other examples. For a complete vignette, see `vignette("clarify")`. 92 | 93 | ## Introduction 94 | 95 | Simulation-based inference is an alternative to the delta method and 96 | bootstrapping for performing inference on quantities that are functions 97 | of model parameters. It involves simulating model coefficients from 98 | their multivariate distribution using their estimated values and 99 | covariance from a single model fit to the original data, computing the 100 | quantities of interest from each set of model coefficients, and then 101 | performing inference using the resulting distribution of the estimates 102 | as their sampling distribution. Confidence intervals can be computed 103 | using the percentiles of the resulting sampling distribution, and 104 | p-values can be computed by inverting the confidence intervals. 105 | Alternatively, if the resulting sampling distribution is normally 106 | distributed, its standard error can be estimated as the standard 107 | deviation of the estimates and normal-theory Wald confidence intervals 108 | and p-values can be computed. The methodology of simulation-based 109 | inference is explained in King, Tomz, and Wittenberg (2000). 110 | 111 | `clarify` was designed to provide a simple, general interface for 112 | simulation-based inference and includes a few convenience functions to 113 | perform common tasks like computing average marginal effects. The 114 | primary functions of `clarify` are `sim()`, `sim_apply()`, `summary()`, 115 | and `plot()`. These work together to create a simple workflow for 116 | simulation-based inference. 
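To make this workflow concrete, here is a minimal sketch that reuses the
`fit` object from the example above (the quantity computed, the average
predicted probability, is chosen only for illustration):

``` r
library(clarify)

# Step 1: simulate coefficients from the fitted model
s <- sim(fit, n = 1000)

# Step 2: apply an estimator to each set of simulated coefficients
est <- sim_apply(s, function(fit) {
  c(`E[Y]` = mean(predict(fit, type = "response")))
}, verbose = FALSE)

# Steps 3 and 4: summarize and plot the simulated sampling distribution
summary(est)
plot(est)
```

The roles of the individual functions are as follows: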
117 |
118 | - `sim()` simulates model parameters from a fitted model
119 | - `sim_apply()` applies an estimator to the simulated coefficients, or
120 | to the original object but with the new coefficients inserted
121 | - `summary()` produces confidence intervals and p-values for the
122 | resulting estimates
123 | - `plot()` produces plots of the simulated sampling distribution of the
124 | resulting estimates
125 |
126 | There are also several wrappers for `sim_apply()` that perform common
127 | operations: `sim_ame()` computes the average marginal effect of a
128 | variable, mirroring `marginaleffects::avg_predictions()` and
129 | `marginaleffects::avg_slopes()`; `sim_setx()` computes predictions at
130 | typical values of the covariates and differences between them, mirroring
131 | `Zelig::setx()` and `Zelig::setx1()`; and `sim_adrf()` computes average
132 | dose-response functions. `clarify` also offers support for models fit to
133 | multiply imputed data with the `misim()` function.
134 |
135 | In the example above, we used `sim_ame()` to compute the ATT, but we
136 | could have also done so manually using `sim_apply()`, as demonstrated
137 | below:
138 |
139 | ``` r
140 | # Write a function that computes the g-computation estimate for the ATT
141 | ATT_fun <- function(fit) {
142 | d <- subset(lalonde, treat == 1)
143 | d$treat <- 1
144 | p1 <- mean(predict(fit, newdata = d, type = "response"))
145 | d$treat <- 0
146 | p0 <- mean(predict(fit, newdata = d, type = "response"))
147 | c(`E[Y(0)]` = p0, `E[Y(1)]` = p1, `RR` = p1 / p0)
148 | }
149 |
150 | # Apply that function to the simulated coefficients
151 | sim_est <- sim_apply(sim_coefs, ATT_fun, verbose = FALSE)
152 |
153 | sim_est
154 | #> A `clarify_est` object (from `sim_apply()`)
155 | #> - 1000 simulated values
156 | #> - 3 quantities estimated:
157 | #> E[Y(0)] 0.6830995
158 | #> E[Y(1)] 0.7567568
159 | #> RR 1.1078280
160 |
161 | # View the estimates, confidence intervals, and p-values;
162 | # they are the same as when using sim_ame() above
163 | summary(sim_est, null = c(`RR` = 1))
164 | #> Estimate 2.5 % 97.5 % P-value
165 | #> E[Y(0)] 0.683 0.587 0.753 .
166 | #> E[Y(1)] 0.757 0.686 0.813 .
167 | #> RR 1.108 0.971 1.298 0.13
168 |
169 | # Plot the resulting sampling distributions
170 | plot(sim_est, reference = TRUE, ci = FALSE)
171 | ```
172 |
173 |
174 |
175 | The plot of the simulated sampling distribution indicates that the
176 | sampling distribution for the risk ratio is not normally distributed
177 | around the estimate, suggesting that the delta method may be a poor
178 | approximation and the asymmetric confidence intervals produced using the
179 | simulation may be more valid. Note that the estimates are those computed
180 | from the original model coefficients; the distribution is used only for
181 | computing confidence intervals, in line with recommendations by Rainey
182 | (2023).
183 |
184 | If we want to compute the risk difference, we can do that using
185 | `transform()` on the already-produced output:
186 |
187 | ``` r
188 | #Transform estimates into new quantities of interest
189 | sim_est <- transform(sim_est, `RD` = `E[Y(1)]` - `E[Y(0)]`)
190 | summary(sim_est, null = c(`RR` = 1, `RD` = 0))
191 | #> Estimate 2.5 % 97.5 % P-value
192 | #> E[Y(0)] 0.6831 0.5872 0.7528 .
193 | #> E[Y(1)] 0.7568 0.6859 0.8134 .
194 | #> RR 1.1078 0.9708 1.2976 0.13 195 | #> RD 0.0737 -0.0215 0.1757 0.13 196 | ``` 197 | 198 | We can also use `clarify` to compute predictions and first differences 199 | at set and typical values of the predictors, mimicking the functionality 200 | of `Zelig`’s `setx()` and `setx1()` functions, using `sim_setx()`: 201 | 202 | ``` r 203 | # Predictions across age and treat at typical values 204 | # of the other predictors 205 | sim_est <- sim_setx(sim_coefs, x = list(age = 20:50, treat = 0:1), 206 | verbose = FALSE) 207 | 208 | #Plot of predicted values across age for each value of treat 209 | plot(sim_est) 210 | ``` 211 | 212 | 213 | 214 | See `vignette("Zelig", package = "clarify")` for more examples of 215 | translating a `Zelig`-based workflow into one that uses `clarify` to 216 | estimate the same quantities of interest. 217 | 218 | `clarify` offers parallel processing for all estimation functions to 219 | speed up computation. Functionality is also available for the analysis 220 | of models fit to multiply imputed data. See `vignette("clarify")` for 221 | more details. 222 | 223 | ## References 224 | 225 | King, G., Tomz, M., & Wittenberg, J. (2000). Making the Most of 226 | Statistical Analyses: Improving Interpretation and Presentation. 227 | *American Journal of Political Science*, 44(2), 347–361. 228 | 229 | 230 | Rainey, C. (2023). A careful consideration of CLARIFY: 231 | Simulation-induced bias in point estimates of quantities of interest. 232 | *Political Science Research and Methods*, 1–10. 233 | 234 | -------------------------------------------------------------------------------- /_dev/sim_chain.R: -------------------------------------------------------------------------------- 1 | # # Function to chain simulations, i.e., to simulate values within each simulation. Intended use is for 2 | # # using outputs of first stage, which are estimated with uncertainty, in second stage. Turned out 3 | # # not to give valid results when tested with propensity score weighting. 4 | # sim_chain <- function(sim, FUN, n = 10, vcov = NULL, coefs = NULL, dist = NULL, verbose = TRUE, 5 | # cl = NULL, ...) 
{ 6 | # coef_template <- get_coef_template(sim$fit, sim$coefs) 7 | # coef_location <- get_coef_location(sim$fit, sim$coefs, coef_template) 8 | # 9 | # opb <- pbapply::pboptions(type = if (verbose) "timer" else "none") 10 | # on.exit(pbapply::pboptions(opb)) 11 | # 12 | # apply_FUN <- make_apply_FUN(FUN, coef_location, coef_template) 13 | # 14 | # # Test apply_FUN() on original model coefficients 15 | # test <- try(apply_FUN(fit = sim$fit, coefs = sim$coefs, ...), silent = TRUE) 16 | # if (is_error(test)) { 17 | # .err("`FUN` failed to run on an initial check with the following error:\n", 18 | # conditionMessage(attr(test, "condition"))) 19 | # } 20 | # test_sim <- sim(test, n = 1, vcov = vcov, coefs = coefs, dist = dist) 21 | # 22 | # if (is.null(names(test))) names(test) <- paste0("est", seq_along(test)) 23 | # 24 | # sim.list <- pbapply::pblapply(seq_len(nrow(sim$sim.coefs)), function(i) { 25 | # sim(apply_FUN(fit = sim$fit, coefs = sim$sim.coefs[i,], ...), 26 | # n = n, vcov = vcov, coefs = coefs, dist = dist) 27 | # }, cl = cl) 28 | # 29 | # out <- list(sim.coefs = do.call("rbind", lapply(sim.list, `[[`, "sim.coefs")), 30 | # coefs = test_sim$coefs, 31 | # fit = test) 32 | # 33 | # attr(out, "dist") <- attr(test_sim, "dist") 34 | # attr(out, "use_fit") <- TRUE 35 | # attr(out, "sim_hash") <- rlang::hash(out$sim.coefs) 36 | # class(out) <- "simbased_sim" 37 | # 38 | # out 39 | # } 40 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://iqss.github.io/clarify/ 2 | template: 3 | bootstrap: 5 4 | 5 | reference: 6 | - title: Main Functions 7 | - contents: 8 | - sim 9 | - sim_apply 10 | - summary.clarify_est 11 | - transform.clarify_est 12 | - title: Wrappers 13 | - contents: 14 | - sim_adrf 15 | - plot.clarify_adrf 16 | - sim_ame 17 | - sim_setx 18 | - plot.clarify_setx 19 | - misim 20 | -------------------------------------------------------------------------------- /clarify.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /clarify/Submission 1/RJreferences.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{kingMakingMostStatistical2000, 3 | title = {Making the Most of Statistical Analyses: Improving Interpretation and Presentation}, 4 | author = {King, Gary and Tomz, Michael and Wittenberg, Jason}, 5 | year = {2000}, 6 | date = {2000}, 7 | journal = {American Journal of Political Science}, 8 | pages = {347--361}, 9 | volume = {44}, 10 | number = {2}, 11 | doi = {10.2307/2669316}, 12 | note = {tex.ids= kingMakingMostStatistical2000a 13 | publisher: [Midwest Political Science Association, Wiley]} 14 | } 15 | 16 | @article{zhouNoteBayesianInference2010, 17 | title = {A Note on Bayesian Inference After Multiple Imputation}, 18 | author = {Zhou, Xiang and Reiter, Jerome P.}, 19 | year = {2010}, 20 | month = {05}, 21 | date = {2010-05}, 22 | 
journal = {The American Statistician}, 23 | pages = {159--163}, 24 | volume = {64}, 25 | number = {2}, 26 | doi = {10.1198/tast.2010.09109}, 27 | langid = {en} 28 | } 29 | 30 | @article{tomzClarifySoftwareInterpreting2003, 31 | title = {Clarify: Software for Interpreting and Presenting Statistical Results}, 32 | author = {Tomz, Michael and Wittenberg, Jason and King, Gary}, 33 | year = {2003}, 34 | month = {01}, 35 | date = {2003-01-15}, 36 | journal = {Journal of Statistical Software}, 37 | pages = {1--30}, 38 | volume = {8}, 39 | doi = {10.18637/jss.v008.i01}, 40 | langid = {en} 41 | } 42 | 43 | @article{imaiCommonFrameworkStatistical2008a, 44 | title = {Toward a Common Framework for Statistical Analysis and Development}, 45 | author = {Imai, Kosuke and King, Gary and Lau, Olivia}, 46 | year = {2008}, 47 | month = {12}, 48 | date = {2008-12-01}, 49 | journal = {Journal of Computational and Graphical Statistics}, 50 | pages = {892--913}, 51 | volume = {17}, 52 | number = {4}, 53 | doi = {10.1198/106186008X384898} 54 | } 55 | 56 | @article{puhrFirthLogisticRegression2017, 57 | title = {Firth's logistic regression with rare events: accurate effect estimates and predictions?}, 58 | author = {Puhr, Rainer and Heinze, Georg and Nold, Mariana and Lusa, Lara and Geroldinger, Angelika}, 59 | year = {2017}, 60 | month = {06}, 61 | date = {2017-06-30}, 62 | journal = {Statistics in Medicine}, 63 | pages = {2302--2317}, 64 | volume = {36}, 65 | number = {14}, 66 | doi = {10.1002/sim.7273}, 67 | note = {Publisher: John Wiley & Sons, Ltd}, 68 | langid = {en} 69 | } 70 | 71 | @article{kingLogisticRegressionRare2001, 72 | title = {Logistic Regression in Rare Events Data}, 73 | author = {King, Gary and Zeng, Langche}, 74 | year = {2001}, 75 | date = {2001}, 76 | journal = {Political Analysis}, 77 | pages = {137--163}, 78 | volume = {9}, 79 | number = {2}, 80 | doi = {10.1093/oxfordjournals.pan.a004868}, 81 | langid = {en} 82 | } 83 | 84 | @article{raineyCarefulConsiderationCLARIFY2023, 85 | title = {A Careful Consideration of {{CLARIFY}}: Simulation-Induced Bias in Point Estimates of Quantities of Interest}, 86 | shorttitle = {A Careful Consideration of {{CLARIFY}}}, 87 | author = {Rainey, Carlisle}, 88 | year = {2023}, 89 | month = apr, 90 | journal = {Political Science Research and Methods}, 91 | pages = {1--10}, 92 | publisher = {{Cambridge University Press}}, 93 | issn = {2049-8470, 2049-8489}, 94 | doi = {10.1017/psrm.2023.8}, 95 | urldate = {2023-05-03}, 96 | langid = {english}, 97 | keywords = {Maximum likelihood estimation (MLE)}, 98 | } 99 | 100 | 101 | @article{rainey2017, 102 | title = {Transformation-Induced Bias: Unbiased Coefficients Do Not Imply Unbiased Quantities of Interest}, 103 | author = {Rainey, Carlisle}, 104 | year = {2017}, 105 | month = {07}, 106 | date = {2017-07}, 107 | journal = {Political Analysis}, 108 | pages = {402--409}, 109 | volume = {25}, 110 | number = {3}, 111 | doi = {10.1017/pan.2017.11}, 112 | langid = {en} 113 | } 114 | 115 | @article{JSSv042i08, 116 | title = {MatchIt: Nonparametric preprocessing for parametric causal inference}, 117 | author = {Ho, Daniel E. 
and Imai, Kosuke and King, Gary and Stuart, Elizabeth A.}, 118 | year = {2011}, 119 | date = {2011}, 120 | journal = {Journal of Statistical Software, Articles}, 121 | pages = {1{\textendash}28}, 122 | volume = {42}, 123 | number = {8}, 124 | doi = {10.18637/jss.v042.i08}, 125 | note = {Citation Key: JSSv042i08 126 | tex.ids= hoMatchItNonparametricPreprocessing2011} 127 | } 128 | 129 | @article{dehejiaCausalEffectsNonexperimental1999, 130 | title = {Causal Effects in Nonexperimental Studies: Reevaluating the Evaluation of Training Programs}, 131 | author = {Dehejia, Rajeev H. and Wahba, Sadek}, 132 | year = {1999}, 133 | month = {12}, 134 | date = {1999-12}, 135 | journal = {Journal of the American Statistical Association}, 136 | pages = {1053--1062}, 137 | volume = {94}, 138 | number = {448}, 139 | doi = {10.1080/01621459.1999.10473858}, 140 | langid = {en} 141 | } 142 | 143 | @article{greiferChoosingCausalEstimand2023, 144 | title = {Choosing the Causal Estimand for Propensity Score Analysis of Observational Studies}, 145 | author = {Greifer, Noah and Stuart, Elizabeth A.}, 146 | year = {2023}, 147 | doi = {10.48550/arXiv.2106.10577} 148 | } 149 | 150 | @book{longRegressionModelsCategorical2014, 151 | title = {Regression models for categorical dependent variables using Stata}, 152 | author = {Long, J. Scott and Freese, Jeremy}, 153 | year = {2014}, 154 | date = {2014}, 155 | publisher = {Stata Press Publication, StataCorp LP}, 156 | edition = {Third edition}, 157 | note = {OCLC: ocn890178695}, 158 | address = {College Station, Texas} 159 | } 160 | -------------------------------------------------------------------------------- /clarify/Submission 1/RJwrapper.tex: -------------------------------------------------------------------------------- 1 | \documentclass[a4paper]{report} 2 | \usepackage[utf8]{inputenc} 3 | \usepackage[T1]{fontenc} 4 | \usepackage{RJournal} 5 | \usepackage{amsmath,amssymb,array} 6 | \usepackage{booktabs} 7 | 8 | 9 | % tightlist command for lists without linebreak 10 | \providecommand{\tightlist}{% 11 | \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 12 | 13 | 14 | % Always define CSL refs as bib entries are contained in separate doc 15 | % Pandoc citation processing 16 | \newlength{\cslhangindent} 17 | \setlength{\cslhangindent}{1.5em} 18 | \newlength{\csllabelwidth} 19 | \setlength{\csllabelwidth}{3em} 20 | \newlength{\cslentryspacingunit} % times entry-spacing 21 | \setlength{\cslentryspacingunit}{\parskip} 22 | % for Pandoc 2.8 to 2.10.1 23 | \newenvironment{cslreferences}% 24 | {}% 25 | {\par} 26 | % For Pandoc 2.11+ 27 | \newenvironment{CSLReferences}[2] % #1 hanging-ident, #2 entry spacing 28 | {% don't indent paragraphs 29 | \setlength{\parindent}{0pt} 30 | % turn on hanging indent if param 1 is 1 31 | \ifodd #1 32 | \let\oldpar\par 33 | \def\par{\hangindent=\cslhangindent\oldpar} 34 | \fi 35 | % set entry spacing 36 | \setlength{\parskip}{#2\cslentryspacingunit} 37 | }% 38 | {} 39 | \usepackage{calc} 40 | \newcommand{\CSLBlock}[1]{#1\hfill\break} 41 | \newcommand{\CSLLeftMargin}[1]{\parbox[t]{\csllabelwidth}{#1}} 42 | \newcommand{\CSLRightInline}[1]{\parbox[t]{\linewidth - \csllabelwidth}{#1}\break} 43 | \newcommand{\CSLIndent}[1]{\hspace{\cslhangindent}#1} 44 | 45 | 46 | 47 | \begin{document} 48 | 49 | 50 | %% do not edit, for illustration only 51 | \sectionhead{Contributed research article} 52 | \volume{XX} 53 | \volnumber{YY} 54 | \year{20ZZ} 55 | \month{AAAA} 56 | 57 | \begin{article} 58 | \input{clarify} 59 | \end{article} 60 | 61 | 62 | 
\end{document} 63 | -------------------------------------------------------------------------------- /clarify/Submission 1/clarify.R: -------------------------------------------------------------------------------- 1 | # Generated by `rjournal_pdf_article()` using `knitr::purl()`: do not edit by hand 2 | # Please edit clarify.Rmd to modify this file 3 | 4 | ## ----setup,include=FALSE------------------------------------------------------ 5 | knitr::opts_chunk$set( 6 | fig.path = "figures/", 7 | fig.align='center', 8 | fig.height = 2 9 | ) 10 | 11 | if (!requireNamespace("clarify")) { 12 | install.packages("clarify") 13 | } 14 | 15 | if (!requireNamespace("MatchIt")) { 16 | install.packages("MatchIt") 17 | } 18 | 19 | if (!requireNamespace("Amelia")) { 20 | install.packages("Amelia") 21 | } 22 | 23 | 24 | ## ----------------------------------------------------------------------------- 25 | library(clarify) 26 | 27 | 28 | ## ----------------------------------------------------------------------------- 29 | data("lalonde", package = "MatchIt") 30 | 31 | lalonde$re78_0 <- ifelse(lalonde$re78 > 0, 1, 0) 32 | 33 | head(lalonde) 34 | 35 | 36 | ## ----------------------------------------------------------------------------- 37 | fit <- glm(re78_0 ~ treat * married + age + educ + race + 38 | nodegree + re74 + re75, data = lalonde, 39 | family = binomial("probit")) 40 | 41 | 42 | ## ----------------------------------------------------------------------------- 43 | set.seed(1234) 44 | 45 | # Drawing 1000 simulated coefficients using an HC2 robust 46 | # covariance matrix 47 | s <- sim(fit, n = 1000, 48 | vcov = "HC2") 49 | 50 | s 51 | 52 | 53 | ## ----------------------------------------------------------------------------- 54 | sim_fun1 <- function(fit) { 55 | predict(fit, newdata = lalonde["PSID1",], type = "response") 56 | } 57 | 58 | 59 | ## ----------------------------------------------------------------------------- 60 | est1 <- sim_apply(s, FUN = sim_fun1, verbose = FALSE) 61 | 62 | est1 63 | 64 | 65 | ## ----------------------------------------------------------------------------- 66 | sim_fun2 <- function(coefs) { 67 | hispan <- unname(coefs["racehispan"]) 68 | white <- unname(coefs["racewhite"]) 69 | 70 | c("w - h" = white - hispan) 71 | } 72 | 73 | est2 <- sim_apply(s, FUN = sim_fun2, verbose = FALSE) 74 | 75 | est2 76 | 77 | 78 | ## ---- fig.width=4------------------------------------------------------------- 79 | plot(est1, reference = TRUE, ci = FALSE) 80 | 81 | 82 | ## ----------------------------------------------------------------------------- 83 | summary(est1) 84 | 85 | 86 | ## ---- fig.width=4------------------------------------------------------------- 87 | plot(est2, reference = TRUE, ci = FALSE) 88 | 89 | summary(est2, method = "wald", null = 0) 90 | 91 | 92 | ## ----------------------------------------------------------------------------- 93 | est3 <- sim_setx(s, 94 | x = list(treat = 0:1, 95 | re75 = c(0, 20000), 96 | race = "black"), 97 | verbose = FALSE) 98 | 99 | 100 | ## ----------------------------------------------------------------------------- 101 | summary(est3) 102 | 103 | 104 | ## ----------------------------------------------------------------------------- 105 | attr(est3, "setx") 106 | 107 | 108 | ## ---- fig.width=5------------------------------------------------------------- 109 | plot(est3, var = "re75", ci = FALSE) 110 | 111 | 112 | ## ----------------------------------------------------------------------------- 113 | est4 <- sim_setx(s, 114 | x = list(treat = 
0:1, 115 | re75 = seq(0, 20000, by = 2000), 116 | race = "black"), 117 | verbose = FALSE) 118 | 119 | 120 | ## ---- fig.width=5------------------------------------------------------------- 121 | plot(est4) 122 | 123 | 124 | ## ----------------------------------------------------------------------------- 125 | est5 <- sim_setx(s, 126 | x = list(treat = 0, re75 = 0), 127 | x1 = list(treat = 1, re75 = 0), 128 | verbose = FALSE) 129 | 130 | 131 | ## ----------------------------------------------------------------------------- 132 | summary(est5) 133 | 134 | 135 | ## ----------------------------------------------------------------------------- 136 | est6 <- sim_ame(s, 137 | var = "treat", 138 | subset = treat == 1, 139 | contrast = "rr", 140 | verbose = FALSE) 141 | 142 | 143 | ## ----------------------------------------------------------------------------- 144 | summary(est6, null = c(`RR` = 1)) 145 | 146 | 147 | ## ----------------------------------------------------------------------------- 148 | est7 <- sim_ame(s, 149 | var = "age", 150 | verbose = FALSE) 151 | 152 | 153 | ## ----------------------------------------------------------------------------- 154 | summary(est7) 155 | 156 | 157 | ## ----------------------------------------------------------------------------- 158 | est6b <- sim_ame(s, 159 | var = "treat", 160 | subset = treat == 1, 161 | by = ~married, 162 | contrast = "rr", 163 | verbose = FALSE) 164 | 165 | summary(est6b) 166 | 167 | 168 | ## ----------------------------------------------------------------------------- 169 | age_seq <- seq(18, 50, by = 2) 170 | 171 | est8 <- sim_adrf(s, 172 | var = "age", 173 | contrast = "adrf", 174 | at = age_seq, 175 | verbose = FALSE) 176 | 177 | 178 | ## ---- fig.width=5------------------------------------------------------------- 179 | plot(est8) 180 | 181 | 182 | ## ----------------------------------------------------------------------------- 183 | summary(est8, parm = 1:4) 184 | 185 | 186 | ## ----------------------------------------------------------------------------- 187 | est9 <- sim_adrf(s, 188 | var = "age", 189 | contrast = "amef", 190 | at = age_seq, 191 | verbose = FALSE) 192 | 193 | 194 | ## ---- fig.width=5------------------------------------------------------------- 195 | plot(est9) 196 | 197 | 198 | ## ----------------------------------------------------------------------------- 199 | lalonde <- transform(lalonde, 200 | re78_0 = ifelse(re78 == 0, 1, 0)) 201 | 202 | 203 | ## ----------------------------------------------------------------------------- 204 | est6 <- transform(est6, 205 | RD = `E[Y(1)]` - `E[Y(0)]`) 206 | 207 | 208 | ## ----------------------------------------------------------------------------- 209 | summary(est6, null = c(`RR` = 1, `RD` = 0)) 210 | 211 | 212 | ## ----------------------------------------------------------------------------- 213 | est6b |> 214 | transform(RR_ratio = `RR[1]` / `RR[0]`) |> 215 | summary(parm = c("RR[0]", "RR[1]", "RR_ratio"), 216 | null = 1) 217 | 218 | 219 | ## ----------------------------------------------------------------------------- 220 | # AME of treat with race = "black" 221 | est10b <- sim_ame(s, var = "treat", subset = race == "black", 222 | contrast = "diff", verbose = FALSE) 223 | summary(est10b) 224 | 225 | # AME of treat with race = "hispan" 226 | est10h <- sim_ame(s, var = "treat", subset = race == "hispan", 227 | contrast = "diff", verbose = FALSE) 228 | summary(est10h) 229 | 230 | 231 | ## 
----------------------------------------------------------------------------- 232 | names(est10b) <- paste(names(est10b), "b", sep = "_") 233 | names(est10h) <- paste(names(est10h), "h", sep = "_") 234 | 235 | 236 | ## ----------------------------------------------------------------------------- 237 | est10 <- cbind(est10b, est10h) 238 | summary(est10) 239 | 240 | 241 | ## ----------------------------------------------------------------------------- 242 | est10 <- transform(est10, 243 | `Dh - Db` = Diff_h - Diff_b) 244 | summary(est10, parm = "Dh - Db") 245 | 246 | 247 | ## ---- include=F--------------------------------------------------------------- 248 | amelia_ok <- requireNamespace("Amelia", quietly = TRUE) 249 | knitr::opts_chunk$set( 250 | eval = amelia_ok 251 | ) 252 | if (amelia_ok) library(Amelia) 253 | 254 | 255 | ## ---- message=F--------------------------------------------------------------- 256 | library(Amelia) 257 | data("africa", package = "Amelia") 258 | 259 | # Multiple imputation 260 | a.out <- amelia(x = africa, m = 10, cs = "country", 261 | ts = "year", logs = "gdp_pc", p2s = 0) 262 | 263 | # Fit model to each dataset 264 | model.list <- with(a.out, lm(gdp_pc ~ infl * trade)) 265 | 266 | # Simulate coefficients, 100 draws per imputation 267 | si <- misim(model.list, n = 100) 268 | 269 | si 270 | 271 | 272 | ## ----------------------------------------------------------------------------- 273 | sim_fun <- function(fit) { 274 | #Extract the original dataset using get_predictors() 275 | X <- insight::get_predictors(fit) 276 | 277 | p0 <- predict(fit) 278 | 279 | #Predictions after perturbing infl slightly 280 | p1 <- predict(fit, newdata = transform(X, infl = infl + 1e-5)) 281 | 282 | c(AME = mean((p1 - p0) / 1e-5)) 283 | } 284 | 285 | est_mi <- sim_apply(si, FUN = sim_fun, verbose = FALSE) 286 | 287 | summary(est_mi) 288 | 289 | 290 | ## ----------------------------------------------------------------------------- 291 | est_mi2 <- sim_ame(si, var = "infl", verbose = FALSE) 292 | 293 | summary(est_mi2) 294 | 295 | -------------------------------------------------------------------------------- /clarify/Submission 1/clarify.log: -------------------------------------------------------------------------------- 1 | This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.22) 28 SEP 2023 14:03 2 | entering extended mode 3 | restricted \write18 enabled. 4 | %&-line parsing enabled. 5 | **clarify.tex 6 | (./clarify.tex 7 | LaTeX2e <2023-06-01> patch level 1 8 | L3 programming layer <2023-08-29> 9 | ! Undefined control sequence. 10 | l.7 \maketitle 11 | 12 | Here is how much of TeX's memory you used: 13 | 16 strings out of 476894 14 | 383 string characters out of 5807862 15 | 1917791 words of memory out of 5000000 16 | 21409 multiletter control sequences out of 15000+600000 17 | 558069 words of font info for 36 fonts, out of 8000000 for 9000 18 | 14 hyphenation exceptions out of 8191 19 | 13i,0n,12p,88b,9s stack positions out of 10000i,1000n,20000p,200000b,200000s 20 | 21 | ! ==> Fatal error occurred, no output PDF file produced! 
22 | -------------------------------------------------------------------------------- /clarify/Submission 1/clarify.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/clarify.pdf -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-10-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-10-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-14-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-14-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-16-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-16-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-25-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-25-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-28-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-28-1.pdf 
-------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-8-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-8-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 1/figures/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 1/figures/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /clarify/Submission 1/initial_checks.log: -------------------------------------------------------------------------------- 1 | Initial check results: 2 | 3 | SUCCESS: Submission has consistently named tex, bib, and R files 4 | WARNING: The archive contains hidden files which will be removed: .DS_Store 5 | SUCCESS: File and directory names are compliant. 6 | SUCCESS: No problematic file found 7 | SUCCESS: Possible motivation letter found: motivation-letter.md 8 | ERROR: The title is not in title case! Suggest title to be changed to: 9 | Clarify: Simulation-Based Inference for Regression Models. 10 | Initial check results: 11 | 12 | SUCCESS: Submission has consistently named tex, bib, and R files 13 | WARNING: The archive contains hidden files which will be removed: .DS_Store 14 | SUCCESS: File and directory names are compliant. 15 | SUCCESS: No problematic file found 16 | SUCCESS: Possible motivation letter found: motivation-letter.md 17 | SUCCESS: The article title is properly formatted. 18 | Initial check results: 19 | 20 | SUCCESS: Submission has consistently named tex, bib, and R files 21 | WARNING: The archive contains hidden files which will be removed: .DS_Store 22 | SUCCESS: File and directory names are compliant. 23 | SUCCESS: No problematic file found 24 | SUCCESS: Possible motivation letter found: motivation-letter.md 25 | SUCCESS: The article title is properly formatted. 26 | -------------------------------------------------------------------------------- /clarify/Submission 1/motivation-letter.md: -------------------------------------------------------------------------------- 1 | --- 2 | output: pdf_document 3 | fontsize: 12pt 4 | --- 5 | 6 | \thispagestyle{empty} 7 | \today 8 | 9 | Editor 10 | The R Journal 11 | \bigskip 12 | 13 | 14 | Thank you for considering our article "`clarify`: Simulation-Based Inference for Regression Models" for publication in the R Journal. The article describes the use of our package `clarify` for performing simulation-based inference of post-estimation quantities from regression, which enhances the interpretation of these models without making the same assumptions and approximations similar methods make. We believe this will be useful to anyone performing statistical analysis with regression and therefore will have broad appeal to R users. 
Please feel free to reach out with any questions or requests for additional materials that might be useful. 15 | 16 | \bigskip 17 | \bigskip 18 | 19 | Regards, 20 | 21 | 22 | 23 | 24 | Noah Greifer 25 | Institute for Quantitative Social Science 26 | Harvard University 27 | Cambridge, MA, USA 28 | ngreifer@iq.harvard.edu 29 | 30 | \bigskip 31 | -------------------------------------------------------------------------------- /clarify/Submission 2/1-review-1.txt: -------------------------------------------------------------------------------- 1 | A bit of background on my perspective as a reviewer. As the paper notes, there is some “disagreement” in practice (and maybe in theory) about the relative value of the delta-method versus simulation-based inference. I fall into the delta-method camp. As such, I’m slightly skeptical of the method the paper proposes. That said, I don’t have a huge problem with the approach that the authors propose—I would never suggest that a researcher should switch from {clarify} to {marginaleffects}, for example. That said, I’m going to make an argument for publication, and that argument is from the perspective of a skeptic. Others might make stronger arguments for publication. 2 | 3 | 4 | 5 | # Strengths 6 | 7 | 8 | 9 | First, the paper is extremely clear about both the software and the method. I sometimes find modern software frustrating because it’s not always clear what the software is doing. I assume that it’s doing good things, but I’m not always sure what it’s doing. That is not the case with this paper. The authors describe the software interface clearly, but also the method that the software is implementing. 10 | 11 | 12 | 13 | Second, this package is important for historical reasons. CLARIFY was (is?) an immensely popular Stata package among political scientists. {Zelig} never took off, but was widely known among political scientists. In my opinion, {clarify} is an excellent, smart replacement for {Zelig}. This paper is worth publishing because of the historical importance of CLARIFY and Zelig among political scientists. Will {clarify} be as popular as {marginaleffects}? I don’t think so. Will it help political scientists trained in and around 2003 to 2018? Absolutely. King, Tomz, and Wittenberg (2000) is one of the most cited political science papers ever, so it would be a real shame to not have a well-documented R package to implement those ideas. (I’m not sure how popular this method was/is outside political science; perhaps it’s also important in some adjacent social sciences.) 14 | 15 | 16 | 17 | These two points seem uncontroversial and should weigh heavily in favor of publication. 18 | 19 | 20 | 21 | # A (Minor) Weakness 22 | 23 | 24 | 25 | I would gently push back on a few stronger claims in the paper regarding the performance of the simulation-based intervals over the delta intervals. Here are a few examples: 26 | 27 | 1. “often more accurate than using the delta method” (p. 1) 28 | 2. “Given its non-Normality, the quantile-based bounds are clearly more appropriate than those resulting from the Normal approximation, as the bounds computed from the Normal approximation would be outside the bounds of the estimate.” (p. 6) 29 | 3. “Inverting the uncertainty interval involves finding the smallest confidence level such that the null value is within the confidence bounds. The p-value for the test is one minus this level.” (p. 6) 30 | 4. 
“including plots to assess the normality of the distributions of simulated values (important for assessing whether Wald-type confidence intervals and p-values are valid)” (p. 16) 31 | 32 | 33 | I think the authors would agree with this summary of their position: “When the simulations of the quantity of interest are not normal, then simulation-based inference should be preferred.” 34 | 35 | 36 | 37 | It isn’t clear to me why this summary would be true. 38 | 39 | 40 | 41 | For example, if I knew the sampling distribution was skewed to the right, then I would want a CI with a longer arm to the left and a shorter arm to the right, else the CI’s misses won’t be symmetric (e.g., 2.5% low, 2.5% high). To make this concrete, suppose you get a point estimate at the 97.5th percentile of a right-skewed sampling distribution. Then you have to go really far back to the left to capture the truth. If you get a point estimate at the 2.5th percentile of this same sampling distribution, then you only need to go a little bit to the right to capture the truth. Thus, the CI with the nominal behavior would seem to require a short arm to the right and a long arm to the left. Simulation-based inference does the opposite of this. It’s my intuition that equal-armed CIs would work better than CIs with a long and short arm on the “wrong” side. This is merely my intuition. 42 | 43 | 44 | 45 | The quote from p. 1 is stated as a matter of fact, but it’s not clear what the authors mean by “more accurate” or what conception of “accurate” would make this statement true. Similarly for “valid” on p. 16. 46 | 47 | 48 | 49 | The most natural reading of “more accurate” seems to be “closer to nominal coverage” (e.g., 95% capture rate). The authors suggest that the delta method will not approximately achieve this coverage when the sampling distribution is far from symmetric. This seems non-controversial. But it seems to me (based on intuition/theory and simulations) that the simulation-based intervals will also behave poorly in these same scenarios (with this poor behavior translating to the p-values). Claims 1, 2, and 4 above seem to assume that simulation-based inference will meaningfully improve on the delta method when the sampling distribution is non-normal, but I can’t quite see why that should be (see discussion above). Claim 3 depends on claims 1, 2, and 4. 50 | 51 | 52 | 53 | If they are able, I suggest that the authors (1) clarify their usage of “accurate” and “valid” and (2) support these points with references and/or brief justifications. It’s certainly beyond the scope of the paper to fully justify these claims, but given the matter-of-factness with which the authors make these claims, perhaps stating the claims more clearly or justifying them briefly would be helpful to readers. 54 | 55 | 56 | 57 | I should emphasize, though, that this is “small beans”—mostly theoretical navel gazing—because asymptotic results apply and simulation-based inference is easy to use and historically important and popular. 58 | 59 | -------------------------------------------------------------------------------- /clarify/Submission 2/1-review-2.txt: -------------------------------------------------------------------------------- 1 | Thank you for giving me the opportunity to review this interesting paper. 2 | 3 | I like the paper a lot. It is clear and well-written. 4 | 5 | The software is of high quality. I have tried it, read the documentation, and skimmed its code base.
The authors follow modern best practices for development, including many unit tests, thorough documentation, and a nice website with a useful vignette and a migration guide for users of the older `Zelig` package. Well done! 6 | 7 | # Motivation 8 | 9 | The motivation for simulation-based inference could be improved. For example, on page 1 the authors state that: 10 | 11 | > Simulation-based inference is not only often more accurate than using the delta method, it is also simpler to understand and implement, as it does not require understanding Taylor series or the calculus that underlies it. This makes it more palatable to nontechnical audiences and easier to learn for students without sacrificing statistical performance. 12 | 13 | This is a red herring. Easy-to-use software implements the delta method by default, and nontechnical audiences and students essentially never have to implement the delta method themselves. 14 | 15 | Also, what does "accurate" mean, exactly? What does "often" mean? Under what conditions, exactly? This is a paper about software, so I don't expect a full theoretical investigation. But there should at least be a little bit more discussion and breadcrumbs for users to follow. Are there good theoretical or simulation studies on the properties of this strategy, identifying when it works better or worse? Currently, the reader might leave the paper with the impression that simulation-based inference strictly dominates the delta method and bootstrapping in all cases, except along the computational cost dimension. Is that really true? I'll freely admit that I'm a bit skeptical, but I would love to see some references to authors who probe this question. I'm sure other readers would find this useful too in deciding whether they should use `clarify`. 16 | 17 | The flip side of this question is: If simulation-based inference is easier and more accurate, why isn't everyone using it already? What's the market failure? 18 | 19 | # Approximations in simulation-based inference 20 | 21 | The authors write: 22 | 23 | > Arriving at the posterior distribution does not require taking any derivatives or making any approximations beyond those usually used for inference on model parameter estimates. 24 | 25 | This may seem trivial, but taking M random draws will lead to a different result than drawing a different set M' of coefficients. Clearly, there's a simulation-related approximation going on. This should be acknowledged early in the intro. The authors should revise the text to avoid saying that simulation-based inference requires no approximation. 26 | 27 | # Assumption or approximation? 28 | 29 | The abstract and introduction sell simulation-based inference as a way to relax "assumptions" of the delta method, but then go on to talk about "approximations" that may fail: 30 | 31 | > The usual method for estimating the uncertainty of the derived quantities is known as the “delta method”, which involves two approximations: 1) that the variance of the derived quantity can be represented as a first-order Taylor series, and 2) that the estimate of the derived quantity is normally distributed. 32 | 33 | The classic textbook treatment of the delta method talks about two assumptions: continuity, and normality of $\hat{\theta}$ --- not of derived quantities $h(\hat{\theta})$. Do the authors see a distinction between a violation of delta method assumptions and a "failure" of approximations in finite samples? If so, this should be cleared up in the abstract and intro to avoid confusion.
34 | 35 | # Backtransformation 36 | 37 | Quotes like these two feel misleading, because standard practice in such GLM models is to build confidence intervals by backtransformation, rather than by naively constructing symmetric intervals: 38 | 39 | > For example, predicted probabilities close to 0 or 1 or ratios of coefficients or probabilities typically do not have normal (or even symmetrical) distributions in finite samples, and the usual Wald-type confidence interval limits produced from delta method standard errors can include values outside the domain of the quantity of interest. 40 | 41 | > One can see again how a delta method or Normal approximation may not have yielded valid uncertainty intervals given the non-Normality of the distributions. 42 | 43 | Most of the alternative packages in `R` will automatically use backtransformation to build confidence intervals that do not stretch outside reasonable bounds. Of course, this is not possible or easy for all model types. But if the authors want to make a big deal out of that critique --- open with it on page 1 and reiterate it in the text --- they should probably show an example where this is a real problem. 44 | 45 | # Minor notes 46 | 47 | * Why are all functions prefixed with `sim_`? I would argue that these prefixes are extraneous, and that this is what the namespace is for. 48 | * Can `plot()` draw on more dimensions if more variables are included in `sim_setx()`? 49 | * It is not clear to me from the documentation what `null` values are acceptable in `summary()`. I see RD and RR. What else? 50 | 51 | > The largest difference is that clarify supports iterative building of more and more complex hypotheses through the transform() method, which quickly computes new quantities and transformation from the existing computed quantities, whereas marginaleffects only supports a single transformation 52 | 53 | Or users can call `posterior_draws()` to manipulate the draws themselves, but this is admittedly less convenient.
54 | 55 | -------------------------------------------------------------------------------- /clarify/Submission 2/RJwrapper.tex: -------------------------------------------------------------------------------- 1 | \documentclass[a4paper]{report} 2 | \usepackage[utf8]{inputenc} 3 | \usepackage[T1]{fontenc} 4 | \usepackage{RJournal} 5 | \usepackage{amsmath,amssymb,array} 6 | \usepackage{booktabs} 7 | 8 | 9 | % tightlist command for lists without linebreak 10 | \providecommand{\tightlist}{% 11 | \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}} 12 | 13 | 14 | % Always define CSL refs as bib entries are contained in separate doc 15 | % Pandoc citation processing 16 | \newlength{\cslhangindent} 17 | \setlength{\cslhangindent}{1.5em} 18 | \newlength{\csllabelwidth} 19 | \setlength{\csllabelwidth}{3em} 20 | \newlength{\cslentryspacingunit} % times entry-spacing 21 | \setlength{\cslentryspacingunit}{\parskip} 22 | % for Pandoc 2.8 to 2.10.1 23 | \newenvironment{cslreferences}% 24 | {}% 25 | {\par} 26 | % For Pandoc 2.11+ 27 | \newenvironment{CSLReferences}[2] % #1 hanging-ident, #2 entry spacing 28 | {% don't indent paragraphs 29 | \setlength{\parindent}{0pt} 30 | % turn on hanging indent if param 1 is 1 31 | \ifodd #1 32 | \let\oldpar\par 33 | \def\par{\hangindent=\cslhangindent\oldpar} 34 | \fi 35 | % set entry spacing 36 | \setlength{\parskip}{#2\cslentryspacingunit} 37 | }% 38 | {} 39 | \usepackage{calc} 40 | \newcommand{\CSLBlock}[1]{#1\hfill\break} 41 | \newcommand{\CSLLeftMargin}[1]{\parbox[t]{\csllabelwidth}{#1}} 42 | \newcommand{\CSLRightInline}[1]{\parbox[t]{\linewidth - \csllabelwidth}{#1}\break} 43 | \newcommand{\CSLIndent}[1]{\hspace{\cslhangindent}#1} 44 | 45 | 46 | 47 | \begin{document} 48 | 49 | 50 | %% do not edit, for illustration only 51 | \sectionhead{Contributed research article} 52 | \volume{XX} 53 | \volnumber{YY} 54 | \year{20ZZ} 55 | \month{AAAA} 56 | 57 | \begin{article} 58 | \input{clarify} 59 | \end{article} 60 | 61 | 62 | \end{document} 63 | -------------------------------------------------------------------------------- /clarify/Submission 2/clarify.R: -------------------------------------------------------------------------------- 1 | # Generated by `rjournal_pdf_article()` using `knitr::purl()`: do not edit by hand 2 | # Please edit clarify.Rmd to modify this file 3 | 4 | ## ----setup,include=FALSE------------------------------------------------------ 5 | knitr::opts_chunk$set( 6 | fig.path = "figures/", 7 | fig.align='center', 8 | fig.height = 2 9 | ) 10 | 11 | if (!requireNamespace("clarify")) { 12 | install.packages("clarify") 13 | } 14 | 15 | if (!requireNamespace("MatchIt")) { 16 | install.packages("MatchIt") 17 | } 18 | 19 | if (!requireNamespace("Amelia")) { 20 | install.packages("Amelia") 21 | } 22 | 23 | 24 | ## ----------------------------------------------------------------------------- 25 | library(clarify) 26 | 27 | 28 | ## ----------------------------------------------------------------------------- 29 | data("lalonde", package = "MatchIt") 30 | 31 | lalonde$re78_0 <- ifelse(lalonde$re78 > 0, 1, 0) 32 | 33 | head(lalonde) 34 | 35 | 36 | ## ----------------------------------------------------------------------------- 37 | fit <- glm(re78_0 ~ treat * married + age + educ + race + 38 | nodegree + re74 + re75, data = lalonde, 39 | family = binomial("probit")) 40 | 41 | 42 | ## ----------------------------------------------------------------------------- 43 | set.seed(1234) 44 | 45 | # Drawing 1000 simulated coefficients using an HC2 robust 46 | # 
covariance matrix 47 | s <- sim(fit, n = 1000, 48 | vcov = "HC2") 49 | 50 | s 51 | 52 | 53 | ## ----------------------------------------------------------------------------- 54 | sim_fun1 <- function(fit) { 55 | predict(fit, newdata = lalonde["PSID1",], type = "response") 56 | } 57 | 58 | 59 | ## ----------------------------------------------------------------------------- 60 | est1 <- sim_apply(s, FUN = sim_fun1, verbose = FALSE) 61 | 62 | est1 63 | 64 | 65 | ## ----------------------------------------------------------------------------- 66 | sim_fun2 <- function(coefs) { 67 | hispan <- unname(coefs["racehispan"]) 68 | white <- unname(coefs["racewhite"]) 69 | 70 | c("w - h" = white - hispan) 71 | } 72 | 73 | est2 <- sim_apply(s, FUN = sim_fun2, verbose = FALSE) 74 | 75 | est2 76 | 77 | 78 | ## ---- fig.width=4------------------------------------------------------------- 79 | plot(est1, reference = TRUE, ci = FALSE) 80 | 81 | 82 | ## ----------------------------------------------------------------------------- 83 | summary(est1) 84 | 85 | 86 | ## ---- fig.width=4------------------------------------------------------------- 87 | plot(est2, reference = TRUE, ci = FALSE) 88 | 89 | summary(est2, method = "wald", null = 0) 90 | 91 | 92 | ## ----------------------------------------------------------------------------- 93 | est3 <- sim_setx(s, 94 | x = list(treat = 0:1, 95 | re75 = c(0, 20000), 96 | race = "black"), 97 | verbose = FALSE) 98 | 99 | 100 | ## ----------------------------------------------------------------------------- 101 | summary(est3) 102 | 103 | 104 | ## ----------------------------------------------------------------------------- 105 | attr(est3, "setx") 106 | 107 | 108 | ## ---- fig.width=5------------------------------------------------------------- 109 | plot(est3, var = "re75", ci = FALSE) 110 | 111 | 112 | ## ----------------------------------------------------------------------------- 113 | est4 <- sim_setx(s, 114 | x = list(treat = 0:1, 115 | re75 = seq(0, 20000, by = 2000), 116 | race = "black"), 117 | verbose = FALSE) 118 | 119 | 120 | ## ---- fig.width=5------------------------------------------------------------- 121 | plot(est4) 122 | 123 | 124 | ## ----------------------------------------------------------------------------- 125 | est5 <- sim_setx(s, 126 | x = list(treat = 0, re75 = 0), 127 | x1 = list(treat = 1, re75 = 0), 128 | verbose = FALSE) 129 | 130 | 131 | ## ----------------------------------------------------------------------------- 132 | summary(est5) 133 | 134 | 135 | ## ----------------------------------------------------------------------------- 136 | est6 <- sim_ame(s, 137 | var = "treat", 138 | subset = treat == 1, 139 | contrast = "rr", 140 | verbose = FALSE) 141 | 142 | 143 | ## ----------------------------------------------------------------------------- 144 | summary(est6, null = c(`RR` = 1)) 145 | 146 | 147 | ## ----------------------------------------------------------------------------- 148 | est7 <- sim_ame(s, 149 | var = "age", 150 | verbose = FALSE) 151 | 152 | 153 | ## ----------------------------------------------------------------------------- 154 | summary(est7) 155 | 156 | 157 | ## ----------------------------------------------------------------------------- 158 | est6b <- sim_ame(s, 159 | var = "treat", 160 | subset = treat == 1, 161 | by = ~married, 162 | contrast = "rr", 163 | verbose = FALSE) 164 | 165 | summary(est6b) 166 | 167 | 168 | ## ----------------------------------------------------------------------------- 169 | 
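# Grid of ages at which to evaluate the average dose-response function (ADRF)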
age_seq <- seq(18, 50, by = 2) 170 | 171 | est8 <- sim_adrf(s, 172 | var = "age", 173 | contrast = "adrf", 174 | at = age_seq, 175 | verbose = FALSE) 176 | 177 | 178 | ## ---- fig.width=5------------------------------------------------------------- 179 | plot(est8) 180 | 181 | 182 | ## ----------------------------------------------------------------------------- 183 | summary(est8, parm = 1:4) 184 | 185 | 186 | ## ----------------------------------------------------------------------------- 187 | est9 <- sim_adrf(s, 188 | var = "age", 189 | contrast = "amef", 190 | at = age_seq, 191 | verbose = FALSE) 192 | 193 | 194 | ## ---- fig.width=5------------------------------------------------------------- 195 | plot(est9) 196 | 197 | 198 | ## ----------------------------------------------------------------------------- 199 | lalonde <- transform(lalonde, 200 | re78_0 = ifelse(re78 == 0, 1, 0)) 201 | 202 | 203 | ## ----------------------------------------------------------------------------- 204 | est6 <- transform(est6, 205 | RD = `E[Y(1)]` - `E[Y(0)]`) 206 | 207 | 208 | ## ----------------------------------------------------------------------------- 209 | summary(est6, null = c(`RR` = 1, `RD` = 0)) 210 | 211 | 212 | ## ----------------------------------------------------------------------------- 213 | est6b |> 214 | transform(RR_ratio = `RR[1]` / `RR[0]`) |> 215 | summary(parm = c("RR[0]", "RR[1]", "RR_ratio"), 216 | null = 1) 217 | 218 | 219 | ## ----------------------------------------------------------------------------- 220 | # AME of treat with race = "black" 221 | est10b <- sim_ame(s, var = "treat", subset = race == "black", 222 | contrast = "diff", verbose = FALSE) 223 | summary(est10b) 224 | 225 | # AME of treat with race = "hispan" 226 | est10h <- sim_ame(s, var = "treat", subset = race == "hispan", 227 | contrast = "diff", verbose = FALSE) 228 | summary(est10h) 229 | 230 | 231 | ## ----------------------------------------------------------------------------- 232 | names(est10b) <- paste(names(est10b), "b", sep = "_") 233 | names(est10h) <- paste(names(est10h), "h", sep = "_") 234 | 235 | 236 | ## ----------------------------------------------------------------------------- 237 | est10 <- cbind(est10b, est10h) 238 | summary(est10) 239 | 240 | 241 | ## ----------------------------------------------------------------------------- 242 | est10 <- transform(est10, 243 | `Dh - Db` = Diff_h - Diff_b) 244 | summary(est10, parm = "Dh - Db") 245 | 246 | 247 | ## ---- include=F--------------------------------------------------------------- 248 | amelia_ok <- requireNamespace("Amelia", quietly = TRUE) 249 | knitr::opts_chunk$set( 250 | eval = amelia_ok 251 | ) 252 | if (amelia_ok) library(Amelia) 253 | 254 | 255 | ## ---- message=F--------------------------------------------------------------- 256 | library(Amelia) 257 | data("africa", package = "Amelia") 258 | 259 | # Multiple imputation 260 | a.out <- amelia(x = africa, m = 10, cs = "country", 261 | ts = "year", logs = "gdp_pc", p2s = 0) 262 | 263 | # Fit model to each dataset 264 | model.list <- with(a.out, lm(gdp_pc ~ infl * trade)) 265 | 266 | # Simulate coefficients, 100 draws per imputation 267 | si <- misim(model.list, n = 100) 268 | 269 | si 270 | 271 | 272 | ## ----------------------------------------------------------------------------- 273 | sim_fun <- function(fit) { 274 | #Extract the original dataset using get_predictors() 275 | X <- insight::get_predictors(fit) 276 | 277 | p0 <- predict(fit) 278 | 279 | #Predictions after perturbing 
infl slightly 280 | p1 <- predict(fit, newdata = transform(X, infl = infl + 1e-5)) 281 | 282 | c(AME = mean((p1 - p0) / 1e-5)) 283 | } 284 | 285 | est_mi <- sim_apply(si, FUN = sim_fun, verbose = FALSE) 286 | 287 | summary(est_mi) 288 | 289 | 290 | ## ----------------------------------------------------------------------------- 291 | est_mi2 <- sim_ame(si, var = "infl", verbose = FALSE) 292 | 293 | summary(est_mi2) 294 | 295 | -------------------------------------------------------------------------------- /clarify/Submission 2/clarify.log: -------------------------------------------------------------------------------- 1 | This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) (preloaded format=pdflatex 2023.9.22) 28 SEP 2023 14:03 2 | entering extended mode 3 | restricted \write18 enabled. 4 | %&-line parsing enabled. 5 | **clarify.tex 6 | (./clarify.tex 7 | LaTeX2e <2023-06-01> patch level 1 8 | L3 programming layer <2023-08-29> 9 | ! Undefined control sequence. 10 | l.7 \maketitle 11 | 12 | Here is how much of TeX's memory you used: 13 | 16 strings out of 476894 14 | 383 string characters out of 5807862 15 | 1917791 words of memory out of 5000000 16 | 21409 multiletter control sequences out of 15000+600000 17 | 558069 words of font info for 36 fonts, out of 8000000 for 9000 18 | 14 hyphenation exceptions out of 8191 19 | 13i,0n,12p,88b,9s stack positions out of 10000i,1000n,20000p,200000b,200000s 20 | 21 | ! ==> Fatal error occurred, no output PDF file produced! 22 | -------------------------------------------------------------------------------- /clarify/Submission 2/clarify.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/clarify.pdf -------------------------------------------------------------------------------- /clarify/Submission 2/figures/plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/plot1-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/plot2-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/plot3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/plot3-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/plot4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/plot4-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/plot8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/plot8-1.png 
-------------------------------------------------------------------------------- /clarify/Submission 2/figures/plot9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/plot9-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-10-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-10-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-14-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-14-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-14-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-14-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-16-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-16-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-16-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-16-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-25-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-25-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-25-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-25-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-28-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-28-1.pdf 
-------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-28-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-28-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-8-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-8-1.pdf -------------------------------------------------------------------------------- /clarify/Submission 2/figures/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/clarify/Submission 2/figures/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /clarify/Submission 2/initial_checks.log: -------------------------------------------------------------------------------- 1 | Initial check results: 2 | 3 | SUCCESS: Submission has consistently named tex, bib, and R files 4 | WARNING: The archive contains hidden files which will be removed: .DS_Store 5 | SUCCESS: File and directory names are compliant. 6 | SUCCESS: No problematic file found 7 | SUCCESS: Possible motivation letter found: motivation-letter.md 8 | ERROR: The title is not in title case! Suggest title to be changed to: 9 | Clarify: Simulation-Based Inference for Regression Models. 10 | Initial check results: 11 | 12 | SUCCESS: Submission has consistently named tex, bib, and R files 13 | WARNING: The archive contains hidden files which will be removed: .DS_Store 14 | SUCCESS: File and directory names are compliant. 15 | SUCCESS: No problematic file found 16 | SUCCESS: Possible motivation letter found: motivation-letter.md 17 | SUCCESS: The article title is properly formatted. 18 | Initial check results: 19 | 20 | SUCCESS: Submission has consistently named tex, bib, and R files 21 | WARNING: The archive contains hidden files which will be removed: .DS_Store 22 | SUCCESS: File and directory names are compliant. 23 | SUCCESS: No problematic file found 24 | SUCCESS: Possible motivation letter found: motivation-letter.md 25 | SUCCESS: The article title is properly formatted. 26 | -------------------------------------------------------------------------------- /clarify/Submission 2/motivation-letter.md: -------------------------------------------------------------------------------- 1 | --- 2 | output: pdf_document 3 | fontsize: 12pt 4 | --- 5 | 6 | \thispagestyle{empty} 7 | \today 8 | 9 | Editor 10 | The R Journal 11 | \bigskip 12 | 13 | 14 | Thank you for considering our article "`clarify`: Simulation-Based Inference for Regression Models" for publication in the R Journal. The article describes the use of our package `clarify` for performing simulation-based inference of post-estimation quantities from regression, which enhances the interpretation of these models without making the same assumptions and approximations similar methods make. We believe this will be useful to anyone performing statistical analysis with regression and therefore will have broad appeal to R users. 
Please feel free to reach out with any questions or requests for additional materials that might be useful. 15 | 16 | \bigskip 17 | \bigskip 18 | 19 | Regards, 20 | 21 | 22 | 23 | 24 | Noah Greifer 25 | Institute for Quantitative Social Science 26 | Harvard University 27 | Cambridge, MA, USA 28 | ngreifer@iq.harvard.edu 29 | 30 | \bigskip 31 | -------------------------------------------------------------------------------- /man/clarify-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clarify-package.R 3 | \docType{package} 4 | \name{clarify-package} 5 | \alias{clarify} 6 | \alias{clarify-package} 7 | \title{clarify: Simulation-Based Inference for Regression Models} 8 | \description{ 9 | Performs simulation-based inference as an alternative to the delta method for obtaining valid confidence intervals and p-values for regression post-estimation quantities, such as average marginal effects and predictions at representative values. This framework for simulation-based inference is especially useful when the resulting quantity is not normally distributed and the delta method approximation fails. The methodology is described in King, Tomz, and Wittenberg (2000) \doi{10.2307/2669316}. 'clarify' is meant to replace some of the functionality of the archived package 'Zelig'; see the vignette "Translating Zelig to clarify" for replicating this functionality. 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://github.com/iqss/clarify} 15 | \item \url{https://iqss.github.io/clarify/} 16 | \item Report bugs at \url{https://github.com/iqss/clarify/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Noah Greifer \email{ngreifer@iq.harvard.edu} (\href{https://orcid.org/0000-0003-3067-7154}{ORCID}) 22 | 23 | Authors: 24 | \itemize{ 25 | \item Steven Worthington \email{sworthington@iq.harvard.edu} (\href{https://orcid.org/0000-0001-9550-5797}{ORCID}) 26 | \item Stefano Iacus \email{siacus@iq.harvard.edu} (\href{https://orcid.org/0000-0002-4884-0047}{ORCID}) 27 | \item Gary King \email{king@harvard.edu} (\href{https://orcid.org/0000-0002-5327-7631}{ORCID}) 28 | } 29 | 30 | } 31 | \keyword{internal} 32 | -------------------------------------------------------------------------------- /man/figures/README-example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/man/figures/README-example-1.png -------------------------------------------------------------------------------- /man/figures/README-example2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/man/figures/README-example2-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/man/figures/README-unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-7-1.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/man/figures/README-unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /man/misim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/misim.R 3 | \name{misim} 4 | \alias{misim} 5 | \title{Simulate model coefficients after multiple imputation} 6 | \usage{ 7 | misim(fitlist, n = 1000, vcov = NULL, coefs = NULL, dist = NULL) 8 | } 9 | \arguments{ 10 | \item{fitlist}{a list of model fits, one for each imputed dataset, or a \code{mira} object (the output of a call to \code{with()} applied to a \code{mids} object in \code{mice}).} 11 | 12 | \item{n}{the number of simulations to run for each imputed dataset; default is 1000. More is always better but resulting calculations will take longer.} 13 | 14 | \item{vcov}{a square covariance matrix of the coefficient estimates, a function to use to extract it from each fit in \code{fitlist}, or a list thereof with an element for each imputed dataset. By default, uses \code{\link[stats:vcov]{stats::vcov()}} or \code{\link[insight:get_varcov]{insight::get_varcov()}} if that doesn't work.} 15 | 16 | \item{coefs}{a vector of coefficient estimates, a function to use to extract it from each fit in \code{fitlist}, or a list thereof with an element for each imputed dataset. By default, uses \code{\link[stats:coef]{stats::coef()}} or \code{\link[insight:get_parameters]{insight::get_parameters()}} if that doesn't work.} 17 | 18 | \item{dist}{a character vector containing the name of the multivariate distribution(s) to use to draw simulated coefficients. Should be one of \code{"normal"} (multivariate normal distribution) or \code{"t_{#}"} (multivariate t distribution), where \verb{\{#\}} corresponds to the desired degrees of freedom (e.g., \code{"t_100"}). If \code{NULL}, the right distributions to use will be determined based on heuristics; see \code{\link[=sim]{sim()}} for details.} 19 | } 20 | \value{ 21 | A \code{clarify_misim} object, which inherits from \code{clarify_sim} and has the following components: 22 | \item{sim.coefs}{a matrix containing the simulated coefficients with a column for each coefficient and a row for each simulation for each imputation} 23 | \item{coefs}{a matrix containing the original coefficients extracted from \code{fitlist} or supplied to \code{coefs}, with a row per imputation.} 24 | \item{fit}{the list of model fits supplied to \code{fitlist}} 25 | \item{imp}{an identifier of which imputed dataset each set of simulated coefficients corresponds to.} 26 | The \code{"dist"} attribute contains \code{"normal"} if the coefficients were sampled from a multivariate normal distribution and \code{"t({df})"} if sampled from a multivariate t distribution. The \code{"clarify_hash"} attribute contains a unique hash generated by \code{\link[rlang:hash]{rlang::hash()}}. 27 | } 28 | \description{ 29 | \code{misim()} simulates model parameters from multivariate normal or t distributions after multiple imputation that are then used by \code{\link[=sim_apply]{sim_apply()}} to calculate quantities of interest. 30 | } 31 | \details{ 32 | \code{misim()} essentially combines multiple \code{sim()} calls applied to a list of model fits, each fit to an imputed dataset, into a single combined pool of simulated coefficients.
When simulation-based inference is to be used with multiply imputed data, many imputations are required; see Zhou and Reiter (2010). 33 | } 34 | \examples{ 35 | \dontshow{if (requireNamespace("Amelia", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 36 | data("africa", package = "Amelia") 37 | 38 | # Multiple imputation using Amelia 39 | a.out <- Amelia::amelia(x = africa, m = 10, 40 | cs = "country", 41 | ts = "year", logs = "gdp_pc", 42 | p2s = 0) 43 | 44 | fits <- with(a.out, lm(gdp_pc ~ infl * trade)) 45 | 46 | # Simulate coefficients 47 | s <- misim(fits) 48 | s 49 | \dontshow{\}) # examplesIf} 50 | } 51 | \references{ 52 | Zhou, X., & Reiter, J. P. (2010). A Note on Bayesian Inference After Multiple Imputation. \emph{The American Statistician}, 64(2), 159–163. \doi{10.1198/tast.2010.09109} 53 | } 54 | \seealso{ 55 | \itemize{ 56 | \item \code{\link[=sim]{sim()}} for simulating model coefficients for a single dataset 57 | \item \code{\link[=sim_apply]{sim_apply()}} for applying a function to each set of simulated coefficients 58 | \item \code{\link[=sim_ame]{sim_ame()}} for computing average marginal effects in each simulation draw 59 | \item \code{\link[=sim_setx]{sim_setx()}} for computing marginal predictions and first differences at typical values in each simulation draw 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /man/plot.clarify_adrf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.clarify_adrf.R 3 | \name{plot.clarify_adrf} 4 | \alias{plot.clarify_adrf} 5 | \title{Plot marginal predictions from \code{sim_adrf()}} 6 | \usage{ 7 | \method{plot}{clarify_adrf}( 8 | x, 9 | ci = TRUE, 10 | level = 0.95, 11 | method = "quantile", 12 | baseline, 13 | color = "black", 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{a \code{clarify_adrf} object resulting from a call to \code{\link[=sim_adrf]{sim_adrf()}}.} 19 | 20 | \item{ci}{\code{logical}; whether to display confidence bands for the estimates. Default is \code{TRUE}.} 21 | 22 | \item{level}{the confidence level desired. Default is .95 for 95\% confidence intervals.} 23 | 24 | \item{method}{the method used to compute confidence bands. Can be \code{"wald"} to use a Normal approximation or \code{"quantile"} to use the simulated sampling distribution (default). See \code{\link[=summary.clarify_est]{summary.clarify_est()}} for details. Abbreviations allowed.} 25 | 26 | \item{baseline}{\code{logical}; whether to include a horizontal line at \code{y = 0} on the plot. Default is \code{FALSE} for the ADRF (since 0 might not be in the range of the outcome) and \code{TRUE} for the AMEF.} 27 | 28 | \item{color}{the color of the line and confidence band in the plot.} 29 | 30 | \item{...}{for \code{plot()}, further arguments passed to \code{\link[ggplot2:geom_density]{ggplot2::geom_density()}}.} 31 | } 32 | \value{ 33 | A \code{ggplot} object. 34 | } 35 | \description{ 36 | \code{plot.clarify_adrf()} plots the output of \code{\link[=sim_adrf]{sim_adrf()}}. 
For the average dose-response function (ADRF, requested with \code{contrast = "adrf"} in \code{sim_adrf()}), this is a plot of the average marginal mean of the outcome against the requested values of the focal predictor; for the average marginal effects function (AMEF, requested with \code{contrast = "amef"} in \code{sim_adrf()}), this is a plot of the instantaneous average marginal effect of the focal predictor on the outcome against the requested values of the focal predictor. 37 | } 38 | \details{ 39 | These plots are produced using \code{\link[ggplot2:geom_path]{ggplot2::geom_line()}} and \code{\link[ggplot2:geom_ribbon]{ggplot2::geom_ribbon()}}. The confidence bands should be interpreted pointwise (i.e., they do not account for simultaneous inference). 40 | } 41 | \examples{ 42 | ## See help("sim_adrf") for examples 43 | 44 | } 45 | \seealso{ 46 | \code{\link[=summary.clarify_est]{summary.clarify_est()}} for computing p-values and confidence intervals for the estimated quantities. 47 | } 48 | -------------------------------------------------------------------------------- /man/plot.clarify_setx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.clarify_setx.R 3 | \name{plot.clarify_setx} 4 | \alias{plot.clarify_setx} 5 | \title{Plot marginal predictions from \code{sim_setx()}} 6 | \usage{ 7 | \method{plot}{clarify_setx}( 8 | x, 9 | var = NULL, 10 | ci = TRUE, 11 | level = 0.95, 12 | method = "quantile", 13 | reference = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{x}{a \code{clarify_est} object resulting from a call to \code{\link[=sim_setx]{sim_setx()}}.} 19 | 20 | \item{var}{the name of the focal varying predictor, i.e., the variable to be on the x-axis of the plot. All other variables with varying set values will be used to color the resulting plot. See Details. Ignored if no predictors vary or if only one predictor varies in the reference grid or if \code{x1} was specified in \code{sim_setx()}. If not set, will use the predictor with the greatest number of unique values specified in the reference grid.} 21 | 22 | \item{ci}{\code{logical}; whether to display confidence intervals or bands for the estimates. Default is \code{TRUE}.} 23 | 24 | \item{level}{the confidence level desired. Default is .95 for 95\% confidence intervals.} 25 | 26 | \item{method}{the method used to compute confidence intervals or bands. Can be \code{"wald"} to use a Normal approximation or \code{"quantile"} to use the simulated sampling distribution (default). See \code{\link[=summary.clarify_est]{summary.clarify_est()}} for details. Abbreviations allowed.} 27 | 28 | \item{reference}{\code{logical}; whether to overlay a normal density reference distribution over the plots. Default is \code{FALSE}. Ignored when variables other than the focal varying predictor vary.} 29 | 30 | \item{...}{for \code{plot()}, further arguments passed to \code{\link[ggplot2:geom_density]{ggplot2::geom_density()}}.} 31 | } 32 | \value{ 33 | A \code{ggplot} object. 34 | } 35 | \description{ 36 | \code{plot.clarify_setx()} plots the output of \code{\link[=sim_setx]{sim_setx()}}, providing graphics similar to those of \code{\link[=plot.clarify_est]{plot.clarify_est()}} but with features specifically for plotting marginal predictions. For continuous predictors, this is a plot of the marginal predictions and their confidence bands across levels of the predictor.
Otherwise, this is a plot of the simulated sampling distribution of the marginal predictions. 37 | } 38 | \details{ 39 | \code{plot()} creates one of two kinds of plots depending on how the reference grid was specified in the call to \code{sim_setx()} and what \code{var} is set to. When the focal varying predictor (i.e., the one set in \code{var}) is numeric and takes on three or more unique values in the reference grid, the produced plot is a line graph displaying the value of the marginal prediction (denoted as \code{E[Y|X]}) across values of the focal varying predictor, with confidence bands displayed when \code{ci = TRUE}. If other predictors also vary, lines for different values will be displayed in different colors. These plots are produced using \code{\link[ggplot2:geom_path]{ggplot2::geom_line()}} and \code{\link[ggplot2:geom_ribbon]{ggplot2::geom_ribbon()}}. 40 | 41 | When the focal varying predictor is a factor or character or only takes on two or fewer values in the reference grid, the produced plot is a density plot of the simulated predictions, similar to the plot resulting from \code{\link[=plot.clarify_est]{plot.clarify_est()}}. When other variables vary, densities for different values will be displayed in different colors. These plots are produced using \code{\link[ggplot2:geom_density]{ggplot2::geom_density()}}. 42 | 43 | Marginal predictions are identified by the corresponding levels of the predictors that vary. The user should keep track of whether the non-varying predictors are set at specified or automatically set "typical" levels. 44 | } 45 | \examples{ 46 | ## See help("sim_setx") for examples 47 | 48 | } 49 | \seealso{ 50 | \code{\link[=summary.clarify_est]{summary.clarify_est()}} for computing p-values and confidence intervals for the estimated quantities. 51 | } 52 | -------------------------------------------------------------------------------- /man/sim.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sim.R 3 | \name{sim} 4 | \alias{sim} 5 | \title{Simulate model parameters} 6 | \usage{ 7 | sim(fit, n = 1000, vcov = NULL, coefs = NULL, dist = NULL) 8 | } 9 | \arguments{ 10 | \item{fit}{a model fit, such as the output of a call to \code{\link[=lm]{lm()}} or \code{\link[=glm]{glm()}}. Can be left unspecified if \code{coefs} and \code{vcov} are not functions.} 11 | 12 | \item{n}{the number of simulations to run; default is 1000. More is always better but resulting calculations will take longer.} 13 | 14 | \item{vcov}{either a square covariance matrix of the coefficient estimates or a function to use to extract it from \code{fit}. By default, uses \code{\link[stats:vcov]{stats::vcov()}} or \code{\link[insight:get_varcov]{insight::get_varcov()}} if that doesn't work.} 15 | 16 | \item{coefs}{either a vector of coefficient estimates or a function to use to extract it from \code{fit}. By default, uses \code{\link[stats:coef]{stats::coef()}} or \code{\link[insight:get_parameters]{insight::get_parameters()}} if that doesn't work.} 17 | 18 | \item{dist}{a string containing the name of the multivariate distribution to use to draw simulated coefficients. Should be one of \code{"normal"} (multivariate normal distribution) or \code{"t({#})"} (multivariate t distribution), where \verb{\{#\}} corresponds to the desired degrees of freedom (e.g., \code{"t(100)"}).
If \code{NULL}, the appropriate distribution to use will be determined heuristically; see Details.} 19 | } 20 | \value{ 21 | A \code{clarify_sim} object, which has the following components: 22 | \item{sim.coefs}{a matrix containing the simulated coefficients with a column for each coefficient and a row for each simulation} 23 | \item{coefs}{the original coefficients extracted from \code{fit} or supplied to \code{coefs}.} 24 | \item{vcov}{the covariance matrix of the coefficients extracted from \code{fit} or supplied to \code{vcov}.} 25 | \item{fit}{the original model fit supplied to \code{fit}.} 26 | The \code{"dist"} attribute contains \code{"normal"} if the coefficients were sampled from a multivariate normal distribution and \code{"t(df)"} if sampled from a multivariate t distribution. The \code{"clarify_hash"} attribute contains a unique hash generated by \code{\link[rlang:hash]{rlang::hash()}}. 27 | } 28 | \description{ 29 | \code{sim()} simulates model parameters from a multivariate normal or t distribution that are then used by \code{\link[=sim_apply]{sim_apply()}} to calculate quantities of interest. 30 | } 31 | \details{ 32 | When \code{dist} is \code{NULL}, \code{sim()} samples from a multivariate normal or t distribution depending on the degrees of freedom extracted from \code{insight::get_df(., type = "wald")}. If \code{Inf}, a normal distribution will be used; otherwise, a t-distribution with the returned degrees of freedom will be used. Models not supported by \code{insight} will use a normal distribution. 33 | 34 | When a multivariate normal distribution is used, draws are made with means equal to the estimated coefficients and covariance matrix equal to the parameter covariance matrix, using \code{\link[mvnfast:rmvn]{mvnfast::rmvn()}}. When a multivariate t distribution is used, draws are made with means equal to the estimated coefficients and scaling matrix equal to \code{cov*(df - 2)/df}, where \code{cov} is the parameter covariance matrix and \code{df} is the residual degrees of freedom for the model, using \code{\link[mvnfast:rmvt]{mvnfast::rmvt()}}.
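For intuition, the t-distribution sampling described above corresponds roughly to the following sketch (hypothetical code shown for illustration only, not \code{sim()}'s actual internals; it assumes a model \code{fit} with finite residual degrees of freedom):

\preformatted{b <- coef(fit)          # estimated coefficients
V <- vcov(fit)          # parameter covariance matrix
df <- df.residual(fit)  # residual degrees of freedom

# Draw 1000 sets of coefficients from a multivariate t distribution
# with the scaling matrix cov*(df - 2)/df described above
sim.coefs <- mvnfast::rmvt(1000, mu = b,
                           sigma = V * (df - 2) / df,
                           df = df)
}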
35 | } 36 | \examples{ 37 | 38 | data("lalonde", package = "MatchIt") 39 | fit <- lm(re78 ~ treat * (age + race + nodegree + re74), data = lalonde) 40 | 41 | # Simulate coefficients 42 | s <- sim(fit) 43 | s 44 | 45 | ## Could also use a robust covariance matrix, e.g., 46 | s <- sim(fit, vcov = "HC3") 47 | 48 | # Simulate coefficients assuming a normal distribution; 49 | # the default for `lm` objects is a t- 50 | # distribution 51 | s <- sim(fit, dist = "normal") 52 | s 53 | 54 | } 55 | \seealso{ 56 | \itemize{ 57 | \item \code{\link[=misim]{misim()}} for simulating model coefficients after multiple imputation 58 | \item \code{\link[=sim_apply]{sim_apply()}} for applying a function to each set of simulated coefficients 59 | \item \code{\link[=sim_ame]{sim_ame()}} for computing average marginal effects in each simulation draw 60 | \item \code{\link[=sim_setx]{sim_setx()}} for computing marginal predictions and first differences at typical values in each simulation draw 61 | \item \code{\link[=sim_adrf]{sim_adrf()}} for computing average dose-response functions in each simulation draw 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /man/sim_adrf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sim_adrf.R 3 | \name{sim_adrf} 4 | \alias{sim_adrf} 5 | \alias{print.clarify_adrf} 6 | \title{Compute an average dose-response function} 7 | \usage{ 8 | sim_adrf( 9 | sim, 10 | var, 11 | subset = NULL, 12 | by = NULL, 13 | contrast = "adrf", 14 | at = NULL, 15 | n = 21, 16 | outcome = NULL, 17 | type = NULL, 18 | eps = 1e-05, 19 | verbose = TRUE, 20 | cl = NULL 21 | ) 22 | 23 | \method{print}{clarify_adrf}(x, digits = NULL, max.ests = 6, ...) 24 | } 25 | \arguments{ 26 | \item{sim}{a \code{clarify_sim} object; the output of a call to \code{\link[=sim]{sim()}} or 27 | \code{\link[=misim]{misim()}}.} 28 | 29 | \item{var}{the name of a variable for which the ADRF or AMEF is to be computed. This variable must be present in the model supplied to \code{sim()} and must be a numeric variable taking on more than two unique values.} 30 | 31 | \item{subset}{optional; a vector used to subset the data used to compute the ADRF or AMEF. This will be evaluated within the original dataset used to fit the model using \code{\link[=subset]{subset()}}, so nonstandard evaluation is allowed.} 32 | 33 | \item{by}{a one-sided formula or character vector containing the names of variables for which to stratify the estimates. Each quantity will be computed within each level of the complete cross of the variables specified in \code{by}.} 34 | 35 | \item{contrast}{a string naming the type of quantity to be produced: \code{"adrf"} for the ADRF (the default) or \code{"amef"} for the AMEF.} 36 | 37 | \item{at}{the levels of the variable named in \code{var} at which to evaluate the ADRF or AMEF. Should be a vector of numeric values corresponding to possible levels of \code{var}. If \code{NULL}, will be set to a range from slightly below the lowest observed value of \code{var} to slightly above the largest value.} 38 | 39 | \item{n}{when \code{at = NULL}, the number of points to evaluate the ADRF or AMEF. Default is 21. Ignored when \code{at} is not \code{NULL}.} 40 | 41 | \item{outcome}{a string containing the name of the outcome or outcome level for multivariate (multiple outcomes) or multi-category outcomes.
Ignored for univariate (single outcome) and binary outcomes.} 42 | 43 | \item{type}{a string containing the type of predicted values (e.g., the link or the response). Passed to \code{\link[marginaleffects:get_predict]{marginaleffects::get_predict()}} and eventually to \code{predict()} in most cases. The default and allowable options depend on the type of model supplied, but almost always correspond to the response scale (e.g., predicted probabilities for binomial models).} 44 | 45 | \item{eps}{when \code{contrast = "amef"}, the value by which to shift the value of \code{var} to approximate the derivative. See Details.} 46 | 47 | \item{verbose}{\code{logical}; whether to display a text progress bar indicating 48 | progress and estimated time remaining for the procedure. Default is \code{TRUE}.} 49 | 50 | \item{cl}{a cluster object created by \code{\link[parallel:makeCluster]{parallel::makeCluster()}}, or an 51 | integer to indicate the number of child-processes (integer values are 52 | ignored on Windows) for parallel evaluations. See \code{\link[pbapply:pbapply]{pbapply::pblapply()}} for 53 | details. If \code{NULL}, no parallelization will take place.} 54 | 55 | \item{x}{a \code{clarify_adrf} object.} 56 | 57 | \item{digits}{the minimum number of significant digits to be used; passed to \code{\link[=print.data.frame]{print.data.frame()}}.} 58 | 59 | \item{max.ests}{the maximum number of estimates to display.} 60 | 61 | \item{...}{optional arguments passed to \code{FUN}.} 62 | } 63 | \value{ 64 | A \code{clarify_adrf} object, which inherits from \code{clarify_est} and is similar to 65 | the output of \code{sim_apply()}, with the additional attributes \code{"var"} containing 66 | the variable named in \code{var}, \code{"by"} containing the names of the variables specified in \code{by} (if any), \code{"at"} containing values at which the ADRF or AMEF is evaluated, and \code{"contrast"} containing the argument supplied to \code{contrast}. For an ADRF, the average marginal means will be named 67 | \code{E[Y({v})]}, where \code{{v}} is replaced with the values in \code{at}. For an AMEF, the average marginal effects will be 68 | named \code{dY/d({x})|{a}} where \code{{x}} is replaced with \code{var} and \code{{a}} is replaced by the values in \code{at}. 69 | } 70 | \description{ 71 | \code{sim_adrf()} is a wrapper for \code{\link[=sim_apply]{sim_apply()}} that computes average dose-response functions (ADRFs) and average marginal effect functions (AMEFs). An ADRF describes the relationship between values a focal variable can take and the expected value of the outcome were all units to be given each value of the variable. An AMEF describes the relationship between values a focal variable can take and the derivative of the ADRF at each value. 72 | } 73 | \details{ 74 | The ADRF is composed of average marginal means across levels of the focal predictor. For each level of the focal predictor, predicted values of the outcome are computed after setting the value of the predictor to that level, and those values of the outcome are averaged across all units in the sample to arrive at an average marginal mean. Thus, the ADRF represents the relationship between the "dose" (i.e., the level of the focal predictor) and the average "response" (i.e., the outcome variable). It is the continuous analog to the average marginal effect computed for a binary predictor, e.g., using \code{\link[=sim_ame]{sim_ame()}}.
Although inference can be performed at each level of the predictor or between two levels of the predictor, a plot of the ADRF is typically the most useful summary. These plots can be requested using \code{\link[=plot.clarify_adrf]{plot.clarify_adrf()}}. 75 | 76 | The AMEF is the derivative of the ADRF; if we call the derivative of the ADRF at each point a "treatment effect" (i.e., the rate at which the outcome changes corresponding to a small change in the predictor, or "treatment"), the AMEF is a function that relates the size of the treatment effect to the level of the treatment. The shape of the AMEF is usually of less importance than the value of the AMEF at each level of the predictor, which corresponds to the size of the treatment effect at the corresponding level. The AMEF is computed by computing the ADRF at each level of the focal predictor specified in \code{at}, shifting the predictor value by a tiny amount (controlled by \code{eps}), and computing the ratio of the change in the outcome to the shift, then averaging this value across all units. This quantity is related to the average marginal effect of a continuous predictor as computed by \code{\link[=sim_ame]{sim_ame()}}, but rather than averaging these treatment effects across all observed levels of the treatment, the AMEF is a function evaluated at each possible level of the treatment. The "tiny amount" used is \code{eps} times the standard deviation of \code{var}. 77 | } 78 | \examples{ 79 | data("lalonde", package = "MatchIt") 80 | 81 | # Fit the model 82 | fit <- glm(I(re78 > 0) ~ treat + age + race + 83 | married + re74, 84 | data = lalonde, family = binomial) 85 | 86 | # Simulate coefficients 87 | set.seed(123) 88 | s <- sim(fit, n = 100) 89 | 90 | # ADRF for `age` 91 | est <- sim_adrf(s, var = "age", 92 | at = seq(15, 55, length.out = 6), 93 | verbose = FALSE) 94 | est 95 | plot(est) 96 | 97 | # AMEF for `age` 98 | est <- sim_adrf(s, var = "age", contrast = "amef", 99 | at = seq(15, 55, length.out = 6), 100 | verbose = FALSE) 101 | est 102 | summary(est) 103 | plot(est) 104 | 105 | # ADRF for `age` within levels of `married` 106 | est <- sim_adrf(s, var = "age", 107 | at = seq(15, 55, length.out = 6), 108 | by = ~married, 109 | verbose = FALSE) 110 | est 111 | plot(est) 112 | 113 | ## Difference between ADRFs 114 | est_diff <- est[7:12] - est[1:6] 115 | plot(est_diff) + ggplot2::labs(y = "Diff") 116 | } 117 | \seealso{ 118 | \code{\link[=plot.clarify_adrf]{plot.clarify_adrf()}} for plotting the ADRF or AMEF; \code{\link[=sim_ame]{sim_ame()}} for computing average marginal effects; \code{\link[=sim_apply]{sim_apply()}}, which provides a general interface to computing any 119 | quantities for simulation-based inference; \code{\link[=summary.clarify_est]{summary.clarify_est()}} for computing 120 | p-values and confidence intervals for the estimated quantities. 121 | 122 | \code{\link[marginaleffects:slopes]{marginaleffects::avg_slopes()}} and \code{\link[marginaleffects:predictions]{marginaleffects::avg_predictions()}} for delta method-based implementations of computing average marginal effects and average marginal means.
123 | } 124 | -------------------------------------------------------------------------------- /man/sim_apply.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sim_apply.R 3 | \name{sim_apply} 4 | \alias{sim_apply} 5 | \title{Apply a function to simulated parameter values} 6 | \usage{ 7 | sim_apply(sim, FUN, verbose = TRUE, cl = NULL, ...) 8 | } 9 | \arguments{ 10 | \item{sim}{a \code{clarify_sim} object; the output of a call to \code{\link[=sim]{sim()}} or 11 | \code{\link[=misim]{misim()}}.} 12 | 13 | \item{FUN}{a function to be applied to each set of simulated coefficients. 14 | See Details.} 15 | 16 | \item{verbose}{\code{logical}; whether to display a text progress bar indicating 17 | progress and estimated time remaining for the procedure. Default is \code{TRUE}.} 18 | 19 | \item{cl}{a cluster object created by \code{\link[parallel:makeCluster]{parallel::makeCluster()}}, or an 20 | integer to indicate the number of child-processes (integer values are 21 | ignored on Windows) for parallel evaluations. See \code{\link[pbapply:pbapply]{pbapply::pblapply()}} for 22 | details. If \code{NULL}, no parallelization will take place.} 23 | 24 | \item{...}{optional arguments passed to \code{FUN}.} 25 | } 26 | \value{ 27 | A \code{clarify_est} object, which is a matrix with a column for each 28 | estimated quantity and a row for each simulation. The original estimates 29 | (\code{FUN} applied to the original coefficients or model fit object) are stored 30 | in the attribute \code{"original"}. The \code{"sim_hash"} attribute contains the 31 | simulation hash produced by \code{sim()}. 32 | } 33 | \description{ 34 | \code{sim_apply()} applies a function that produces quantities of 35 | interest to each set of simulated coefficients produced by \code{\link[=sim]{sim()}}; these 36 | calculated quantities form the posterior sampling distribution for the 37 | quantities of interest. Capabilities are available for parallelization. 38 | } 39 | \details{ 40 | \code{sim_apply()} applies a function, \code{FUN}, to each set of simulated 41 | coefficients, similar to \code{\link[=apply]{apply()}}. This function should return a numeric 42 | vector containing one or more estimated quantities. This should be a named 43 | vector to more easily keep track of the meaning of each estimated quantity. 44 | Care should be taken to ensure that the returned vector is the same length 45 | each time \code{FUN} is called. \code{NA}s are allowed in the output but should be 46 | avoided if possible. 47 | 48 | The arguments to \code{FUN} can be specified in a few ways. If \code{FUN} has an 49 | argument called \code{coefs}, a simulated set of coefficients will be passed to 50 | this argument, and \code{FUN} should compute and return a quantity based on the 51 | coefficients (e.g., the difference between two coefficients if one wants to 52 | test whether two coefficients are equal). If \code{FUN} has an argument called 53 | \code{fit}, a model fit object of the same type as the one originally supplied 54 | to \code{sim()} (e.g., an \code{lm} or \code{glm} object) will be passed to this argument, 55 | where the coefficients of the fit object have been replaced by the 56 | simulated coefficients generated by \code{sim()}, and \code{FUN} should compute and 57 | return a quantity based on the model fit (e.g., a computation based on the 58 | output of \code{predict()}). 
If neither \code{coefs} nor \code{fit} is the name of 59 | an argument to \code{FUN}, the model fit object with replaced coefficients will be 60 | supplied to the first argument of \code{FUN}. 61 | 62 | When custom coefficients are supplied to \code{sim()}, i.e., when the \code{coefs} 63 | argument to \code{sim()} is not left at its default value, \code{FUN} must accept a 64 | \code{coefs} argument and a warning will be thrown if it accepts a \code{fit} 65 | argument. This is because \code{sim_apply()} does not know how to reconstruct 66 | the original fit object with the new coefficients inserted. The quantities 67 | computed by \code{sim_apply()} must therefore be computed directly from the 68 | coefficients. 69 | 70 | If \code{FUN} is not supplied at all, the simulated values of the coefficients will be returned in the output with a warning. Set \code{FUN} to \code{NULL} or \code{verbose} to \code{FALSE} to suppress this warning. 71 | \subsection{\code{sim_apply()} with multiply imputed data}{ 72 | 73 | When using \code{\link[=misim]{misim()}} and \code{sim_apply()} with multiply imputed data, the 74 | coefficients are supplied to the model fit corresponding to the imputation 75 | identifier associated with each set of coefficients, which means if \code{FUN} 76 | uses a dataset extracted from a model (e.g., using \code{\link[insight:get_data]{insight::get_data()}}), it will do so from the model fit in 77 | the corresponding imputation. 78 | 79 | The original estimates (see Value below) are computed as the mean of the 80 | estimates across the imputations using the original coefficients averaged 81 | across imputations. That is, first, the coefficients estimated in the 82 | models in the imputed datasets are combined to form a single set of pooled 83 | coefficients; then, for each imputation, the quantities of interest are 84 | computed using the pooled coefficients; finally, the resulting 85 | estimates are averaged across the imputations to yield the "original" estimates. 86 | Note this procedure is only valid for quantities with symmetric sampling 87 | distributions, which excludes quantities like risk ratios and odds ratios, 88 | but includes log risk ratios and log odds ratios. The desired quantities 89 | can be transformed from their log versions using 90 | \code{\link[=transform]{transform()}}.
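As a conceptual sketch of this pooling procedure (hypothetical code shown for illustration only, not \code{sim_apply()}'s actual internals; \code{fits} is assumed to be the list of model fits from the imputed datasets, and \code{compute_qoi()} is a hypothetical stand-in for \code{FUN}):

\preformatted{# Pool the coefficients by averaging them across imputations
pooled <- colMeans(do.call(rbind, lapply(fits, coef)))

# Compute the quantities of interest in each imputation using the
# pooled coefficients, then average the results across imputations
ests <- lapply(fits, function(f) compute_qoi(fit = f, coefs = pooled))
original <- Reduce(`+`, ests) / length(ests)
}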
91 | } 92 | } 93 | \examples{ 94 | 95 | data("lalonde", package = "MatchIt") 96 | fit <- lm(re78 ~ treat + age + race + nodegree + re74, 97 | data = lalonde) 98 | coef(fit) 99 | 100 | set.seed(123) 101 | s <- sim(fit, n = 500) 102 | 103 | # Function to compare predicted values for two units 104 | # using `fit` argument 105 | sim_fun <- function(fit) { 106 | pred1 <- unname(predict(fit, newdata = lalonde[1,])) 107 | pred2 <- unname(predict(fit, newdata = lalonde[2,])) 108 | c(pred1 = pred1, pred2 = pred2) 109 | } 110 | 111 | est <- sim_apply(s, sim_fun, verbose = FALSE) 112 | 113 | # Add difference between predicted values as 114 | # additional quantity 115 | est <- transform(est, `diff 1-2` = pred1 - pred2) 116 | 117 | # Examine estimates and confidence intervals 118 | summary(est) 119 | 120 | # Function to compare coefficients using `coefs` 121 | # argument 122 | sim_fun <- function(coefs) { 123 | setNames(coefs["racewhite"] - coefs["racehispan"], 124 | "wh - his") 125 | } 126 | 127 | est <- sim_apply(s, sim_fun, verbose = FALSE) 128 | 129 | # Examine estimates and confidence intervals 130 | summary(est) 131 | 132 | # Another way to do the above: 133 | est <- sim_apply(s, FUN = NULL) 134 | est <- transform(est, 135 | `wh - his` = `racewhite` - `racehispan`) 136 | 137 | summary(est, parm = "wh - his") 138 | 139 | } 140 | \seealso{ 141 | \itemize{ 142 | \item \code{\link[=sim]{sim()}} for generating the simulated coefficients 143 | \item \code{\link[=summary.clarify_est]{summary.clarify_est()}} for computing p-values and confidence intervals for 144 | the estimated quantities 145 | \item \code{\link[=plot.clarify_est]{plot.clarify_est()}} for plotting estimated 146 | quantities and their simulated posterior sampling distribution. 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /man/sim_setx.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sim_setx.R 3 | \name{sim_setx} 4 | \alias{sim_setx} 5 | \alias{print.clarify_setx} 6 | \title{Compute predictions and first differences at set values} 7 | \usage{ 8 | sim_setx( 9 | sim, 10 | x = list(), 11 | x1 = list(), 12 | outcome = NULL, 13 | type = NULL, 14 | verbose = TRUE, 15 | cl = NULL 16 | ) 17 | 18 | \method{print}{clarify_setx}(x, digits = NULL, max.ests = 6, ...) 19 | } 20 | \arguments{ 21 | \item{sim}{a \code{clarify_sim} object; the output of a call to \code{\link[=sim]{sim()}} or 22 | \code{\link[=misim]{misim()}}.} 23 | 24 | \item{x}{a data.frame containing a reference grid of predictor values or a named list of values each predictor should take defining such a 25 | reference grid, e.g., \code{list(v1 = 1:4, v2 = c("A", "B"))}. 26 | Any omitted predictors are fixed at a "typical" value. See Details. 27 | When \code{x1} is specified, \code{x} should identify a single reference unit. 28 | 29 | For \code{print()}, a \code{clarify_setx} object.} 30 | 31 | \item{x1}{a data.frame or named list of the value each predictor should take to compute the 32 | first difference from the predictor combination specified in \code{x}. \code{x1} can 33 | only identify a single unit. See Details.} 34 | 35 | \item{outcome}{a string containing the name of the outcome or outcome level for multivariate (multiple outcomes) or multi-category outcomes. 
Ignored for univariate (single outcome) and binary outcomes.} 36 | 37 | \item{type}{a string containing the type of predicted values (e.g., the link or the response). Passed to \code{\link[marginaleffects:get_predict]{marginaleffects::get_predict()}} and eventually to \code{predict()} in most cases. The default and allowable options depend on the type of model supplied, but almost always correspond to the response scale (e.g., predicted probabilities for binomial models).} 38 | 39 | \item{verbose}{\code{logical}; whether to display a text progress bar indicating 40 | progress and estimated time remaining for the procedure. Default is \code{TRUE}.} 41 | 42 | \item{cl}{a cluster object created by \code{\link[parallel:makeCluster]{parallel::makeCluster()}}, or an 43 | integer to indicate the number of child-processes (integer values are 44 | ignored on Windows) for parallel evaluations. See \code{\link[pbapply:pbapply]{pbapply::pblapply()}} for 45 | details. If \code{NULL}, no parallelization will take place.} 46 | 47 | \item{digits}{the minimum number of significant digits to be used; passed to \code{\link[=print.data.frame]{print.data.frame()}}.} 48 | 49 | \item{max.ests}{the maximum number of estimates to display.} 50 | 51 | \item{...}{optional arguments passed to \code{FUN}.} 52 | } 53 | \value{ 54 | a \code{clarify_setx} object, which inherits from \code{clarify_est} and is similar to the output of \code{sim_apply()}, with the following additional attributes: 55 | \itemize{ 56 | \item \code{"setx"} - a data frame containing the values at which predictions are to be made 57 | \item \code{"fd"} - whether or not the first difference is to be computed; set to \code{TRUE} if \code{x1} is specified and \code{FALSE} otherwise 58 | } 59 | } 60 | \description{ 61 | \code{sim_setx()} is a wrapper for \code{\link[=sim_apply]{sim_apply()}} that computes predicted values of 62 | the outcome at specified values of the predictors, sometimes called marginal 63 | predictions. One can also compute the difference between two marginal 64 | predictions (the "first difference"). Although any function that accepts 65 | \code{clarify_est} objects can be used with \code{sim_setx()} output objects, a 66 | special plotting function, \code{\link[=plot.clarify_setx]{plot.clarify_setx()}}, can be used to plot marginal 67 | predictions. 68 | } 69 | \details{ 70 | When \code{x} is a named list of predictor values, they will be crossed 71 | to form a reference grid for the marginal predictions. Any predictors not 72 | set in \code{x} are assigned their "typical" value, which, for factor, 73 | character, logical, and binary variables is the mode, for numeric variables 74 | is the mean, and for ordered variables is the median. These values can be 75 | seen in the \code{"setx"} attribute of the output object. If \code{x} is empty, a 76 | prediction will be made at a point corresponding to the typical value of 77 | every predictor. Estimates are identified (in \code{summary()}, etc.) only by 78 | the variables that differ across predictions. 79 | 80 | When \code{x1} is supplied, the first difference is computed, which here is 81 | defined as the difference between two marginal predictions. One marginal 82 | prediction must be specified in \code{x} and another, ideally with a single 83 | predictor changed, specified in \code{x1}.
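A rough sketch of the "typical" value rule described above (hypothetical code shown for illustration only; \code{sim_setx()} computes these values internally, and the return types here are simplified):

\preformatted{typical <- function(x) {
  if (is.ordered(x)) {
    # Median level for ordered variables (via the integer codes)
    return(levels(x)[ceiling(median(as.integer(x)))])
  }
  if (is.numeric(x) && length(unique(x)) > 2) {
    # Mean for non-binary numeric variables
    return(mean(x))
  }
  # Mode for factor, character, logical, and binary variables
  names(sort(table(x), decreasing = TRUE))[1]
}
}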
84 | } 85 | \examples{ 86 | data("lalonde", package = "MatchIt") 87 | 88 | fit <- lm(re78 ~ treat + age + educ + married + race + re74, 89 | data = lalonde) 90 | 91 | # Simulate coefficients 92 | set.seed(123) 93 | s <- sim(fit, n = 100) 94 | 95 | # Predicted values at specified values of predictors, typical 96 | # values for other predictors 97 | est <- sim_setx(s, x = list(treat = 0:1, 98 | re74 = c(0, 10000)), 99 | verbose = FALSE) 100 | summary(est) 101 | plot(est) 102 | 103 | # Predicted values at specified grid of values, typical 104 | # values for other predictors 105 | est <- sim_setx(s, x = list(age = c(20, 25, 30, 35), 106 | married = 0:1), 107 | verbose = FALSE) 108 | summary(est) 109 | plot(est) 110 | 111 | # First differences of treat at specified value of 112 | # race, typical values for other predictors 113 | est <- sim_setx(s, x = data.frame(treat = 0, race = "hispan"), 114 | x1 = data.frame(treat = 1, race = "hispan"), 115 | verbose = FALSE) 116 | summary(est) 117 | plot(est) 118 | 119 | } 120 | \seealso{ 121 | \code{\link[=sim_apply]{sim_apply()}}, which provides a general interface to computing any 122 | quantities for simulation-based inference; \code{\link[=plot.clarify_setx]{plot.clarify_setx()}} for plotting the 123 | output of a call to \code{sim_setx()}; \code{\link[=summary.clarify_est]{summary.clarify_est()}} for computing 124 | p-values and confidence intervals for the estimated quantities. 125 | } 126 | -------------------------------------------------------------------------------- /man/summary.clarify_est.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot.clarify_est.R, R/summary.clarify_est.R 3 | \name{plot.clarify_est} 4 | \alias{plot.clarify_est} 5 | \alias{summary.clarify_est} 6 | \alias{confint.clarify_est} 7 | \title{Plotting and inference for \code{clarify_est} objects} 8 | \usage{ 9 | \method{plot}{clarify_est}( 10 | x, 11 | parm, 12 | ci = TRUE, 13 | level = 0.95, 14 | method = "quantile", 15 | reference = FALSE, 16 | ncol = 3, 17 | ... 18 | ) 19 | 20 | \method{summary}{clarify_est}(object, parm, level = 0.95, method = "quantile", null = NA, ...) 21 | 22 | \method{confint}{clarify_est}(object, parm, level = 0.95, method = "quantile", ...) 23 | } 24 | \arguments{ 25 | \item{parm}{a vector of the names or indices of the estimates to plot. If unspecified, all estimates will be displayed.} 26 | 27 | \item{ci}{\code{logical}; whether to display confidence interval limits for the estimates. Default is \code{TRUE}.} 28 | 29 | \item{level}{the confidence level desired. Default is .95 for 95\% confidence intervals.} 30 | 31 | \item{method}{the method used to compute p-values and confidence intervals. Can be \code{"wald"} to use a Normal approximation or \code{"quantile"} to use the simulated sampling distribution (default). See Details. Abbreviations allowed.} 32 | 33 | \item{reference}{\code{logical}; whether to overlay a normal density reference distribution over the plots.
Default is \code{FALSE}.} 34 | 35 | \item{ncol}{the number of columns used when wrapping multiple plots; default is 3.} 36 | 37 | \item{...}{for \code{plot()}, further arguments passed to \code{\link[ggplot2:geom_density]{ggplot2::geom_density()}}.} 38 | 39 | \item{object, x}{a \code{clarify_est} object; the output of a call to \code{\link[=sim_apply]{sim_apply()}} or its wrappers.} 40 | 41 | \item{null}{the values of the parameters under the null hypothesis for the p-value calculations. Should have length equal to the number of quantities estimated, or one, in which case it will be recycled, or it can be a named vector with just the names of quantities for which null values are to be set. Set values to \code{NA} to omit p-values for those quantities. When all values are \code{NA}, the default, no p-values are produced.} 42 | } 43 | \value{ 44 | For \code{summary()}, a \code{summary.clarify_est} object, which is a matrix containing the coefficient estimates, standard errors, test statistics, p-values, and confidence intervals. Not all columns will be present depending on the arguments supplied to \code{summary()}. 45 | 46 | For \code{confint()}, a matrix containing the confidence intervals for the requested quantities. 47 | 48 | For \code{plot()}, a \code{ggplot} object. 49 | } 50 | \description{ 51 | \code{summary()} tabulates the estimates and confidence intervals and (optionally) p-values from a \code{clarify_est} object. \code{confint()} computes confidence intervals. \code{plot()} plots the "posterior" distribution of estimates. 52 | } 53 | \details{ 54 | \code{summary()} uses the estimates computed from the original model as its estimates and uses the simulated parameters for inference only, in line with the recommendations of Rainey (2023). 55 | 56 | When \code{method = "wald"}, the standard deviation of the simulation estimates is used as the standard error, which is used in the z-statistics and the confidence intervals. The p-values and confidence intervals are valid only when the sampling distribution of the resulting statistic is normal (which can be assessed using \code{plot()}). When \code{method = "quantile"}, the confidence interval is calculated using the quantiles of the simulation estimates corresponding to \code{level}, and the p-value is calculated as twice the proportion of simulation estimates less than or greater than \code{null}, whichever is smaller; this is equivalent to inverting the confidence interval but is only truly valid when the true sampling distribution is only a location shift from the sampling distribution under the null hypothesis and should therefore be interpreted with caution. Using \code{method = "quantile"} (the default) is recommended because the confidence intervals will be valid even if the sampling distribution is not Normally distributed. The precision of the p-values and confidence intervals depends on the number of simulations requested (the value of \code{n} supplied to \code{\link[=sim]{sim()}}). 57 | 58 | The plots are produced using \code{\link[ggplot2:geom_density]{ggplot2::geom_density()}} and can be customized with \pkg{ggplot2} functions. When \code{reference = TRUE}, a reference Normal distribution is produced using the empirical mean and standard deviation of the simulated values. A blue reference line is plotted at the median of the simulated values.
For Wald-based inference to be valid, the reference distribution should overlap with the empirical distribution, in which case the quantile-based and Wald-based intervals should be similar. For quantile-based inference to be valid, the median of the simulated estimates should be close to the original estimate, though this is a necessary but not sufficient condition. 59 | } 60 | \examples{ 61 | data("lalonde", package = "MatchIt") 62 | fit <- glm(I(re78 > 0) ~ treat + age + race + nodegree + re74, 63 | data = lalonde, family = binomial) 64 | 65 | s <- sim(fit, n = 100) 66 | 67 | # Compute average marginal means for `treat` 68 | est <- sim_ame(s, var = "treat", verbose = FALSE) 69 | coef(est) 70 | 71 | # Compute average marginal effects on risk difference 72 | # (RD) and risk ratio (RR) scale 73 | est <- transform(est, 74 | RD = `E[Y(1)]` - `E[Y(0)]`, 75 | RR = `E[Y(1)]` / `E[Y(0)]`) 76 | 77 | # Compute confidence intervals and p-values, 78 | # using given null values for computing p-values 79 | summary(est, null = c(`RD` = 0, `RR` = 1)) 80 | 81 | # Same tests using normal approximation and alternate 82 | # syntax for `null` 83 | summary(est, null = c(NA, NA, 0, 1), 84 | method = "wald") 85 | 86 | # Plot the RD and RR with a reference distribution 87 | plot(est, parm = c("RD", "RR"), reference = TRUE, 88 | ci = FALSE) 89 | 90 | # Plot the RD and RR with quantile confidence bounds 91 | plot(est, parm = c("RD", "RR"), ci = TRUE) 92 | 93 | } 94 | \references{ 95 | Rainey, C. (2023). A careful consideration of CLARIFY: Simulation-induced bias in point estimates of quantities of interest. \emph{Political Science Research and Methods}, 1–10. \doi{10.1017/psrm.2023.8} 96 | } 97 | \seealso{ 98 | \itemize{ 99 | \item \code{\link[=sim_apply]{sim_apply()}} for applying a function to each set of simulated coefficients 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /man/transform.clarify_est.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/transform.clarify_est.R 3 | \name{transform.clarify_est} 4 | \alias{transform.clarify_est} 5 | \alias{cbind.clarify_est} 6 | \title{Transform and combine \code{clarify_est} objects} 7 | \usage{ 8 | \method{transform}{clarify_est}(`_data`, ...) 9 | 10 | \method{cbind}{clarify_est}(..., deparse.level = 1) 11 | } 12 | \arguments{ 13 | \item{_data}{the \code{clarify_est} object to be transformed.} 14 | 15 | \item{...}{for \code{transform()}, arguments in the form \code{name = value}, where \code{name} is the name of a new quantity to be computed and \code{value} is an expression that is a function of the existing quantities corresponding to the new quantity to be computed. See Details. For \code{cbind()}, \code{clarify_est} objects to be combined.} 16 | 17 | \item{deparse.level}{ignored.} 18 | } 19 | \value{ 20 | A \code{clarify_est} object, either with new columns added (when using \code{transform()}) or combining two \code{clarify_est} objects. Note that any type attributes corresponding to the \code{sim_apply()} wrapper used (e.g., \code{sim_ame()}) are lost when using either function. This can affect any helper functions (e.g., \code{plot()}) designed to work with the output of specific wrappers. 21 | } 22 | \description{ 23 | \code{transform()} modifies a \code{clarify_est} object by allowing for the calculation of new quantities from the existing quantities without re-simulating them.
\code{cbind()} binds two \code{clarify_est} objects together. 24 | } 25 | \details{ 26 | For \code{transform()}, the expression on the right side of the \code{=} should use the names of the existing quantities (e.g., \code{`E[Y(1)]` - `E[Y(0)]`}), with \verb{`} appropriately included when the quantity names include parentheses or brackets. Alternatively, it can use indexes prefixed by \code{.b}, e.g., \code{.b2 - .b1}, to refer to the corresponding quantity by position. This can aid in computing derived quantities of quantities with complicated names. (Note that if a quantity is named something like \code{.b1}, it will need to be referred to by position rather than name, as the position-based label takes precedence). See examples. Setting an existing value to \code{NULL} will remove that quantity from the object. 27 | 28 | \code{cbind()} does not rename the quantities or check for uniqueness of the names, so it is important to rename them yourself prior to combining the objects. 29 | } 30 | \examples{ 31 | data("lalonde", package = "MatchIt") 32 | 33 | # Fit the model 34 | fit <- lm(re78 ~ treat * (age + educ + race + 35 | married + re74 + re75), 36 | data = lalonde) 37 | 38 | # Simulate coefficients 39 | set.seed(123) 40 | s <- sim(fit, n = 100) 41 | 42 | # Average adjusted predictions for `treat` within 43 | # subsets of `race` 44 | est_b <- sim_ame(s, var = "treat", verbose = FALSE, 45 | subset = race == "black") 46 | est_b 47 | 48 | est_h <- sim_ame(s, var = "treat", verbose = FALSE, 49 | subset = race == "hispan") 50 | est_h 51 | 52 | # Compute differences between adjusted predictions 53 | est_b <- transform(est_b, 54 | diff = `E[Y(1)]` - `E[Y(0)]`) 55 | est_b 56 | 57 | est_h <- transform(est_h, 58 | diff = `E[Y(1)]` - `E[Y(0)]`) 59 | est_h 60 | 61 | # Bind estimates together after renaming 62 | names(est_b) <- paste0(names(est_b), "_b") 63 | names(est_h) <- paste0(names(est_h), "_h") 64 | 65 | est <- cbind(est_b, est_h) 66 | est 67 | 68 | # Compute difference in race-specific differences 69 | est <- transform(est, 70 | `diff-diff` = .b6 - .b3) 71 | 72 | summary(est, 73 | parm = c("diff_b", "diff_h", "diff-diff")) 74 | 75 | # Remove last quantity by using `NULL` 76 | transform(est, `diff-diff` = NULL) 77 | } 78 | \seealso{ 79 | \code{\link[=transform]{transform()}}, \code{\link[=cbind]{cbind()}}, \code{\link[=sim]{sim()}} 80 | } 81 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(clarify) 11 | 12 | test_check("clarify") 13 | -------------------------------------------------------------------------------- /tests/testthat/fixtures/make_mdata.R: -------------------------------------------------------------------------------- 1 | #Make matched data 2 | m <- MatchIt::matchit(treat ~ age + educ + race + married + re74, 3 | data = MatchIt::lalonde, method = "full", estimand = "ATE", 4 | caliper = .05) 5 | md <- MatchIt::match.data(m, data = MatchIt::lalonde) 6 | md$binY <- as.numeric(md$re78 > 0) 7 | 8 | set.seed(1993) 9 | md$countY <- rpois(nrow(md), 5) 10 | md$propY <- runif(nrow(md)) 11 | 12 | saveRDS(md, test_path("fixtures", "mdata.rds")) 13 | -------------------------------------------------------------------------------- /tests/testthat/fixtures/make_mira.R: -------------------------------------------------------------------------------- 1 | d <- cobalt::lalonde_mis 2 | d$binY <- as.numeric(d$re78 > 0) 3 | 4 | imp <- mice::mice(d, maxit = 5, m = 10, printFlag = FALSE, 5 | seed = 1234567) 6 | 7 | #mira: models fit in each imputed dataset 8 | mira <- with(imp, glm(binY ~ treat + age + educ + race + re74, family = binomial)) 9 | saveRDS(mira, test_path("fixtures", "mira.rds")) 10 | 11 | #list of models: 12 | model_list <- lapply(mice::complete(imp, "all"), function(data) { 13 | glm(binY ~ treat + age + educ + race + re74, family = binomial, 14 | data = data) 15 | }) 16 | saveRDS(model_list, test_path("fixtures", "model_list.rds")) 17 | 18 | m <- MatchThem::matchthem(treat ~ age + educ + race + married + re74 + re75, 19 | imp, estimand = "ATE", method = "full", link = "probit") 20 | 21 | #mimira: models fit in each matched imputed dataset 22 | mimira <- with(m, glm(binY ~ treat + age + educ + race + re74, 23 | family = "quasibinomial")) 24 | 25 | saveRDS(mimira, test_path("fixtures", "mimira.rds")) 26 | -------------------------------------------------------------------------------- /tests/testthat/fixtures/mdata.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/tests/testthat/fixtures/mdata.rds -------------------------------------------------------------------------------- /tests/testthat/fixtures/mimira.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/tests/testthat/fixtures/mimira.rds -------------------------------------------------------------------------------- /tests/testthat/fixtures/mira.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/tests/testthat/fixtures/mira.rds -------------------------------------------------------------------------------- /tests/testthat/fixtures/model_list.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IQSS/clarify/ab7e624ba3c8277b5001482c2c689b5d0bd93a65/tests/testthat/fixtures/model_list.rds -------------------------------------------------------------------------------- /tests/testthat/helper.R: -------------------------------------------------------------------------------- 1 | #testthat helpers 2 | 3 | expect_good_clarify_sim <- function(s) {
expect_s3_class(s, "clarify_sim") 5 | expect_true(all(c("sim.coefs", "coefs", "vcov") %in% names(s))) 6 | 7 | expect_true(is.matrix(s$sim.coefs)) 8 | expect_type(s$sim.coefs, "double") 9 | expect_type(s$coefs, "double") 10 | expect_true(is.matrix(s$vcov)) 11 | expect_type(s$vcov, "double") 12 | 13 | expect_vector(attr(s, "dist"), character(), 1) 14 | expect_vector(attr(s, "sim_hash"), character(), 1) 15 | expect_vector(attr(s, "use_fit"), logical(), 1) 16 | 17 | expect_equal(isTRUE(!is.null(s$fit)), attr(s, "use_fit")) 18 | 19 | expect_equal(length(s$coefs), ncol(s$sim.coefs)) 20 | expect_equal(ncol(s$vcov), nrow(s$vcov)) 21 | expect_equal(length(s$coefs), nrow(s$vcov)) 22 | 23 | expect_false(any(!is.finite(s$sim.coefs))) 24 | expect_false(any(!is.finite(s$coefs))) 25 | expect_false(any(!is.finite(s$vcov))) 26 | } 27 | 28 | expect_good_clarify_est <- function(e) { 29 | expect_s3_class(e, "clarify_est") 30 | expect_length(dim(e), 2L) 31 | expect_type(e, "double") 32 | 33 | expect_vector(attr(e, "original"), numeric(), ncol(e)) 34 | expect_vector(attr(e, "sim_hash"), character(), 1) 35 | 36 | expect_identical(names(e), names(attr(e, "original"))) 37 | 38 | expect_false(any(apply(e, 2, all_the_same))) 39 | } 40 | 41 | expect_good_clarify_misim <- function(s) { 42 | expect_s3_class(s, "clarify_misim") 43 | expect_s3_class(s, "clarify_sim") 44 | expect_true(all(c("sim.coefs", "coefs", "imp") %in% names(s))) 45 | 46 | expect_true(is.matrix(s$sim.coefs)) 47 | expect_type(s$sim.coefs, "double") 48 | expect_true(is.matrix(s$coefs)) 49 | expect_type(s$coefs, "double") 50 | if (!is.null(s$fit)) expect_equal(nrow(s$coefs), length(s$fit)) 51 | expect_type(s$imp, "integer") 52 | expect_equal(max(s$imp), nrow(s$coefs)) 53 | expect_equal(length(s$imp), nrow(s$sim.coefs)) 54 | 55 | expect_vector(attr(s, "dist"), character(), 1) 56 | expect_vector(attr(s, "sim_hash"), character(), 1) 57 | expect_vector(attr(s, "use_fit"), logical(), 1) 58 | 59 | expect_equal(isTRUE(!is.null(s$fit)), attr(s, "use_fit")) 60 | 61 | expect_equal(ncol(s$coefs), ncol(s$sim.coefs)) 62 | 63 | expect_false(any(!is.finite(s$sim.coefs))) 64 | expect_false(any(!is.finite(s$coefs))) 65 | } 66 | -------------------------------------------------------------------------------- /tests/testthat/test-transform.R: -------------------------------------------------------------------------------- 1 | test_that("transform() works", { 2 | mdata <- readRDS(test_path("fixtures", "mdata.rds")) 3 | 4 | fit <- lm(re78 ~ treat * age + educ + race + re74, data = mdata, 5 | weights = weights) 6 | 7 | s <- sim(fit, n = 5) 8 | 9 | e0 <- sim_ame(s, "treat", verbose = FALSE) 10 | 11 | e1 <- transform(e0, diff = `E[Y(1)]` - `E[Y(0)]`) 12 | 13 | expect_good_clarify_est(e1) 14 | expect_equal(length(names(e1)), 3) 15 | expect_equal(as.matrix(e1)[,2] - as.matrix(e1)[,1], 16 | as.matrix(e1)[,3]) 17 | 18 | #Test positional matching 19 | e2 <- transform(e0, diff = .b2 - .b1) 20 | 21 | expect_good_clarify_est(e2) 22 | 23 | expect_equal(e1, e2) 24 | 25 | # test that positional matching is prioritized 26 | e3 <- e0; names(e3) <- c(".b2", ".b1") 27 | 28 | e3 <- transform(e3, diff = .b2 - .b1) 29 | expect_good_clarify_est(e3) 30 | expect_equal(e1[3], e3[3]) 31 | 32 | #Test that NULL removes existing values but not new ones 33 | e4 <- transform(e2, diff = NULL) 34 | expect_good_clarify_est(e4) 35 | expect_equal(e1[-3], e4) 36 | 37 | e4 <- transform(e0, 38 | diff2 = .b1 - .b2, 39 | diff2 = NULL) 40 | expect_good_clarify_est(e4) 41 | expect_equal(length(names(e4)), 3) 42 | }) 
43 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /vignettes/Zelig.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Translating Zelig to clarify" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Translating Zelig to clarify} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | bibliography: references.bib 9 | --- 10 | 11 | ```{r, include = FALSE} 12 | knitr::opts_chunk$set( 13 | collapse = TRUE, 14 | comment = "#>", 15 | warning = FALSE, 16 | fig.width = 6.5, 17 | fig.height = 2.75 18 | ) 19 | ``` 20 | 21 | ## Introduction 22 | 23 | In this document, we demonstrate some common uses of `Zelig` [@imaiCommonFrameworkStatistical2008a] and how the same tasks can be performed using `clarify`. We'll include examples for computing predictions at representative values (i.e., `setx()` and `sim()` in `Zelig`), the rare-events logit model, estimating the average treatment effect in the treated (ATT) after matching, and combining estimates after multiple imputation. 24 | 25 | The usual workflow in `Zelig` is to fit a model using `zelig()`, specify quantities of interest to simulate using `setx()` on the `zelig()` output, and then simulate those quantities using `sim()`. `clarify` uses a similar approach, except that the model is fit outside `clarify` using functions in a different R package. In addition, `clarify`'s `sim_apply()` allows for the computation of any arbitrary quantity of interest. Unlike `Zelig`, `clarify` follows the recommendations of @raineyCarefulConsiderationCLARIFY2023 to use the estimates computed from the original model coefficients rather than the average of the simulated draws. We'll demonstrate how to replicate a standard `Zelig` analysis using `clarify` step-by-step. Because simulation-based inference involves randomness and some of the algorithms may not perfectly align, one shouldn't expect results to be identical, though in most cases, they should be similar. 26 | 27 | ```{r} 28 | ## library("Zelig") 29 | library("clarify") 30 | set.seed(100) 31 | ``` 32 | 33 | Note that both `Zelig` and `clarify` have a function called "`sim()`", so we will always make it clear which package's `sim()` is being used. 34 | 35 | ## Predictions at representative values 36 | 37 | Here we'll use the `lalonde` dataset in `{MatchIt}` and fit a linear model for `re78` as a function of the treatment `treat` and covariates. 38 | 39 | ```{r} 40 | data("lalonde", package = "MatchIt") 41 | ``` 42 | 43 | We'll be interested in the predicted values of the outcome for a typical unit at each level of treatment and their first difference.
44 | 45 | ### `Zelig` workflow 46 | 47 | In `Zelig`, we fit the model using `zelig()`: 48 | 49 | ```{r, eval = FALSE} 50 | fit <- zelig(re78 ~ treat + age + educ + married + race + 51 | nodegree + re74 + re75, data = lalonde, 52 | model = "ls", cite = FALSE) 53 | ``` 54 | 55 | Next, we use `setx()` and `setx1()` to set our values of `treat`: 56 | 57 | ```{r, eval = FALSE} 58 | fit <- setx(fit, treat = 0) 59 | fit <- setx1(fit, treat = 1) 60 | ``` 61 | 62 | Next, we simulate the values using `sim()`: 63 | 64 | ```{r, eval = FALSE} 65 | fit <- Zelig::sim(fit) 66 | ``` 67 | 68 | Finally, we can print and plot the predicted values and first differences: 69 | 70 | ```{r, eval = FALSE} 71 | fit 72 | ``` 73 | 74 | ```{r, eval = F} 75 | plot(fit) 76 | ``` 77 | 78 | ### `clarify` workflow 79 | 80 | In `clarify`, we fit the model using functions outside `clarify`, like `stats::lm()` or `fixest::feols()`. 81 | 82 | ```{r} 83 | fit <- lm(re78 ~ treat + age + educ + married + race + 84 | nodegree + re74 + re75, data = lalonde) 85 | ``` 86 | 87 | Next, we simulate the model coefficients using `clarify::sim()`: 88 | 89 | ```{r} 90 | s <- clarify::sim(fit) 91 | ``` 92 | 93 | Next, we use `sim_setx()` to set our values of the predictors: 94 | 95 | ```{r} 96 | est <- sim_setx(s, x = list(treat = 0), x1 = list(treat = 1), 97 | verbose = FALSE) 98 | ``` 99 | 100 | Finally, we can summarize and plot the predicted values: 101 | 102 | ```{r} 103 | summary(est) 104 | 105 | plot(est) 106 | ``` 107 | 108 | ## Rare-events logit 109 | 110 | `Zelig` uses a special method for logistic regression with rare events as described in @kingLogisticRegressionRare2001. This is the primary implementation of the method in R. However, newer methods have been developed that perform similarly to or better than the method of King and Zeng [@puhrFirthLogisticRegression2017] and are implemented in R packages that are compatible with `clarify`, such as `logistf` and `brglm2`. 111 | 112 | Here, we'll use the `lalonde` dataset with a constructed rare outcome variable to demonstrate how to perform a rare events logistic regression in `Zelig` and in `clarify`. 113 | 114 | ```{r} 115 | data("lalonde", package = "MatchIt") 116 | 117 | #Rare outcome: 1978 earnings over $20k; ~6% prevalence 118 | lalonde$re78_20k <- lalonde$re78 >= 20000 119 | ``` 120 | 121 | ### `Zelig` workflow 122 | 123 | In `Zelig`, we fit a rare events logistic model using `zelig()` with `model = "relogit"`. 124 | 125 | ```{r, eval = FALSE} 126 | fit <- zelig(re78_20k ~ treat + age + educ + married + race + 127 | nodegree + re74 + re75, data = lalonde, 128 | model = "relogit", cite = FALSE) 129 | 130 | fit 131 | ``` 132 | 133 | We can compute predicted values at representative values using `setx()` and `Zelig::sim()` as above. 134 | 135 | ```{r, eval = FALSE} 136 | fit <- setx(fit, treat = 0) 137 | fit <- setx1(fit, treat = 1) 138 | 139 | fit <- Zelig::sim(fit) 140 | 141 | fit 142 | ``` 143 | 144 | ```{r, eval = FALSE} 145 | plot(fit) 146 | ``` 147 | 148 | ### `clarify` workflow
151 | 152 | ```{r} 153 | fit <- logistf::logistf(re78_20k ~ treat + age + educ + married + race + 154 | nodegree + re74 + re75, data = lalonde, 155 | flic = TRUE) 156 | 157 | summary(fit) 158 | ``` 159 | 160 | We can compute predictions at representative values using `clarify::sim()` and `sim_setx()`. 161 | 162 | ```{r} 163 | s <- clarify::sim(fit) 164 | 165 | est <- sim_setx(s, x = list(treat = 0), x1 = list(treat = 1), 166 | verbose = FALSE) 167 | 168 | summary(est) 169 | ``` 170 | 171 | ```{r} 172 | plot(est) 173 | ``` 174 | 175 | ## Estimating the ATT after matching 176 | 177 | Here we'll use the `lalonde` dataset and perform propensity score matching and then fit a linear model for `re78` as a function of the treatment `treat`, the covariates, and their interaction. From this model, we'll compute the ATT of `treat` using `Zelig` and `clarify`. 178 | 179 | ```{r} 180 | data("lalonde", package = "MatchIt") 181 | 182 | m.out <- MatchIt::matchit(treat ~ age + educ + married + race + 183 | nodegree + re74 + re75, data = lalonde, 184 | method = "nearest") 185 | ``` 186 | 187 | ### `Zelig` workflow 188 | 189 | In `Zelig`, we fit the model using `zelig()` directly on the `matchit` object: 190 | 191 | ```{r, eval = FALSE} 192 | fit <- zelig(re78 ~ treat * (age + educ + married + race + 193 | nodegree + re74 + re75), 194 | data = m.out, model = "ls", cite = FALSE) 195 | ``` 196 | 197 | Next, we use `ATT()` to request the ATT of `treat` and simulate the values: 198 | 199 | ```{r, eval = FALSE} 200 | fit <- ATT(fit, "treat") 201 | ``` 202 | 203 | ```{r, eval = F} 204 | fit 205 | ``` 206 | 207 | ```{r, eval = F} 208 | plot(fit) 209 | ``` 210 | 211 | ### `clarify` workflow 212 | 213 | In `clarify`, we need to extract the matched dataset and fit a model outside `clarify` using another package. 214 | 215 | ```{r} 216 | m.data <- MatchIt::match.data(m.out) 217 | 218 | fit <- lm(re78 ~ treat * (age + educ + married + race + 219 | nodegree + re74 + re75), 220 | data = m.data) 221 | ``` 222 | 223 | Next, we simulate the model coefficients using `clarify::sim()`. Because we performed pair matching, we will request a cluster-robust standard error: 224 | 225 | ```{r} 226 | s <- clarify::sim(fit, vcov = ~subclass) 227 | ``` 228 | 229 | Next, we use `sim_ame()` to request the average marginal effect of `treat` within the subset of treated units: 230 | 231 | ```{r} 232 | est <- sim_ame(s, var = "treat", subset = treat == 1, 233 | contrast = "diff", verbose = FALSE) 234 | ``` 235 | 236 | Finally, we can summarize and plot the ATT: 237 | 238 | ```{r} 239 | summary(est) 240 | 241 | plot(est) 242 | ``` 243 | 244 | ## Combining results after multiple imputation 245 | 246 | Here we'll use the `africa` dataset in `{Amelia}` to demonstrate combining estimates after multiple imputation. This analysis is also demonstrated using `clarify` at the end of `vignette("clarify")`. 247 | 248 | ```{r, message=F} 249 | library(Amelia) 250 | data("africa", package = "Amelia") 251 | ``` 252 | 253 | First we multiply impute the data using `amelia()` using the specification in the `{Amelia}` documentation. 
254 | 255 | ```{r} 256 | # Multiple imputation 257 | a.out <- amelia(x = africa, m = 10, cs = "country", 258 | ts = "year", logs = "gdp_pc", p2s = 0) 259 | ``` 260 | 261 | ### `Zelig` workflow 262 | 263 | With `Zelig`, we can supply the `amelia` object directly to the `data` argument of `zelig()` to fit a model in each imputed dataset: 264 | 265 | ```{r, eval = FALSE} 266 | fit <- zelig(gdp_pc ~ infl * trade, data = a.out, 267 | model = "ls", cite = FALSE) 268 | ``` 269 | 270 | The coefficient estimates, combined across imputations, can be summarized using `summary()`: 271 | 272 | ```{r, eval = FALSE} 273 | summary(fit) 274 | ``` 275 | 276 | We can use `Zelig::sim()` and `setx()` to compute predictions at specified values of the predictors: 277 | 278 | ```{r, eval = FALSE} 279 | fit <- setx(fit, infl = 0, trade = 40) 280 | fit <- setx1(fit, infl = 0, trade = 60) 281 | 282 | fit <- Zelig::sim(fit) 283 | ``` 284 | 285 | `Zelig` does not allow you to combine predicted values across imputations. 286 | 287 | ```{r, eval = F} 288 | fit 289 | ``` 290 | 291 | ```{r, eval = F} 292 | plot(fit) 293 | ``` 294 | 295 | ### `clarify` workflow 296 | 297 | `clarify` does not combine coefficients, unlike `zelig()`; instead, the models should be fit using `with()` on the `amelia` object. To view the combined coefficient estimates, use `Amelia::mi.combine()`. 298 | 299 | ```{r} 300 | #Use Amelia functions to model and combine coefficients 301 | fits <- with(a.out, lm(gdp_pc ~ infl * trade)) 302 | 303 | mi.combine(fits) 304 | ``` 305 | 306 | Derived quantities can be computed using `clarify::misim()` and `sim_apply()` or its wrappers on the `with()` output, which is a list of regression model fits: 307 | 308 | ```{r} 309 | #Simulate coefficients, 100 in each of 10 imputations 310 | s <- misim(fits, n = 100) 311 | 312 | #Compute predictions at specified values 313 | est <- sim_setx(s, x = list(infl = 0, trade = 40), 314 | x1 = list(infl = 0, trade = 60), 315 | verbose = FALSE) 316 | 317 | summary(est) 318 | 319 | plot(est) 320 | ``` 321 | 322 | ## References 323 | -------------------------------------------------------------------------------- /vignettes/references.bib: -------------------------------------------------------------------------------- 1 | 2 | @article{kingMakingMostStatistical2000, 3 | title = {Making the Most of Statistical Analyses: Improving Interpretation and Presentation}, 4 | author = {King, Gary and Tomz, Michael and Wittenberg, Jason}, 5 | year = {2000}, 6 | date = {2000}, 7 | journal = {American Journal of Political Science}, 8 | pages = {347--361}, 9 | volume = {44}, 10 | number = {2}, 11 | doi = {10.2307/2669316}, 12 | url = {https://www.jstor.org/stable/2669316}, 13 | note = {tex.ids= kingMakingMostStatistical2000a 14 | publisher: [Midwest Political Science Association, Wiley]} 15 | } 16 | 17 | @article{zhouNoteBayesianInference2010, 18 | title = {A Note on Bayesian Inference After Multiple Imputation}, 19 | author = {Zhou, Xiang and Reiter, Jerome P.}, 20 | year = {2010}, 21 | month = {05}, 22 | date = {2010-05}, 23 | journal = {The American Statistician}, 24 | pages = {159--163}, 25 | volume = {64}, 26 | number = {2}, 27 | doi = {10.1198/tast.2010.09109}, 28 | url = {http://www.tandfonline.com/doi/abs/10.1198/tast.2010.09109}, 29 | langid = {en} 30 | } 31 | 32 | @article{tomzClarifySoftwareInterpreting2003, 33 | title = {Clarify: Software for Interpreting and Presenting Statistical Results}, 34 | author = {Tomz, Michael and Wittenberg, Jason and King, Gary}, 35 | year = {2003}, 36 | month = {01}, 37

## References

--------------------------------------------------------------------------------
/vignettes/references.bib:
--------------------------------------------------------------------------------

@article{kingMakingMostStatistical2000,
  title = {Making the Most of Statistical Analyses: Improving Interpretation and Presentation},
  author = {King, Gary and Tomz, Michael and Wittenberg, Jason},
  year = {2000},
  date = {2000},
  journal = {American Journal of Political Science},
  pages = {347--361},
  volume = {44},
  number = {2},
  doi = {10.2307/2669316},
  url = {https://www.jstor.org/stable/2669316}
}

@article{zhouNoteBayesianInference2010,
  title = {A Note on Bayesian Inference After Multiple Imputation},
  author = {Zhou, Xiang and Reiter, Jerome P.},
  year = {2010},
  month = {05},
  date = {2010-05},
  journal = {The American Statistician},
  pages = {159--163},
  volume = {64},
  number = {2},
  doi = {10.1198/tast.2010.09109},
  url = {http://www.tandfonline.com/doi/abs/10.1198/tast.2010.09109},
  langid = {en}
}

@article{tomzClarifySoftwareInterpreting2003,
  title = {Clarify: Software for Interpreting and Presenting Statistical Results},
  author = {Tomz, Michael and Wittenberg, Jason and King, Gary},
  year = {2003},
  month = {01},
  date = {2003-01-15},
  journal = {Journal of Statistical Software},
  pages = {1--30},
  volume = {8},
  number = {1},
  doi = {10.18637/jss.v008.i01},
  url = {https://doi.org/10.18637/jss.v008.i01},
  langid = {en}
}

@article{imaiCommonFrameworkStatistical2008a,
  title = {Toward a Common Framework for Statistical Analysis and Development},
  author = {Imai, Kosuke and King, Gary and Lau, Olivia},
  year = {2008},
  month = {12},
  date = {2008-12-01},
  journal = {Journal of Computational and Graphical Statistics},
  pages = {892--913},
  volume = {17},
  number = {4},
  doi = {10.1198/106186008X384898},
  url = {https://doi.org/10.1198/106186008X384898}
}

@article{puhrFirthLogisticRegression2017,
  title = {Firth's Logistic Regression with Rare Events: Accurate Effect Estimates and Predictions?},
  author = {Puhr, Rainer and Heinze, Georg and Nold, Mariana and Lusa, Lara and Geroldinger, Angelika},
  year = {2017},
  month = {06},
  date = {2017-06-30},
  journal = {Statistics in Medicine},
  pages = {2302--2317},
  volume = {36},
  number = {14},
  doi = {10.1002/sim.7273},
  url = {http://onlinelibrary.wiley.com/doi/10.1002/sim.7273},
  langid = {en}
}

@article{kingLogisticRegressionRare2001,
  title = {Logistic Regression in Rare Events Data},
  author = {King, Gary and Zeng, Langche},
  year = {2001},
  date = {2001},
  journal = {Political Analysis},
  pages = {137--163},
  volume = {9},
  number = {2},
  doi = {10.1093/oxfordjournals.pan.a004868},
  url = {https://www.cambridge.org/core/product/identifier/S1047198700003740/type/journal_article},
  langid = {en}
}

@article{raineyCarefulConsiderationCLARIFY2023,
  title = {A Careful Consideration of {{CLARIFY}}: Simulation-Induced Bias in Point Estimates of Quantities of Interest},
  author = {Rainey, Carlisle},
  year = {2023},
  month = {04},
  date = {2023-04},
  journal = {Political Science Research and Methods},
  pages = {1--10},
  doi = {10.1017/psrm.2023.8},
  url = {https://doi.org/10.1017/psrm.2023.8},
  langid = {en}
}

@article{rainey2017,
  title = {Transformation-Induced Bias: Unbiased Coefficients Do Not Imply Unbiased Quantities of Interest},
  author = {Rainey, Carlisle},
  year = {2017},
  month = {07},
  date = {2017-07},
  journal = {Political Analysis},
  pages = {402--409},
  volume = {25},
  number = {3},
  doi = {10.1017/pan.2017.11},
  url = {http://dx.doi.org/10.1017/pan.2017.11},
  langid = {en}
}
--------------------------------------------------------------------------------