├── .Rbuildignore
├── .Rprofile
├── .gitignore
├── .travis.yml
├── CRAN-SUBMISSION
├── CausalModels.Rproj
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── doubly_robust.R
│   ├── gestimation.R
│   ├── helper_funcs.R
│   ├── imports.R
│   ├── initialize.R
│   ├── ipweighting.R
│   ├── iv_est.R
│   ├── outcome_regression.R
│   ├── propensity_matching.R
│   ├── propensity_scores.R
│   ├── standardization.R
│   └── zzz.R
├── README.Rmd
├── README.md
├── cran-comments.md
├── man
│   ├── doubly_robust.Rd
│   ├── figures
│   │   └── README-pressure-1.png
│   ├── gestimation.Rd
│   ├── init_params.Rd
│   ├── ipweighting.Rd
│   ├── iv_est.Rd
│   ├── outcome_regression.Rd
│   ├── propensity_matching.Rd
│   ├── propensity_scores.Rd
│   └── standardization.Rd
├── renv.lock
└── renv
    ├── .gitignore
    ├── activate.R
    └── settings.dcf

/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^renv$
2 | ^renv\.lock$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | ^cran-comments\.md$
6 | ^\.travis\.yml$
7 | ^README\.Rmd$
8 | ^CRAN-SUBMISSION$
--------------------------------------------------------------------------------
/.Rprofile:
--------------------------------------------------------------------------------
1 | source("renv/activate.R")
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 |
3 | language: R
4 | cache: packages
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.2.1
2 | Date: 2025-04-24 23:48:43 UTC
3 | SHA: 9c9ee22b6640a3a40ddb7e24353333fe7fd8be39
--------------------------------------------------------------------------------
/CausalModels.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: CausalModels
2 | Type: Package
3 | Title: Causal Inference Modeling for Estimation of Causal Effects
4 | Version: 0.2.1
5 | Authors@R:
6 |     c(person(given = "Joshua",
7 |              family = "Anderson",
8 |              role = c("aut", "cre", "cph"),
9 |              email = "jwanderson198@gmail.com"),
10 |       person(given = "Cyril",
11 |              family = "Rakovski",
12 |              role = "rev",
13 |              email = "rakovski@chapman.edu"),
14 |       person(given = "Yesha",
15 |              family = "Patel",
16 |              role = "rev"),
17 |       person(given = "Erin",
18 |              family = "Lee",
19 |              role = "rev"))
20 | Maintainer: Joshua Anderson
21 | Description:
22 |     Provides an array of statistical models common in causal inference such as
standardization, IP weighting, propensity matching, outcome regression, and doubly-robust 24 | estimators. Estimates of the average treatment effects from each model are given with the 25 | standard error and a 95% Wald confidence interval (Hernan, Robins (2020) ). 26 | License: GPL-3 27 | URL: https://github.com/ander428/CausalModels 28 | BugReports: https://github.com/ander428/CausalModels/issues 29 | Encoding: UTF-8 30 | LazyData: true 31 | RoxygenNote: 7.1.2 32 | Language: en-US 33 | Imports: 34 | stats, 35 | causaldata, 36 | boot, 37 | multcomp, 38 | geepack 39 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(predict,doubly_robust) 4 | S3method(predict,gestimation) 5 | S3method(predict,ipweighting) 6 | S3method(predict,outcome_regression) 7 | S3method(predict,propensity_matching) 8 | S3method(predict,propensity_scores) 9 | S3method(predict,standardization) 10 | S3method(print,doubly_robust) 11 | S3method(print,gestimation) 12 | S3method(print,ipweighting) 13 | S3method(print,outcome_regression) 14 | S3method(print,propensity_matching) 15 | S3method(print,propensity_scores) 16 | S3method(print,standardization) 17 | S3method(print,summary.doubly_robust) 18 | S3method(print,summary.gestimation) 19 | S3method(print,summary.ipweighting) 20 | S3method(print,summary.standardization) 21 | S3method(summary,doubly_robust) 22 | S3method(summary,gestimation) 23 | S3method(summary,ipweighting) 24 | S3method(summary,outcome_regression) 25 | S3method(summary,propensity_matching) 26 | S3method(summary,propensity_scores) 27 | S3method(summary,standardization) 28 | export(doubly_robust) 29 | export(gestimation) 30 | export(init_params) 31 | export(ipweighting) 32 | export(iv_est) 33 | export(outcome_regression) 34 | export(propensity_matching) 35 | export(propensity_scores) 36 | export(standardization) 37 | import(boot) 38 | import(causaldata) 39 | import(geepack) 40 | import(multcomp) 41 | import(stats) 42 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # CausalModels 0.1.0 2 | 3 | - Added a `NEWS.md` file to track changes to the package. 4 | 5 | ### Version 0.2.0 - 2022/10/27 6 | 7 | #### Added 8 | 9 | - one parameter g-estimation 10 | 11 | #### Fixed 12 | 13 | - Typo in ipweighting documentation 14 | 15 | ### Version 0.2.1 - 2025/04/27 16 | 17 | #### Fixed 18 | 19 | - generic treatment names bug 20 | -------------------------------------------------------------------------------- /R/doubly_robust.R: -------------------------------------------------------------------------------- 1 | #' @title Doubly Robust Model 2 | #' @description \code{`doubly_robust`} trains both an outcome model and a propensity model to generate predictions 3 | #' for the outcome and probability of treatment respectively. By default, the model uses 4 | #' \code{\link[=standardization]{standardization}} and \code{\link[=propensity_scores]{propensity_scores}} to form a 5 | #' doubly-robust model between standardization and IP weighting. Alternatively, any outcome and treatment 6 | #' models can be provided instead, but must be compatible with the \code{\link[stats:predict]{predict}} generic function in R. 
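As a rough, hypothetical sketch of that interface (the fitted objects out.fit and p.fit below are illustrative names, not part of the package; the nhefs.nmv data and variable names come from the examples further down this file, and init_params() is assumed to have been run as shown there): a custom outcome model can be passed via out.mod, while a propensity model whose predict method needs type = "response" to return probabilities can instead have its fitted probabilities supplied directly.

# hypothetical user-supplied fits; any model that works with predict() will do
out.fit <- glm(wt82_71 ~ qsmk + age + sex + race, data = nhefs.nmv)
p.fit <- glm(qsmk ~ age + sex + race, family = binomial(), data = nhefs.nmv)

db <- doubly_robust(
  out.mod = out.fit,                              # outcome predictions taken via predict(out.fit)
  p.scores = predict(p.fit, type = "response"),   # supply treatment probabilities directly
  data = nhefs.nmv
)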
7 | #' Since many propensity models may not predict probabilities without additional arguments into the 8 | #' predict function, the predictions themselves can be given for both the outcome and propensity scores. 9 | #' 10 | #' @param data a data frame containing the variables in the model. 11 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 12 | #' @param out.mod (optional) a regression model that predicts the outcome. NOTE: the model given 13 | #' must be compatible with the \code{\link[stats:predict]{predict}} generic function. 14 | #' @param p.mod (optional) a propensity model that predicts the probability of treatment. NOTE: the model given 15 | #' must be compatible with the \code{\link[stats:predict]{predict}} generic function. 16 | #' @param f (optional) an object of class "formula" that overrides the default parameter 17 | #' @param family the family to be used in the general linear model. 18 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 19 | #' @param simple a boolean indicator to build default formula with interactions. 20 | #' If true, interactions will be excluded. If false, interactions will be included. By 21 | #' default, simple is set to false. 22 | #' @param scores (optional) use calculated outcome estimates. 23 | #' @param p.f (optional) an object of class "formula" that overrides the default formula for the denominator of the IP 24 | #' weighting function. 25 | #' @param p.simple a boolean indicator to build default formula with interactions for the propensity models. 26 | #' If true, interactions will be excluded. If false, interactions will be included. By 27 | #' default, simple is set to false. 28 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect. 29 | #' @param p.family the family to be used in the underlying propensity model. 30 | #' By default, this is set to \code{\link[stats:gaussian]{binomial}}. 31 | #' @param p.scores (optional) use calculated propensity scores. 32 | #' @param n.boot an integer value that indicates number of bootstrap iterations to calculate standard error. 33 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model. 34 | #' 35 | #' @returns \code{doubly_robust} returns an object of \code{\link[base:class]{class}} "doubly_robust". 36 | #' 37 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 38 | #' the underlying \code{glm} model. 39 | #' 40 | #' An object of class \code{"doubly_robust"} is a list containing the following: 41 | #' 42 | #' \item{out.call}{the matched call of the outcome model.} 43 | #' \item{p.call}{the matched call of the propensity model.} 44 | #' \item{out.model}{the underlying outcome model.} 45 | #' \item{p.model}{the underlying propensity model.} 46 | #' \item{y_hat}{the estimated outcome values.} 47 | #' \item{p.scores}{the estimated propensity scores.} 48 | #' \item{ATE}{the estimated average treatment effect (risk difference).} 49 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. 
} 50 | #' \item{data}{the data frame used to train the model.} 51 | #' 52 | #' @export 53 | #' 54 | #' @examples 55 | #' library(causaldata) 56 | #' data(nhefs) 57 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 58 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 59 | #' 60 | #' confounders <- c( 61 | #' "sex", "race", "age", "education", "smokeintensity", 62 | #' "smokeyrs", "exercise", "active", "wt71" 63 | #' ) 64 | #' 65 | #' init_params(wt82_71, qsmk, 66 | #' covariates = confounders, 67 | #' data = nhefs.nmv 68 | #' ) 69 | #' 70 | #' # model using all defaults 71 | #' model <- doubly_robust(data = nhefs.nmv) 72 | #' summary(model) 73 | #' 74 | #' # use alternative outcome model 75 | #' out.mod <- propensity_matching(data = nhefs.nmv) 76 | #' db.model <- doubly_robust( 77 | #' out.mod = out.mod, 78 | #' data = nhefs.nmv 79 | #' ) 80 | #' db.model 81 | #' 82 | #' # give calculated outcome predictions and give formula for propensity scores 83 | #' db.model <- doubly_robust( 84 | #' scores = predict(out.mod), 85 | #' p.f = qsmk ~ sex + race + age, 86 | #' data = nhefs.nmv 87 | #' ) 88 | #' db.model 89 | #' 90 | doubly_robust <- function(data, out.mod = NULL, p.mod = NULL, f = NA, 91 | family = gaussian(), simple = pkg.env$simple, scores = NA, 92 | p.f = NA, p.simple = pkg.env$simple, p.family = binomial(), 93 | p.scores = NA, n.boot = 50, ...) { 94 | check_init() 95 | 96 | # grab function parameters 97 | params <- as.list(match.call()[-1]) 98 | 99 | if (anyNA(scores)) { 100 | # generate standardization if outcome model not specified 101 | if (is.null(out.mod)) { 102 | out.mod <- standardization(f = f, data = data, simple = simple, family = family) 103 | } 104 | 105 | scores <- predict(out.mod) 106 | } 107 | 108 | 109 | if (anyNA(p.scores)) { 110 | # generate propensity model if treatment model not specified 111 | if (is.null(p.mod)) { 112 | p.mod <- propensity_scores(f = p.f, data = data, family = p.family, simple = p.simple) 113 | } 114 | 115 | p.scores <- predict(p.mod) 116 | } 117 | 118 | boot_result <- boot(data = data, statistic = function(data, indices) { 119 | data <- data[indices, ] 120 | scores <- scores[indices] 121 | p.scores <- p.scores[indices] 122 | return(doubly_robust_est(scores, p.scores, data)) 123 | }, R = n.boot) 124 | 125 | # calculate 95% CI 126 | beta <- boot_result$t0 127 | SE <- sd(boot_result$t) 128 | ATE <- data.frame( 129 | "ATE" = beta, 130 | "SE" = SE, 131 | conf_int(beta, SE), 132 | check.names = FALSE 133 | ) 134 | 135 | output <- list( 136 | "out.call" = out.mod$call, "p.call" = p.mod$call, "out.model" = out.mod, 137 | "p.model" = p.mod, "y_hat" = scores, "p.scores" = p.scores, 138 | "ATE" = beta, "ATE.summary" = ATE, "data" = data 139 | ) 140 | 141 | class(output) <- "doubly_robust" 142 | return(output) 143 | } 144 | 145 | #' @export 146 | print.doubly_robust <- function(x, ...) { 147 | cat("Outcome Model\r\n\r\n") 148 | cat("Call:\r\n") 149 | print(x$out.call, ...) 150 | cat("\r\nPredictions:\r\n") 151 | print(summary(x$y_hat)) 152 | cat("\r\nPropensity Model\r\n\r\n") 153 | cat("Call:\r\n") 154 | print(x$p.call, ...) 155 | cat("\r\nPredictions:\r\n") 156 | print(summary(x$p.scores)) 157 | cat("\r\n") 158 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 159 | cat("Estimate - ", x$ATE, "\r\n") 160 | cat("SE - ", x$ATE.summary$SE, "\r\n") 161 | cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n") 162 | } 163 | 164 | #' @export 165 | summary.doubly_robust <- function(object, ...) 
{ 166 | s1 <- summary(object$out.model, ...) 167 | s1$ATE <- object$ATE 168 | s2 <- summary(object$p.model, ...) 169 | s2$ATE <- object$ATE 170 | s <- list("out.summary" = s1, "p.summary" = s2) 171 | class(s) <- "summary.doubly_robust" 172 | return(s) 173 | } 174 | 175 | #' @export 176 | print.summary.doubly_robust <- function(x, ...) { 177 | s1 <- x$out.summary 178 | s2 <- x$p.summary 179 | cat("Outcome Model Summary\r\n") 180 | print(s1, ...) 181 | cat("Propensity Model Summary\r\n") 182 | print(s2, ...) 183 | } 184 | 185 | #' @export 186 | predict.doubly_robust <- function(object, ...) { 187 | return(doubly_robust_est(object$y_hat, object$p.scores, object$data)) 188 | } 189 | 190 | doubly_robust_est <- function(S, W, data) { 191 | n <- length(S) 192 | tr <- as.numeric(levels(data[[pkg.env$treatment]]))[data[[pkg.env$treatment]]] 193 | 194 | return(sum(S - ((tr * (data[[pkg.env$outcome]] - S)) / W)) / n) 195 | } 196 | -------------------------------------------------------------------------------- /R/gestimation.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title One Parameter G-Estimation of Structural Nested Mean Models 3 | #' @description `gestimation` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 4 | #' weights. The weights can either be standardized weights or non-standardized weights. A grid search is done on \eqn{\alpha} 5 | #' to construct the best \eqn{\beta} coefficient in the structural nested mean model. Alternatively, a linear mean model can be used 6 | #' for a closed form estimator. 7 | #' 8 | #' @param data a data frame containing the variables in the model. 9 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 10 | #' @param grid a list of possible \eqn{\beta} values that will be used in the grid search. 11 | #' @param ids (optional) see documentation for \code{\link[geepack:geeglm]{geeglm}}. By default rownames of the data will be used. 12 | #' @param f (optional) an object of class "formula" that overrides the default parameter. NOTE: for g-estimation this should be 13 | #' a propensity formula. 14 | #' @param family the family to be used in the general linear model. 15 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 16 | #' @param simple (optional) a boolean indicator to build default formula with interactions for the g-estimation model. 17 | #' If true, interactions will be excluded. If false, interactions will be included. By default, simple is set to false. 18 | #' NOTE: \eqn{\beta} will be appended to the end of the formula 19 | #' @param p.f (optional) an object of class "formula" that overrides the default formula for the denominator of the IP 20 | #' weighting function. 21 | #' @param p.simple (optional) a boolean indicator to build default formula with interactions for the propensity models. 22 | #' If true, interactions will be excluded. If false, interactions will be included. By 23 | #' default, simple is set to false. 24 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect. 25 | #' @param p.family the family to be used in the underlying propensity model. 26 | #' By default, this is set to \code{\link[stats:binomial]{binomial}}. 27 | #' @param p.scores (optional) use calculated propensity scores for the weights. If using standardized weights, 28 | #' the numerator will still be modeled. 
29 | #' @param SW a boolean indicator to indicate the use of standardized weights. By default, this is set to true. 30 | #' @param n.boot (optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 31 | #' If no value is given, the standard error from the underlying linear model will be used. NOTE: when type is 'one.grid' 32 | #' bootstrapping is not performed. By default, this is set to 100. 33 | #' @param type the type of g-estimation to perform. It must be one of \code{"one.grid"} or \code{"one.linear"} for a 34 | #' one parameter grid and linear mean model estimation respectively. 35 | #' @param ... additional arguments that may be passed to the underlying model. 36 | #' 37 | #' @returns \code{gestimation} returns an object of \code{\link[base:class]{class} "gestimation"}. 38 | #' 39 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 40 | #' the underlying \code{glm} or \code{geeglm} model. 41 | #' 42 | #' An object of class \code{"gestimation"} is a list containing the following: 43 | #' 44 | #' \item{call}{the matched call.} 45 | #' \item{formula}{the formula used in the model.} 46 | #' \item{model}{the underlying glm model. If the model performed a grid search, this will be renamed 'best.model'} 47 | #' \item{weights}{the estimated IP weights.} 48 | #' \item{type}{returns the value used for the 'type' parameter.} 49 | #' \item{ATE}{the estimated average treatment effect (risk difference).} 50 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 51 | #' 52 | #' 53 | #' @examples 54 | #' library(causaldata) 55 | #' data(nhefs) 56 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 57 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 58 | #' 59 | #' confounders <- c( 60 | #' "sex", "race", "age", "education", "smokeintensity", 61 | #' "smokeyrs", "exercise", "active", "wt71" 62 | #' ) 63 | #' 64 | #' init_params(wt82_71, qsmk, 65 | #' covariates = confounders, 66 | #' data = nhefs.nmv 67 | #' ) 68 | #' 69 | #' gest.model <- gestimation(nhefs.nmv, type = "one.linear", n.boot = 150) 70 | #' gest.model$ATE.summary 71 | 72 | #' @export 73 | gestimation <- function(data, grid, ids = list(), f = NA, family = binomial(), simple = pkg.env$simple, 74 | p.f = NA, p.simple = pkg.env$simple, p.family = binomial(), p.scores = NA, 75 | SW = TRUE, n.boot = 100, type = "one.grid", ...) 
{ 76 | check_init() 77 | 78 | # grab function parameters 79 | params <- as.list(match.call()[-1]) 80 | 81 | # if no formula provided 82 | if (is.na(as.character(f))[1]) { 83 | # override simple 84 | if (simple != pkg.env$simple) { 85 | f <- build_formula( 86 | out = pkg.env$treatment, cov = pkg.env$covariates, 87 | data = data, simple = p.simple 88 | ) 89 | } 90 | # use default 91 | else { 92 | f <- formula(pkg.env$f_tr) 93 | } 94 | } 95 | 96 | # if user does not give propensity scores 97 | if (anyNA(p.scores)) { 98 | if (!is.na(as.character(p.f))[1]) { 99 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 100 | } 101 | # if no given propensity formula 102 | else { 103 | if (p.simple != pkg.env$simple) { 104 | p.f <- build_formula( 105 | out = pkg.env$treatment, cov = pkg.env$covariates, 106 | data = data, simple = p.simple 107 | ) 108 | } 109 | # use default 110 | else { 111 | p.f <- formula(pkg.env$f_tr) 112 | } 113 | 114 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 115 | } 116 | } 117 | 118 | # if user does give propensity scores 119 | else { 120 | if (!is.na(as.character(p.f))[1]) { 121 | message("Ignoring given propensity formula since propensity scores have been given.") 122 | } 123 | message("Using given propensity scores.") 124 | } 125 | 126 | # use standardized weights 127 | if (SW) { 128 | numer_scores <- propensity_scores(as.formula(paste(pkg.env$treatment, "~1")), family = binomial(), data = data)$p.scores 129 | 130 | data$weights <- numer_scores / p.scores 131 | } else { 132 | data$weights <- 1 / p.scores 133 | } 134 | 135 | # perform grid search 136 | if (type == "one.grid") { 137 | if (length(grid) == 0) { 138 | errorCondition("'grid' parameter must have be a list with at least one value.") 139 | } 140 | 141 | coefs <- cbind(rep(NA, length(grid)), rep(NA, length(grid)), rep(NA, length(grid)), rep(NA, length(grid))) 142 | colnames(coefs) <- list("Effect", "Estimate", "Std. Error", "Pr(>|W|)") 143 | data$weights <- rep(1, nrow(data)) 144 | 145 | # validate rownames 146 | if (length(ids) != nrow(data)) { 147 | message("IDs list does not match number of rows in data. 
Using rownames by default.") 148 | data$ids <- rownames(data) 149 | } else { 150 | data$ids <- ids 151 | } 152 | 153 | f_terms <- as.character(f) 154 | f <- as.formula(paste(f_terms[[2]], f_terms[[1]], f_terms[[3]], "+", "beta")) 155 | 156 | i <- 0 157 | model <- NA 158 | models <- list() 159 | 160 | for (val in grid) { 161 | i <- i + 1 162 | data[[pkg.env$treatment]] <- as.numeric(as.character(data[[pkg.env$treatment]])) # geeglm requires numeric response 163 | data$beta <- data[[pkg.env$outcome]] - (val * data[[pkg.env$treatment]] - 1) 164 | 165 | model <- geepack::geeglm(f, family = family, data = data, weights = weights, id = ids, corstr = "independence") 166 | models[[i]] <- model 167 | estimate <- summary(model)$coefficients["beta", "Estimate"] 168 | 169 | coefs[i, 1] <- val 170 | coefs[i, 2] <- estimate 171 | coefs[i, 3] <- summary(model)$coefficients["beta", "Std.err"] 172 | coefs[i, 4] <- summary(model)$coefficients["beta", "Pr(>|W|)"] 173 | } 174 | 175 | result <- as.data.frame(coefs) 176 | rownames(result) <- paste("Effect =", result$Effect) 177 | 178 | conf_int_idx <- which(result$`Pr(>|W|)` > 0.05) 179 | 180 | ATE.summary <- data.frame( 181 | Beta = result$Effect[[which.min(abs(result$Estimate))]], 182 | SE = NA, 183 | `2.5 %` = result$Effect[[min(conf_int_idx)]], 184 | `97.5 %` = result$Effect[[max(conf_int_idx)]], 185 | check.names = FALSE 186 | ) 187 | # colnames(ATE.summary) <- c("Beta", "SE", "2.5 %", "97.5 %") 188 | 189 | output <- list( 190 | "call" = model$call, "formula" = f, "best.model" = models[[which.min(abs(result$Estimate))]], 191 | "weights" = data$weights, "result" = result[-1], type = "one.grid", 192 | "ATE" = ATE.summary$Beta, "ATE.summary" = ATE.summary 193 | ) 194 | 195 | class(output) <- "gestimation" 196 | return(output) 197 | } 198 | 199 | # one parameter linear mean model 200 | else if (type == "one.linear") { 201 | data[[pkg.env$treatment]] <- as.numeric(as.character(data[[pkg.env$treatment]])) 202 | 203 | model_func <- function(data, indices, f, family, weights, ...) { 204 | if (!anyNA(indices)) { 205 | data <- data[indices, ] 206 | } 207 | 208 | model <- glm(f, data = data, weights = weights, family = family) 209 | preds <- predict(model, data, type = "response") 210 | estimate <- sum(data$weights * data[[pkg.env$outcome]] * (data[[pkg.env$treatment]] - preds)) / 211 | sum(data$weights * data[[pkg.env$treatment]] * (data[[pkg.env$treatment]] - preds)) 212 | return(list("model" = model, "ATE" = estimate)) 213 | } 214 | 215 | # build model 216 | result <- model_func(data = data, indices = NA, f = f, family = family, weights = data$weights, ...) 217 | model <- result$model 218 | beta <- 0 219 | SE <- 0 220 | ATE <- result$ATE 221 | 222 | if (n.boot > 1) { 223 | # build bootstrapped estimates 224 | boot_result <- boot( 225 | data = data, R = n.boot, f = f, family = family, weights = data$weights, 226 | statistic = function(data, indices, f, family, ...) { 227 | model_func(data, indices, f, family, weights, ...)$ATE 228 | }, ... 
229 | ) 230 | 231 | # calculate 95% CI 232 | beta <- boot_result$t0 233 | SE <- sd(boot_result$t) 234 | } 235 | 236 | # calculate causal stats 237 | ATE.summary <- data.frame( 238 | "Beta" = beta, 239 | "SE" = SE, 240 | conf_int(beta, SE), 241 | check.names = FALSE 242 | ) 243 | 244 | output <- list( 245 | "call" = model$call, "formula" = f, "model" = model, type = "one.linear", 246 | "weights" = data$weights, "ATE" = ATE.summary$Beta, "ATE.summary" = ATE.summary 247 | ) 248 | 249 | class(output) <- "gestimation" 250 | return(output) 251 | } else { 252 | errorCondition("Invalid model type. Must be one of ('one.grid', 'one.linear')") 253 | } 254 | } 255 | 256 | #' @export 257 | print.gestimation <- function(x, ...) { 258 | if (x$type == "one.grid") { 259 | cat("Best Model:") 260 | cat("\r\n") 261 | print(x$best.model, ...) 262 | } else { 263 | print(x$model, ...) 264 | } 265 | 266 | cat("\r\n") 267 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 268 | cat("Estimate - ", x$ATE, "\r\n") 269 | cat("SE - ", x$ATE.summary$SE, "\r\n") 270 | cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n") 271 | } 272 | 273 | #' @export 274 | summary.gestimation <- function(object, ...) { 275 | model <- ifelse(object$type == "one.grid", object$best.model, object$model) 276 | s <- summary(model, ...) 277 | s$ATE <- object$ATE.summary 278 | class(s) <- "summary.gestimation" 279 | return(s) 280 | } 281 | 282 | #' @export 283 | print.summary.gestimation <- function(x, ...) { 284 | class(x) <- "summary.glm" 285 | print(x, ...) 286 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 287 | print(x$ATE, row.names = FALSE) 288 | cat("\r\n") 289 | } 290 | 291 | #' @export 292 | predict.gestimation <- function(object, ...) 
{ 293 | model <- ifelse(object$type == "one.grid", object$best.model, object$model) 294 | return(predict(model, ...)) 295 | } 296 | -------------------------------------------------------------------------------- /R/helper_funcs.R: -------------------------------------------------------------------------------- 1 | 2 | build_formula <- function(out, cov = list(), tr = NA, data, simple = FALSE) { 3 | # set treatment variable value 4 | tr <- ifelse(!is.na(tr), as.character(tr), "") 5 | 6 | # separate continuous and distrete covariates 7 | num_vars <- unlist(lapply(data[cov], is.numeric)) 8 | cont <- cov[num_vars] 9 | disc <- cov[!num_vars] 10 | glm 11 | # collapse discrete covariates with + 12 | disc_val <- paste(unlist(disc), collapse = " + ") 13 | disc_val <- ifelse(tr != "" && disc_val != "", paste("+", disc_val), disc_val) # add + before if prev term exists 14 | 15 | # return basic formula with no interactions 16 | if (simple) { 17 | cont_val <- paste(unlist(cont), collapse = " + ") # collapse all terms with + 18 | cont_val <- ifelse(disc_val != "" || tr != "", # add + before if prev term exists 19 | paste("+", cont_val), cont_val 20 | ) 21 | 22 | # return formula that doesn't include a treatment 23 | if (tr == "") { 24 | return(as.formula(paste(out, "~", disc_val, cont_val, collapse = " "))) 25 | } 26 | # return a formula with a treatment 27 | else { 28 | return(as.formula(paste(out, "~", tr, disc_val, cont_val, collapse = " "))) 29 | } 30 | } 31 | # if treatment exists, add interations between treatment and continuous 32 | # add cubic terms for each continuous variable 33 | else { 34 | # helper function to write out interactions between two vars 35 | print_interact <- function(x, y, use.I = TRUE) { 36 | if (!use.I) { 37 | return(paste("(", x, "*", y, ")", collapse = "")) 38 | } 39 | 40 | return(paste("I(", x, "*", y, ")", collapse = "")) 41 | } 42 | 43 | cont_val <- "" 44 | # only execute if there are continuous covariates 45 | if (!anyNA(cont) && !identical(cont, character(0))) { 46 | # logic for a single continuous covariate 47 | if (length(cont) == 1) { 48 | tr_interact <- ifelse(tr != "", paste("+", print_interact(tr, cont, FALSE)), "") 49 | cont_val <- paste(cont, tr_interact, "+", print_interact(cont, cont)) 50 | } 51 | # logic for a set of continuous covariates 52 | else { 53 | i <- 1 54 | for (var in cont) { 55 | tr_interact <- ifelse(tr != "", paste("+", print_interact(tr, var, FALSE)), "") 56 | plus_op <- ifelse(i == 1, "", "+") # if first value, don't add plus 57 | cont_val <- paste(cont_val, plus_op, var, tr_interact, "+", print_interact(var, var)) 58 | i <- i + 1 59 | } 60 | } 61 | cont_val <- ifelse(tr != "" || disc_val != "", paste("+", cont_val), cont_val) # add + to front if prev terms exist 62 | } 63 | } 64 | if (tr == "") { 65 | return(as.formula(paste(out, "~", disc_val, cont_val, collapse = " "))) 66 | } else { 67 | return(as.formula(paste(out, "~", tr, disc_val, cont_val, collapse = " "))) 68 | } 69 | } 70 | 71 | check_init <- function() { 72 | if (!pkg.env$init) { 73 | stop("Parameters not initialized. Please run init_params(...) 
before using any other functions") 74 | } 75 | } 76 | 77 | 78 | conf_int <- function(x, se) { 79 | lb <- x - qnorm(0.975) * se 80 | ub <- x + qnorm(0.975) * se 81 | return(list("2.5 %" = lb, "97.5 %" = ub)) 82 | } 83 | 84 | 85 | contrast_matrix <- function(model, rows, names) { 86 | mat <- matrix(0, nrow = rows, ncol = length(coef(model))) 87 | colnames(mat) <- names(coef(model)) 88 | rownames(mat) <- names 89 | return(mat) 90 | } 91 | -------------------------------------------------------------------------------- /R/imports.R: -------------------------------------------------------------------------------- 1 | #' @import stats 2 | #' @import causaldata 3 | #' @import boot 4 | #' @import multcomp 5 | #' @import geepack 6 | NULL 7 | -------------------------------------------------------------------------------- /R/initialize.R: -------------------------------------------------------------------------------- 1 | #' @title Initialize CausalModels Package 2 | #' @description This function is required to be run first before any other function can run. 3 | #' This will set within the package the global outcome, treatment, and covariate functions for each model to use. 4 | #' 5 | #' @param outcome the outcome variable of interest (must be continuous). 6 | #' @param treatment the treatment with the causal effect of interest on the outcome. 7 | #' @param covariates a list/vector of covariate names to be use for confounding adjustment. 8 | #' @param data a data frame containing the variables in the model. 9 | #' @param simple a boolean indicator to build default formula with interactions. 10 | #' If true, interactions will be excluded. If false, interactions will be included. By 11 | #' default, simple is set to false. 12 | #' 13 | #' @returns No return value. Called for parameter initialization. 
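Since init_params() has no examples block of its own, here is a minimal initialization sketch, copied from the examples used throughout the other functions in this package (nhefs data from the causaldata package):

library(causaldata)
data(nhefs)
nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ]   # drop rows with a missing outcome
nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)       # treatment must be a two-level factor

confounders <- c(
  "sex", "race", "age", "education", "smokeintensity",
  "smokeyrs", "exercise", "active", "wt71"
)

# outcome must be numeric; this sets the defaults used by every other model function
init_params(wt82_71, qsmk, covariates = confounders, data = nhefs.nmv)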
14 | #' 15 | #' @export 16 | #' 17 | init_params <- function(outcome, treatment, covariates, data, simple = FALSE) { 18 | params <- as.list(match.call()[-1]) 19 | cov <- params$cov 20 | 21 | if (!is.numeric(data[[as.character(params$outcome)]])) { 22 | assign("init", FALSE, pkg.env) 23 | stop("Outcome variable must be numeric!") 24 | } 25 | 26 | if (!is.factor(data[[as.character(params$treatment)]])) { 27 | assign("init", FALSE, pkg.env) 28 | stop("Treatment must be of type factor!") 29 | } else if (length(levels(data[[as.character(params$treatment)]])) > 2) { 30 | assign("init", FALSE, pkg.env) 31 | stop(paste("Treatment must be binary!", as.character(params$treatment), "has more than two levels!")) 32 | } 33 | 34 | tryCatch(data[as.character(params$covariates)[-1]], error = function(e) { 35 | assign("init", TRUE, pkg.env) 36 | stop(e) 37 | }) 38 | 39 | assign("outcome", as.character(params$outcome), pkg.env) 40 | assign("treatment", as.character(params$treatment), pkg.env) 41 | 42 | # if covariates are given as strings 43 | if (length(as.character(params$covariates)[-1]) == 0) { 44 | cov <- covariates 45 | assign("covariates", cov, pkg.env) 46 | } 47 | # if covariates are not given as strings 48 | else { 49 | assign("covariates", as.character(params$covariates)[-1], pkg.env) 50 | } 51 | 52 | assign("simple", simple, pkg.env) 53 | assign("p_simple", simple, pkg.env) 54 | 55 | f_out <- build_formula( 56 | out = as.character(params$outcome), 57 | tr = as.character(params$treatment), 58 | cov = pkg.env$covariates, 59 | data = data, simple = simple 60 | ) 61 | f_tr <- build_formula( 62 | out = as.character(params$treatment), 63 | cov = pkg.env$covariates, 64 | data = data, simple = simple 65 | ) 66 | assign("f_out", f_out, pkg.env) 67 | assign("f_tr", f_tr, pkg.env) 68 | 69 | assign("init", TRUE, pkg.env) 70 | 71 | cat("Successfully initialized!\r\n\r\n") 72 | 73 | cat("Summary:\r\n\r\n") 74 | cat(paste("Outcome -", pkg.env$outcome, "\r\n")) 75 | cat(paste("Treatment -", pkg.env$treatment, "\r\n")) 76 | cat(paste("Covariates -", "[", paste(pkg.env$covariates, collapse = ", "), "]", "\r\n\r\n")) 77 | cat(paste("Size -", nrow(data), "x", ncol(data), "\r\n\r\n")) 78 | cat(paste("Default formula for outcome models:", "\r\n")) 79 | cat(paste(deparse(f_out, width.cutoff = 500), "\r\n\r\n", collapse = "")) 80 | cat(paste("Default formula for propensity models:", "\r\n")) 81 | cat(paste(deparse(f_tr, width.cutoff = 500), "\r\n", collapse = "")) 82 | } 83 | -------------------------------------------------------------------------------- /R/ipweighting.R: -------------------------------------------------------------------------------- 1 | #' @title Parametric IP Weighting 2 | #' @description `ipweighting` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 3 | #' weights. The weights can either be standardized weights or non-standardized weights. The weights are used to train a 4 | #' general linear model whose coefficient for treatment represents the average treatment effect on the additive scale. 5 | #' 6 | #' @param data a data frame containing the variables in the model. 7 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 8 | #' @param f (optional) an object of class "formula" that overrides the default parameter 9 | #' @param family the family to be used in the general linear model. 10 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 
11 | #' @param p.f (optional) an object of class "formula" that overrides the default formula for the denominator of the IP 12 | #' weighting function. 13 | #' @param p.simple a boolean indicator to build default formula with interactions for the propensity models. 14 | #' If true, interactions will be excluded. If false, interactions will be included. By 15 | #' default, simple is set to false. 16 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect. 17 | #' @param p.family the family to be used in the underlying propensity model. 18 | #' By default, this is set to \code{\link[stats:binomial]{binomial}}. 19 | #' @param p.scores (optional) use calculated propensity scores for the weights. If using standardized weights, 20 | #' the numerator will still be modeled. 21 | #' @param SW a boolean indicator to indicate the use of standardized weights. By default, this is set to true. 22 | #' @param n.boot (optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 23 | #' If no value is given, the standard error from the underlying linear model will be used. 24 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model. 25 | #' 26 | #' @returns \code{ipweighting} returns an object of \code{\link[base:class]{class} "ipweighting"}. 27 | #' 28 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 29 | #' the underlying \code{glm} model. 30 | #' 31 | #' An object of class \code{"ipweighting"} is a list containing the following: 32 | #' 33 | #' \item{call}{the matched call.} 34 | #' \item{formula}{the formula used in the model.} 35 | #' \item{model}{the underlying glm model.} 36 | #' \item{weights}{the estimated IP weights.} 37 | #' \item{ATE}{the estimated average treatment effect (risk difference).} 38 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 39 | #' 40 | #' @export 41 | #' 42 | #' @examples 43 | #' library(causaldata) 44 | #' data(nhefs) 45 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 46 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 47 | #' 48 | #' confounders <- c( 49 | #' "sex", "race", "age", "education", "smokeintensity", 50 | #' "smokeyrs", "exercise", "active", "wt71" 51 | #' ) 52 | #' 53 | #' init_params(wt82_71, qsmk, 54 | #' covariates = confounders, 55 | #' data = nhefs.nmv 56 | #' ) 57 | #' 58 | #' # model using all defaults 59 | #' model <- ipweighting(data = nhefs.nmv) 60 | #' summary(model) 61 | #' 62 | #' # Model using calculated propensity scores and manual outcome formula 63 | #' p.scores <- propensity_scores(nhefs.nmv)$p.scores 64 | #' model <- ipweighting(wt82_71 ~ qsmk, p.scores = p.scores, data = nhefs.nmv) 65 | #' summary(model) 66 | #' 67 | ipweighting <- function(data, f = NA, family = gaussian(), p.f = NA, p.simple = pkg.env$simple, 68 | p.family = binomial(), p.scores = NA, SW = TRUE, n.boot = 0, ...) 
{ 69 | check_init() 70 | data$weights <- rep(1, nrow(data)) 71 | 72 | # grab function parameters 73 | params <- as.list(match.call()[-1]) 74 | 75 | # if user gives an outcome formula 76 | if (is.na(as.character(f))[1]) { 77 | f <- as.formula(paste(pkg.env$outcome, "~", pkg.env$treatment)) 78 | } 79 | 80 | # if user does not give propensity scores 81 | if (anyNA(p.scores)) { 82 | if (!is.na(as.character(p.f))[1]) { 83 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 84 | } 85 | # if no given propensity formula 86 | else { 87 | if (p.simple != pkg.env$simple) { 88 | p.f <- build_formula( 89 | out = pkg.env$treatment, cov = pkg.env$covariates, 90 | data = data, simple = p.simple 91 | ) 92 | } 93 | # use default 94 | else { 95 | p.f <- formula(pkg.env$f_tr) 96 | } 97 | 98 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 99 | } 100 | } 101 | # if user does give propensity scores 102 | else { 103 | if (!is.na(as.character(p.f))[1]) { 104 | message("Ignoring given propensity formula since propensity scores have been given.") 105 | } 106 | message("Using given propensity scores.") 107 | } 108 | 109 | if (SW) { 110 | numer_scores <- propensity_scores(as.formula(paste(pkg.env$treatment, "~1")), family = binomial(), data = data)$p.scores 111 | 112 | data$weights <- numer_scores / p.scores 113 | } else { 114 | data$weights <- 1 / p.scores 115 | } 116 | 117 | model_func <- function(data, indices, f, family, weights, ...) { 118 | if (!anyNA(indices)) { 119 | data <- data[indices, ] 120 | } 121 | 122 | model <- glm(f, weights = weights, data = data, family = family, ...) 123 | model$call$formula <- formula(f) # manually set model formula to prevent "formula = formula" 124 | 125 | return(list("model" = model, "ATE" = coef(model)[[2]])) 126 | } 127 | 128 | # build model 129 | result <- model_func(data = data, indices = NA, f = f, family = family, weights = data$weights, ...) 130 | model <- result$model 131 | beta <- 0 132 | SE <- 0 133 | ATE <- list() 134 | if (n.boot > 1) { 135 | # build bootstrapped estimates 136 | boot_result <- boot( 137 | data = data, R = n.boot, f = f, family = family, weights = data$weights, 138 | statistic = function(data, indices, f, family, weights, ...) { 139 | model_func(data, indices, f, family, weights, ...)$ATE 140 | }, ... 141 | ) 142 | 143 | # calculate 95% CI 144 | beta <- boot_result$t0 145 | SE <- sd(boot_result$t) 146 | ATE <- data.frame( 147 | "Beta" = beta, 148 | "SE" = SE, 149 | conf_int(beta, SE), 150 | check.names = FALSE 151 | ) 152 | } else { 153 | # calculate causal stats 154 | beta <- coef(model)[[2]] 155 | SE <- coef(summary(model))[2, 2] 156 | ATE <- data.frame( 157 | "Beta" = beta, 158 | "SE" = SE, 159 | conf_int(beta, SE), 160 | check.names = FALSE 161 | ) 162 | } 163 | 164 | 165 | output <- list( 166 | "call" = model$call, "formula" = model$call$formula, "model" = model, 167 | "weights" = data$weights, "ATE" = beta, "ATE.summary" = ATE 168 | ) 169 | 170 | class(output) <- "ipweighting" 171 | return(output) 172 | } 173 | 174 | #' @export 175 | print.ipweighting <- function(x, ...) { 176 | print(x$model, ...) 177 | cat("\r\n") 178 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 179 | cat("Estimate - ", x$ATE, "\r\n") 180 | cat("SE - ", x$ATE.summary$SE, "\r\n") 181 | cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n") 182 | } 183 | 184 | #' @export 185 | summary.ipweighting <- function(object, ...) { 186 | s <- summary(object$model, ...) 
187 | s$ATE <- object$ATE.summary 188 | class(s) <- "summary.ipweighting" 189 | return(s) 190 | } 191 | 192 | #' @export 193 | print.summary.ipweighting <- function(x, ...) { 194 | class(x) <- "summary.glm" 195 | print(x, ...) 196 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 197 | print(x$ATE, row.names = FALSE) 198 | cat("\r\n") 199 | } 200 | 201 | #' @export 202 | predict.ipweighting <- function(object, ...) { 203 | return(predict(object$model, ...)) 204 | } 205 | -------------------------------------------------------------------------------- /R/iv_est.R: -------------------------------------------------------------------------------- 1 | #' @title Standard Instrumental Variable Estimator 2 | #' @description `iv_est` calculates the standard IV estimand using the conditional means on a given instrumental variable. 3 | #' 4 | #' @param IV the instrumental variable to be used in the conditional means. Must be a factor with no more than 2 levels. 5 | #' It is assumed the second level is the positive level, i.e., the binary equivalent of the second factor level should be 1 6 | #' and the first should be 0. 7 | #' @param data a data frame containing the variables in the model. 8 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 9 | #' @param n.boot an integer value that indicates number of bootstrap iterations to calculate standard error. 10 | #' 11 | #' @returns \code{iv_est} returns a data frame containing the standard IV estimate, standard error, and Wald 95% CI. 12 | #' 13 | #' @export 14 | #' 15 | #' @examples 16 | #' library(causaldata) 17 | #' data(nhefs) 18 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 19 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 20 | #' 21 | #' confounders <- c( 22 | #' "sex", "race", "age", "education", "smokeintensity", 23 | #' "smokeyrs", "exercise", "active", "wt71" 24 | #' ) 25 | #' nhefs.iv <- nhefs[which(!is.na(nhefs$wt82) & !is.na(nhefs$price82)), ] 26 | #' nhefs.iv$highprice <- as.factor(ifelse(nhefs.iv$price82 >= 1.5, 1, 0)) 27 | #' nhefs.iv$qsmk <- as.factor(nhefs.iv$qsmk) 28 | 29 | #' init_params(wt82_71, qsmk, 30 | #' covariates = confounders, 31 | #' data = nhefs.iv) 32 | #' 33 | #' iv_est("highprice", nhefs.iv) 34 | 35 | iv_est <- function(IV, data, n.boot = 50) { 36 | check_init() 37 | 38 | # grab function parameters 39 | params <- as.list(match.call()[-1]) 40 | IV_levels <- levels(data[[IV]]) 41 | 42 | IV <- as.character(params$IV) 43 | if (!is.factor(data[[IV]])) { 44 | stop("Instrumental variable must be a factor") 45 | } else if (length(IV_levels) > 2) { 46 | stop("Instrumental variable must be binary") 47 | } 48 | 49 | est_func <- function(data, indices, ...) 
{ 50 | data <- data[indices, ] 51 | # manual calculation of standard IV Estimator 52 | numer_1 <- mean(data[data[[IV]] == IV_levels[[2]], ][[pkg.env$outcome]], na.rm = TRUE) 53 | numer_0 <- mean(data[data[[IV]] == IV_levels[[1]], ][[pkg.env$outcome]], na.rm = TRUE) 54 | denom_1 <- mean(as.numeric(data[data[[IV]] == IV_levels[[2]], ][[pkg.env$treatment]]), na.rm = TRUE) 55 | denom_0 <- mean(as.numeric(data[data[[IV]] == IV_levels[[1]], ][[pkg.env$treatment]]), na.rm = TRUE) 56 | 57 | return((numer_1 - numer_0) / (denom_1 - denom_0)) 58 | } 59 | 60 | boot_result <- boot(data = data, statistic = est_func, R = n.boot) 61 | 62 | # calculate 95% CI 63 | beta <- boot_result$t0 64 | SE <- sd(boot_result$t) 65 | ATE <- data.frame( 66 | "ATE" = beta, 67 | "SE" = SE, 68 | conf_int(beta, SE), 69 | check.names = FALSE 70 | ) 71 | 72 | return(ATE) 73 | } 74 | -------------------------------------------------------------------------------- /R/outcome_regression.R: -------------------------------------------------------------------------------- 1 | #' @title Outcome Regression 2 | #' @description `outcome_regression` builds a linear model using all covariates. The treatment effects are stratified 3 | #' within the levels of the covariates. The model will automatically provide all discrete covariates in a contrast matrix. 4 | #' To view estimated change in treatment effect from continuous variables, a list called \code{contrasts}, needs to be given 5 | #' with specific values to estimate. A vector of values can be given for any particular continuous variable. 6 | #' 7 | #' @param data a data frame containing the variables in the model. 8 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 9 | #' @param f (optional) an object of class "formula" that overrides the default parameter 10 | #' @param simple a boolean indicator to build default formula with interactions. 11 | #' If true, interactions will be excluded. If false, interactions will be included. By 12 | #' default, simple is set to false. 13 | #' @param family the family to be used in the general linear model. 14 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 15 | #' NOTE: if this is changed, the assumptions about the model output may be incorrect and may not provide 16 | #' accurate treatment effects. 17 | #' @param contrasts a list of continuous covariates and values in the model to be included in the contrast matrix 18 | #' (e.g. \code{list(age = c(18, 25, 40), weight = c(90, 159))}). 19 | #' @param ... additional arguments that may be passed to the underlying \code{\link[multcomp:glht]{glht}} model. 20 | #' 21 | #' @returns \code{outcome_regression} returns an object of \code{\link[base:class]{class} "outcome_regression"} 22 | #' 23 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 24 | #' the underlying \code{glht} model. 25 | #' 26 | #' An object of class \code{"outcome_regression"} is a list containing the following: 27 | #' 28 | #' \item{call}{the matched call.} 29 | #' \item{formula}{the formula used in the model.} 30 | #' \item{model}{the underlying glht model.} 31 | #' \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 32 | #' \item{ATE.summary}{a more detailed summary of the ATE estimations from glht. 
} 33 | #' 34 | #' @export 35 | #' 36 | #' @examples 37 | #' library(causaldata) 38 | #' library(multcomp) 39 | #' 40 | #' data(nhefs) 41 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 42 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 43 | #' 44 | #' confounders <- c( 45 | #' "sex", "race", "age", "education", "smokeintensity", 46 | #' "smokeyrs", "exercise", "active", "wt71" 47 | #' ) 48 | #' 49 | #' init_params(wt82_71, qsmk, 50 | #' covariates = confounders, 51 | #' data = nhefs.nmv 52 | #' ) 53 | #' 54 | #' out.mod <- outcome_regression(nhefs.nmv, contrasts = list( 55 | #' age = c(21, 55), 56 | #' smokeintensity = c(5, 20, 40) 57 | #' )) 58 | #' print(out.mod) 59 | #' summary(out.mod) 60 | #' head(data.frame(preds = predict(out.mod))) 61 | #' 62 | outcome_regression <- function(data, f = NA, simple = pkg.env$simple, 63 | family = gaussian(), contrasts = list(), ...) { 64 | check_init() 65 | 66 | # grab function parameters 67 | params <- as.list(match.call()[-1]) 68 | 69 | # if no formula provided 70 | if (is.na(as.character(f))[1]) { 71 | # override simple 72 | if (simple != pkg.env$simple) { 73 | f <- build_formula( 74 | out = pkg.env$outcome, tr = pkg.env$treatment, 75 | cov = pkg.env$covariates, data = data, simple = simple 76 | ) 77 | } 78 | # use default 79 | else { 80 | f <- formula(pkg.env$f_out) 81 | } 82 | } 83 | 84 | model <- NA 85 | call <- NA 86 | ATE <- NA 87 | ATE.summary <- NA 88 | 89 | model <- glm(f, data = data, family = family, ...) 90 | model$call$formula <- f 91 | 92 | # grab and divide model matrix 93 | temp_map <- data.frame(model.matrix(model)) 94 | num_vars <- sapply(temp_map, function(x) { 95 | length(unique(x)) > 2 96 | }) 97 | disc <- temp_map[!num_vars] 98 | disc <- disc[, -c(1, ncol(disc))] # exclude intercept and treatment 99 | has_interaction <- unlist(lapply(names(num_vars), function(x) { 100 | grepl(pkg.env$treatment, 101 | x, 102 | fixed = TRUE 103 | ) 104 | })) 105 | continuous <- temp_map[has_interaction][-1] 106 | 107 | 108 | 109 | # if there are no interactions, use original variables 110 | if (ncol(continuous) == 0) { 111 | continuous <- temp_map[num_vars] 112 | } 113 | # if only sum interactions, use original for those without one 114 | else { 115 | no_interaction <- list() 116 | for (i in 1:length(num_vars)) { 117 | no_interaction[i] <- (!has_interaction[[i]] && num_vars[[i]] && 118 | !any(grepl(names(num_vars)[[i]], names(continuous)))) 119 | } 120 | continuous <- cbind(continuous, temp_map[unlist(no_interaction)]) 121 | } 122 | 123 | continuous_names <- names(continuous) 124 | 125 | cont_input <- list() 126 | if(length(names(disc)) > 0) { 127 | for (i in 1:length(names(disc))) { 128 | cont_input[[i]] <- paste("Effect of", pkg.env$treatment, "at", names(disc[i])) 129 | } 130 | } 131 | 132 | 133 | if (length(contrasts) > 0) { 134 | j <- length(cont_input) + 1 135 | for (i in 1:length(contrasts)) { 136 | cont_input[[j]] <- paste("Effect of", pkg.env$treatment, "at", names(contrasts[i]), "of", contrasts[[i]]) 137 | j <- j + 1 138 | } 139 | } 140 | else if (length(cont_input) > 0) { 141 | warning("No contrasts included in analysis. 
This will exclude results from any continuous covariates.") 142 | } 143 | 144 | cont_input <- unlist(cont_input) 145 | 146 | cont_mat <- contrast_matrix(model, length(cont_input), cont_input) 147 | 148 | sum_cont <- 0 149 | for (cont in contrasts) { 150 | sum_cont <- sum_cont + length(cont) 151 | } 152 | 153 | j <- 1 154 | cont_mat[1:length(cont_input), paste0(pkg.env$treatment, levels(data[[pkg.env$treatment]])[2])] <- 1 155 | 156 | for (i in 1:length(names(disc))) { 157 | cont_mat[j, names(disc[i])] <- 1 158 | j <- j + 1 159 | } 160 | 161 | if (length(contrasts) > 0) { 162 | for (i in 1:length(contrasts)) { 163 | name <- names(contrasts[i]) 164 | for (val in contrasts[[i]]) { 165 | if (paste(pkg.env$treatment, levels(data[[pkg.env$treatment]])[2], 166 | ":", name, 167 | sep = "" 168 | ) %in% colnames(cont_mat)) { 169 | cont_mat[j, paste(pkg.env$treatment, levels(data[[pkg.env$treatment]])[2], ":", name, sep = "")] <- val 170 | } else if (paste(name, ":", pkg.env$treatment, 171 | levels(data[[pkg.env$treatment]])[2], 172 | sep = "" 173 | ) %in% colnames(cont_mat)) { 174 | cont_mat[j, paste(name, ":", pkg.env$treatment, levels(data[[pkg.env$treatment]])[2], sep = "")] <- val 175 | } else { 176 | cont_mat[j, name] <- val 177 | } 178 | j <- j + 1 179 | } 180 | } 181 | } 182 | 183 | 184 | 185 | model <- glht(model, linfct = cont_mat) 186 | call <- model$model$call 187 | 188 | # summarize model 189 | sum_model <- summary(model)$test 190 | 191 | results <- data.frame( 192 | "Estimate" = sum_model$coefficients, 193 | "Std. Error" = sum_model$sigma, 194 | conf_int(sum_model$coefficients, sum_model$sigma), 195 | check.names = FALSE 196 | ) 197 | 198 | ATE.summary <- summary(model) 199 | ATE <- results 200 | 201 | output <- list( 202 | "call" = call, "formula" = call$formula, "model" = model, 203 | "ATE" = ATE, "ATE.summary" = ATE.summary 204 | ) 205 | 206 | class(output) <- "outcome_regression" 207 | return(output) 208 | } 209 | 210 | #' @export 211 | print.outcome_regression <- function(x, ...) { 212 | cat("\r\n") 213 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 214 | cat("\r\n") 215 | print(x$ATE) 216 | } 217 | 218 | #' @export 219 | summary.outcome_regression <- function(object, ...) { 220 | summary(object$model, ...) 221 | } 222 | 223 | #' @export 224 | predict.outcome_regression <- function(object, ...) { 225 | return(predict(object$model$model, ...)) 226 | } 227 | -------------------------------------------------------------------------------- /R/propensity_matching.R: -------------------------------------------------------------------------------- 1 | #' @title Propensity Matching 2 | #' @description `propensity_matching` uses either stratification or standardization to model an outcome 3 | #' conditional on the propensity scores. In stratification, the model will break the propensity scores 4 | #' into groups and output a \code{\link[multcomp:glht]{glht}} model based off a contrast matrix which 5 | #' estimates the change in average causal effect within groups of propensity scores. In standardization, 6 | #' the model will output a \code{\link[=standardization]{standardization}} model that conditions on the 7 | #' propensity strata rather than the covariates. The model can also predict the expected outcome. 8 | #' 9 | #' @param data a data frame containing the variables in the model. 10 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 
11 | #' @param f (optional) an object of class "formula" that overrides the default parameter 12 | #' @param simple a boolean indicator to build default formula with interactions. 13 | #' If true, interactions will be excluded. If false, interactions will be included. By 14 | #' default, simple is set to false. 15 | #' @param p.scores (optional) use calculated propensity scores for matching. Otherwise, propensity scores 16 | #' will be automatically modeled. 17 | #' @param p.simple a boolean indicator to build default formula with interactions for the propensity models. 18 | #' If true, interactions will be excluded. If false, interactions will be included. By 19 | #' default, simple is set to false. 20 | #' @param type a string representing the type of propensity model to be used. By default, the function will stratify. Standardization with 21 | #' propensity scores may also be used. The value given for \code{type} must be in \code{c("strata", "stdm")}. 22 | #' @param grp.width a decimal value to specify the range to stratify the propensity scores. If option \code{quant} is set to true, 23 | #' this will represent the spread of percentiles. If false, it will represent the spread of raw values of propensity 24 | #' scores. Must be a decimal between 0 and 1. By default, this is set to 0.1. This option is ignored for standardization. 25 | #' @param quant a boolean indicator to specify the type of stratification. If true (default), the model will stratify by 26 | #' percentiles. If false, the scores will be grouped by a range of their raw values. This option is ignored for standardization. 27 | #' @param ... additional arguments that may be passed to the underlying \code{\link[=propensity_scores]{propensity_scores}} function. 28 | #' 29 | #' @returns \code{propensity_matching} returns an object of \code{\link[base:class]{class} "propensity_matching"} 30 | #' 31 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with the underlying \code{glht} or 32 | #' \code{standardization} model. 33 | #' 34 | #' An object of class \code{"propensity_matching"} is a list containing the following: 35 | #' 36 | #' \item{call}{the matched call.} 37 | #' \item{formula}{the formula used in the model.} 38 | #' \item{model}{either the underlying \code{glht} or \code{standardization} model.} 39 | #' \item{p.scores}{the estimated propensity scores} 40 | #' \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 41 | #' \item{ATE.summary}{either a data frame containing the \code{glht} or \code{standardization} summary. } 42 | #' 43 | #' @export 44 | #' 45 | #' @examples 46 | #' library(causaldata) 47 | #' library(multcomp) 48 | #' 49 | #' data(nhefs) 50 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 51 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 52 | #' 53 | #' confounders <- c( 54 | #' "sex", "race", "age", "education", "smokeintensity", 55 | #' "smokeyrs", "exercise", "active", "wt71" 56 | #' ) 57 | #' 58 | #' init_params(wt82_71, qsmk, 59 | #' covariates = confounders, 60 | #' data = nhefs.nmv 61 | #' ) 62 | #' 63 | #' pm.model <- propensity_matching(nhefs.nmv) 64 | #' pm.model$ATE.summary 65 | #' summary(pm.model) 66 | #' head(data.frame(preds = predict(pm.model))) 67 | #' 68 | propensity_matching <- function(data, f = NA, simple = pkg.env$simple, p.scores = NA, p.simple = pkg.env$simple, 69 | type = "strata", grp.width = 0.1, quant = TRUE, ...) 
{
70 |   check_init()
71 |
72 |   # grab function parameters
73 |   params <- as.list(match.call()[-1])
74 |
75 |   # check valid model type
76 |   if (!type %in% c("strata", "stdm")) {
77 |     stop("Invalid model type! Must be one of the following values: 'strata' or 'stdm'")
78 |   }
79 |
80 |   if (grp.width <= 0 || grp.width >= 1) {
81 |     stop("Invalid parameter! You must set 0 < grp.width < 1")
82 |   }
83 |
84 |   # if no given propensity scores
85 |   if (anyNA(p.scores)) {
86 |     # if no formula provided
87 |     if (is.na(as.character(f))[1]) {
88 |       # override simple
89 |       if (p.simple != pkg.env$simple) {
90 |         f <- build_formula(
91 |           out = pkg.env$treatment, cov = pkg.env$covariates,
92 |           data = data, simple = p.simple
93 |         )
94 |       }
95 |       # use default
96 |       else {
97 |         f <- formula(pkg.env$f_tr)
98 |       }
99 |     }
100 |
101 |     p.scores <- propensity_scores(f = f, data = data, ...)$p.scores
102 |   }
103 |
104 |   # initialize output objects
105 |   model <- NA
106 |   call <- NA
107 |   ATE <- NA
108 |   ATE.summary <- NA
109 |
110 |   # using stratification
111 |   if (type == "strata") {
112 |     p.grp <- list()
113 |     lookup <- NA
114 |     if (quant) { # use percentiles
115 |       # group by propensity percentiles w/ width = grp.width
116 |       quants <- c(quantile(p.scores, probs = seq(0, 1, grp.width)))
117 |       p.grp <- cut(p.scores, breaks = quants, include.lowest = TRUE)
118 |
119 |       # create lookup table for groups
120 |       lookup <- data.frame("n" = table(p.grp), names(quants)[-1])
121 |       colnames(lookup) <- c("breaks", "n", "percentile")
122 |       lookup$grp.name <- paste("p.grp", seq(1:(length(quants) - 1)), sep = "")
123 |
124 |       levels(p.grp) <- 1:nrow(lookup) # rename levels
125 |       lookup <- lookup[c("grp.name", "breaks", "n", "percentile")]
126 |     } else { # use raw groupings
127 |       # group by propensity breaks w/ width = grp.width
128 |       quants <- seq(0, 1, grp.width)
129 |       p.grp <- cut(p.scores, breaks = quants, include.lowest = TRUE)
130 |
131 |       # create lookup table for groups
132 |       lookup <- data.frame("n" = table(p.grp))
133 |       colnames(lookup) <- c("breaks", "n")
134 |       lookup$grp.name <- paste("p.grp", seq(1:(length(quants) - 1)), sep = "")
135 |
136 |       levels(p.grp) <- 1:nrow(lookup) # rename levels
137 |       lookup <- lookup[c("grp.name", "breaks", "n")]
138 |     }
139 |
140 |     # build linear model to make estimates in the contrast matrix
141 |     model.f <- as.formula(paste(pkg.env$outcome, "~", pkg.env$treatment, "* p.grp"))
142 |     model <- glm(model.f, data = data)
143 |     model$call$formula <- model.f
144 |
145 |     # build contrast matrix of all propensity groups
146 |     cont_mat <- contrast_matrix(
147 |       model, nrow(lookup),
148 |       c(paste("Effect of", pkg.env$treatment, "for p.score in", lookup$breaks))
149 |     )
150 |
151 |
152 |     # set all treatment values to 1
153 |     cont_mat[1:nrow(lookup), names(model$coefficients)[2]] <- 1
154 |
155 |     # try to fill the diag of the matrix with 1s
156 |     tryCatch(
157 |       for (i in 2:nrow(lookup)) {
158 |         cont_mat[i, paste(names(model$coefficients)[2], ":", lookup$grp.name[[i]], sep = "")] <- 1
159 |       },
160 |       # if this fails, there are likely one or more groups with <= 1 samples
161 |       error = function(e) {
162 |         stop("Unable to stratify propensity scores. This is likely due to a lack of positivity in the groups.
163 | Try setting 'grp.width' to a larger value.") 164 | } 165 | ) 166 | 167 | # build model for contrast matrix 168 | model <- glht(model, cont_mat) 169 | call <- model$model$call 170 | 171 | # summarize model 172 | sum_model <- summary(model)$test 173 | 174 | results <- data.frame( 175 | "Estimate" = sum_model$coefficients, 176 | "Std. Error" = sum_model$sigma, 177 | conf_int(sum_model$coefficients, sum_model$sigma), 178 | check.names = FALSE 179 | ) 180 | 181 | ATE.summary <- cbind(lookup, results) 182 | ATE <- results 183 | } 184 | 185 | # using standardization 186 | else if (type == "stdm") { 187 | model.f <- build_formula( 188 | out = pkg.env$outcome, tr = pkg.env$treatment, 189 | cov = c("p.scores"), simple = simple, data = cbind(data, p.scores) 190 | ) 191 | model <- standardization(f = model.f, data = cbind(data, p.scores)) 192 | 193 | call <- model$call 194 | call$formula <- model.f 195 | ATE <- model$ATE 196 | ATE.summary <- model$ATE.summary 197 | } 198 | 199 | output <- list( 200 | "call" = call, "formula" = call$formula, "model" = model, "p.scores" = p.scores, 201 | "ATE" = ATE, "ATE.summary" = ATE.summary, "type" = type 202 | ) 203 | 204 | class(output) <- "propensity_matching" 205 | return(output) 206 | } 207 | 208 | #' @export 209 | print.propensity_matching <- function(x, ...) { 210 | if (x$type == "strata") { 211 | cat("\r\n") 212 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 213 | cat("\r\n") 214 | print(x$ATE) 215 | } else if (x$type == "stdm") { 216 | print(x$model, ...) 217 | } 218 | } 219 | 220 | #' @export 221 | summary.propensity_matching <- function(object, ...) { 222 | if (object$type == "strata" || object$type == "stdm") { 223 | summary(object$model, ...) 224 | } 225 | } 226 | 227 | #' @export 228 | predict.propensity_matching <- function(object, ...) { 229 | if (object$type == "strata") { 230 | return(predict(object$model$model, ...)) 231 | } else if (object$type == "stdm") { 232 | return(predict(object$model, ...)) 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /R/propensity_scores.R: -------------------------------------------------------------------------------- 1 | #' @title Propensity Scores 2 | #' @description `propensity_scores` builds a logistic regression with the target as the treatment variable 3 | #' and the covariates as the independent variables. 4 | #' 5 | #' @param data a data frame containing the variables in the model. 6 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 7 | #' @param f (optional) an object of class "formula" that overrides the default parameter 8 | #' @param simple a boolean indicator to build default formula with interactions. 9 | #' If true, interactions will be excluded. If false, interactions will be included. By 10 | #' default, simple is set to false. 11 | #' @param family the family to be used in the general linear model. 12 | #' By default, this is set to \code{\link[stats:binomial]{binomial}} 13 | #' NOTE: if this is changed, the outcome of the model may not be the probabilities and the results will not be valid. 14 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model. 15 | #' 16 | #' @returns \code{propensity_scores} returns an object of \code{\link[base:class]{class} "propensity_scores"} 17 | #' 18 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 19 | #' the underlying \code{glm} model. 
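Because several estimators in this package accept precomputed scores, the propensity model only needs to be fit once. A minimal usage sketch (assuming the NHEFS setup from the examples below has already been run through `init_params`):

``` r
# fit the propensity model once and extract the scores
p.scores <- propensity_scores(nhefs.nmv)$p.scores

# the same scores can then be reused by other estimators via their p.scores argument
ip.model <- ipweighting(data = nhefs.nmv, p.scores = p.scores)
pm.model <- propensity_matching(data = nhefs.nmv, p.scores = p.scores)
```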
20 | #' 21 | #' An object of class \code{"propensity_scores"} is a list containing the following: 22 | #' 23 | #' \item{call}{the matched call.} 24 | #' \item{formula}{the formula used in the model.} 25 | #' \item{model}{the underlying glm model.} 26 | #' \item{p.scores}{the estimated propensity scores.} 27 | #' 28 | #' @export 29 | #' 30 | #' @examples 31 | #' library(causaldata) 32 | #' data(nhefs) 33 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 34 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 35 | #' 36 | #' confounders <- c( 37 | #' "sex", "race", "age", "education", "smokeintensity", 38 | #' "smokeyrs", "exercise", "active", "wt71" 39 | #' ) 40 | #' 41 | #' init_params(wt82_71, qsmk, 42 | #' covariates = confounders, 43 | #' data = nhefs.nmv 44 | #' ) 45 | #' 46 | #' p.score <- propensity_scores(nhefs.nmv) 47 | #' p.score 48 | #' 49 | propensity_scores <- function(data, f = NA, simple = pkg.env$simple, family = binomial(), ...) { 50 | check_init() 51 | 52 | # grab function parameters 53 | params <- as.list(match.call()[-1]) 54 | 55 | # if no formula provided 56 | if (is.na(as.character(f))[1]) { 57 | # override simple 58 | if (simple != pkg.env$simple) { 59 | f <- build_formula( 60 | out = pkg.env$treatment, cov = pkg.env$covariates, 61 | data = data, simple = simple 62 | ) 63 | } 64 | # use default 65 | else { 66 | f <- formula(pkg.env$f_tr) 67 | } 68 | } 69 | 70 | # build model and generate scores 71 | model <- glm(f, data = data, family = family, ...) 72 | 73 | scores <- predict(model, type = "response") 74 | 75 | # probability of treatment for the untreated 76 | # scores[which(data[[pkg.env$treatment]] == 0)] = 1 - scores[which(data[[pkg.env$treatment]] == 0)] 77 | 78 | model$call$formula <- formula(f) # manually set model formula to prevent "formula = formula" 79 | output <- list("call" = model$call, "formula" = model$call$formula, "model" = model, "p.scores" = scores) 80 | 81 | class(output) <- "propensity_scores" 82 | return(output) 83 | } 84 | 85 | #' @export 86 | print.propensity_scores <- function(x, ...) { 87 | print(x$model, ...) 88 | } 89 | 90 | #' @export 91 | summary.propensity_scores <- function(object, ...) { 92 | summary(object$model, ...) 93 | } 94 | 95 | #' @export 96 | predict.propensity_scores <- function(object, ...) { 97 | return(predict(object$model, type = "response", ...)) 98 | } 99 | -------------------------------------------------------------------------------- /R/standardization.R: -------------------------------------------------------------------------------- 1 | #' @title Parametric Standardization 2 | #' @description `standardization` uses a standard \code{\link[stats:glm]{glm}} linear model to perform parametric standardization 3 | #' by adjusting bias through including all confounders as covariates. The model will calculate during training both the risk difference 4 | #' and the risk ratio. Both can be accessed from the model as well as estimates of the counterfactuals of treatment. 5 | #' 6 | #' @param data a data frame containing the variables in the model. 7 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 8 | #' @param f (optional) an object of class "formula" that overrides the default parameter 9 | #' @param family the family to be used in the general linear model. 10 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 11 | #' @param simple a boolean indicator to build default formula with interactions. 12 | #' If true, interactions will be excluded. 
If false, interactions will be included. By
13 | #' default, simple is set to false.
14 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.
15 | #' @param n.boot an integer value that indicates number of bootstrap iterations to calculate standard error.
16 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.
17 | #'
18 | #' @returns \code{standardization} returns an object of \code{\link[base:class]{class} "standardization"}.
19 | #'
20 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with
21 | #' the underlying \code{glm} model.
22 | #'
23 | #' An object of class \code{"standardization"} is a list containing the following:
24 | #'
25 | #' \item{call}{the matched call.}
26 | #' \item{formula}{the formula used in the model.}
27 | #' \item{model}{the underlying glm model.}
28 | #' \item{ATE}{a data frame containing estimates of the treatment effect
29 | #' of the observed, counterfactuals, and risk metrics.}
30 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. }
31 | #'
32 | #' @export
33 | #'
34 | #' @examples
35 | #' library(causaldata)
36 | #'
37 | #' data(nhefs)
38 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ]
39 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)
40 | #'
41 | #' confounders <- c(
42 | #'   "sex", "race", "age", "education", "smokeintensity",
43 | #'   "smokeyrs", "exercise", "active", "wt71"
44 | #' )
45 | #'
46 | #' init_params(wt82_71, qsmk,
47 | #'   covariates = confounders,
48 | #'   data = nhefs.nmv
49 | #' )
50 | #'
51 | #' # model using all defaults
52 | #' model <- standardization(data = nhefs.nmv)
53 | #' print(model)
54 | #' summary(model)
55 | #' print(model$ATE.summary)
56 | #' print(model$ATE$Estimate[[2]] -
57 | #'   model$ATE$Estimate[[3]]) # manually calculate risk difference
58 | #'
59 | standardization <- function(data, f = NA, family = gaussian(), simple = pkg.env$simple, n.boot = 50, ...) {
60 |   check_init()
61 |
62 |   # grab function parameters
63 |   params <- as.list(match.call()[-1])
64 |
65 |   # if no formula provided
66 |   if (is.na(as.character(f))[1]) {
67 |     # override simple
68 |     if (simple != pkg.env$simple) {
69 |       f <- build_formula(
70 |         out = pkg.env$outcome, tr = pkg.env$treatment,
71 |         cov = pkg.env$covariates,
72 |         data = data, simple = simple
73 |       )
74 |     }
75 |     # use default
76 |     else {
77 |       f <- formula(pkg.env$f_out)
78 |     }
79 |   }
80 |
81 |   # make three copies of the dataset
82 |   cp <- data
83 |   cp$label <- "observed"
84 |   tr0 <- cp
85 |   tr0$label <- "cf_untreated"
86 |   tr0[pkg.env$treatment] <- 0
87 |   tr0[pkg.env$outcome] <- NA
88 |   tr1 <- cp
89 |   tr1$label <- "cf_treated"
90 |   tr1[pkg.env$treatment] <- 1
91 |   tr1[pkg.env$outcome] <- NA
92 |
93 |   combined_data <- rbind(cp, tr0, tr1) # combine copies
94 |
95 |   model_func <- function(data, indices, f, family, ...) {
96 |     if (!anyNA(indices)) {
97 |       data <- data[indices, ]
98 |     }
99 |
100 |     # build model using all three copies
101 |     model <- glm(formula = f, data = data, family = family, ...)
102 |     data$Y_hat <- predict(model, data)
103 |
104 |     # calculate means in each group
105 |     means <- c(
106 |       mean(data$Y_hat[data$label == "observed"]), # estimated outcome of the observed
107 |       mean(data$Y_hat[data$label == "cf_treated"]), # estimated counterfactual of the treated
108 |       mean(data$Y_hat[data$label == "cf_untreated"]), # estimated counterfactual of the untreated
109 |       mean(data$Y_hat[data$label == "cf_treated"]) - # estimated risk difference
110 |         mean(data$Y_hat[data$label == "cf_untreated"]),
111 |       mean(data$Y_hat[data$label == "cf_treated"]) / # estimated risk ratio
112 |         mean(data$Y_hat[data$label == "cf_untreated"])
113 |     )
114 |     return(list("model" = model, "means" = means))
115 |   }
116 |
117 |   # build model and bootstrapped estimates
118 |   result <- model_func(data = combined_data, indices = NA, f = f, family = family, ...)
119 |   boot_result <- boot(
120 |     data = combined_data, R = n.boot, f = f, family = family,
121 |     statistic = function(data, indices, f, family, ...) {
122 |       model_func(data, indices, f, family, ...)$means[[4]]
123 |     }, ...
124 |   )
125 |
126 |   # calculate 95% CI
127 |   beta <- boot_result$t0
128 |   SE <- sd(boot_result$t)
129 |   ATE.summary <- data.frame(
130 |     "Beta" = beta,
131 |     "SE" = SE,
132 |     conf_int(beta, SE),
133 |     check.names = FALSE
134 |   )
135 |
136 |   model <- result$model
137 |   ATE <- data.frame(Estimate = result$means)
138 |   rownames(ATE) <- c(
139 |     "Observed effect", "Counterfactual (treated)",
140 |     "Counterfactual (untreated)", "Risk difference",
141 |     "Risk ratio"
142 |   )
143 |
144 |   model$call$formula <- formula(f) # manually set model formula to prevent "formula = formula"
145 |   output <- list(
146 |     "call" = model$call, "formula" = model$call$formula,
147 |     "model" = model, "ATE" = ATE, "ATE.summary" = ATE.summary
148 |   )
149 |   class(output) <- "standardization"
150 |   return(output)
151 | }
152 |
153 | #' @export
154 | print.standardization <- function(x, ...) {
155 |   print(x$model, ...)
156 |   cat("\r\n")
157 |   cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "")
158 |   cat("Estimate - ", x$ATE$Estimate[[4]], "\r\n")
159 |   cat("SE - ", x$ATE.summary$SE, "\r\n")
160 |   cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n")
161 | }
162 |
163 | #' @export
164 | summary.standardization <- function(object, ...) {
165 |   s <- summary(object$model, ...)
166 |   s$ATE <- object$ATE.summary
167 |   class(s) <- "summary.standardization"
168 |   return(s)
169 | }
170 |
171 | #' @export
172 | print.summary.standardization <- function(x, ...) {
173 |   class(x) <- "summary.glm"
174 |   print(x, ...)
175 |   cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "")
176 |   print(x$ATE, row.names = FALSE)
177 |   cat("\r\n")
178 | }
179 |
180 | #' @export
181 | predict.standardization <- function(object, ...)
{ 182 | return(predict(object$model, ...)) 183 | } 184 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # function to be executed on package load 2 | .onLoad <- function(libname, pkgname) { 3 | assign("pkg.env", new.env(), topenv()) 4 | assign("init", FALSE, pkg.env) 5 | } 6 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>", 9 | fig.path = "man/figures/README-", 10 | out.width = "100%" 11 | ) 12 | ``` 13 | 14 | # CausalModels 15 | 16 | 17 | 18 | 19 | The goal of CausalModels is to provide a survey of fundamental causal 20 | inference models in one single location. While there are many packages 21 | for these types of models, CausalModels brings them all to one place 22 | with a simple user experience. The package uses a format that is familiar 23 | to users using well known statistical and machine learning models. While causal inference 24 | models require careful consideration of variables to correctly infer a causal effect, 25 | CausalModels uses simple code while requiring the user to make these considerations. This 26 | enables efficient and thoughtful research using causal inference. 27 | As of May 30, 2022, the package has been 28 | [published on CRAN](https://cran.r-project.org/package=CausalModels). 29 | 30 | ## Change Log 31 | 32 | ### Version 0.2.1 - 2025/04/27 33 | 34 | #### Fixed 35 | 36 | - generic treatment names bug (#7) 37 | 38 | ### Version 0.2.0 - 2022/10/27 39 | 40 | #### Added 41 | 42 | - one parameter g-estimation 43 | 44 | #### Fixed 45 | 46 | - Typo in ipweighting documentation 47 | 48 | 49 | ## Installation 50 | 51 | You can install the development version of CausalModels from 52 | [GitHub](https://github.com/) with: 53 | 54 | ``` r 55 | # install.packages("devtools") 56 | devtools::install_github("ander428/CausalModels") 57 | ``` 58 | 59 | Since the package has been published on CRAN, the production version can be installed with: 60 | 61 | ``` r 62 | install.packages("CausalModels") 63 | ``` 64 | 65 | ## Example 66 | 67 | This is a basic example which shows you how to solve a common problem: 68 | 69 | ``` r 70 | library(CausalModels) 71 | library(causaldata) 72 | 73 | data(nhefs) 74 | 75 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),] 76 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 77 | 78 | confounders <- c("sex", "race", "age", "education", "smokeintensity", 79 | "smokeyrs", "exercise", "active", "wt71") 80 | 81 | # initialize package 82 | ?init_params 83 | init_params(wt82_71, qsmk, 84 | covariates = confounders, 85 | data = nhefs.nmv, simple = F) 86 | #> Successfully initialized! 
87 | #> 88 | #> Summary: 89 | #> 90 | #> Outcome - wt82_71 91 | #> Treatment - qsmk 92 | #> Covariates - [ sex, race, age, education, smokeintensity, smokeyrs, exercise, active, wt71 ] 93 | #> 94 | #> Size - 1566 x 67 95 | #> 96 | #> Default formula for outcome models: 97 | #> wt82_71 ~ qsmk + sex + race + education + exercise + active + age + (qsmk * age) + I(age * age) + smokeintensity + (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + (qsmk * wt71) + I(wt71 * wt71) 98 | #> 99 | #> Default formula for propensity models: 100 | #> qsmk ~ sex + race + education + exercise + active + age + I(age * age) + smokeintensity + I(smokeintensity * smokeintensity) + smokeyrs + I(smokeyrs * smokeyrs) + wt71 + I(wt71 * wt71) 101 | 102 | # mode the causal effect of qsmk on wt82_71 103 | model <- standardization(nhefs.nmv) 104 | print(model) 105 | #> 106 | #> Call: glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 107 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 108 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 109 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 110 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 111 | #> 112 | #> Coefficients: 113 | #> (Intercept) qsmk1 114 | #> -0.9699812 0.5509460 115 | #> sex1 race1 116 | #> -1.4371844 0.5868376 117 | #> education2 education3 118 | #> 0.8174769 0.5824119 119 | #> education4 education5 120 | #> 1.5240890 -0.1792422 121 | #> exercise1 exercise2 122 | #> 0.3063727 0.3550789 123 | #> active1 active2 124 | #> -0.9460683 -0.2707615 125 | #> age I(age * age) 126 | #> 0.3495673 -0.0060652 127 | #> smokeintensity I(smokeintensity * smokeintensity) 128 | #> 0.0482197 -0.0009597 129 | #> smokeyrs I(smokeyrs * smokeyrs) 130 | #> 0.1418662 -0.0018076 131 | #> wt71 I(wt71 * wt71) 132 | #> 0.0393011 -0.0009787 133 | #> qsmk1:age qsmk1:smokeintensity 134 | #> 0.0123138 0.0448028 135 | #> qsmk1:smokeyrs qsmk1:wt71 136 | #> -0.0235529 0.0291350 137 | #> 138 | #> Degrees of Freedom: 1565 Total (i.e. Null); 1542 Residual 139 | #> (3132 observations deleted due to missingness) 140 | #> Null Deviance: 97180 141 | #> Residual Deviance: 82690 AIC: 10710 142 | #> 143 | #> Average treatment effect of qsmk: 144 | #> 3.4927 145 | summary(model) 146 | #> 147 | #> Call: 148 | #> glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 149 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 150 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 151 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 152 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 153 | #> 154 | #> Deviance Residuals: 155 | #> Min 1Q Median 3Q Max 156 | #> -41.913 -4.168 -0.314 3.869 44.573 157 | #> 158 | #> Coefficients: 159 | #> Estimate Std. Error t value Pr(>|t|) 160 | #> (Intercept) -0.9699812 4.3673208 -0.222 0.824266 161 | #> qsmk1 0.5509460 2.8229123 0.195 0.845286 162 | #> sex1 -1.4371844 0.4693195 -3.062 0.002235 ** 163 | #> race1 0.5868376 0.5828368 1.007 0.314158 164 | #> education2 0.8174769 0.6085125 1.343 0.179339 165 | #> education3 0.5824119 0.5575569 1.045 0.296382 166 | #> education4 1.5240890 0.8351981 1.825 0.068221 . 
167 | #> education5 -0.1792422 0.7462118 -0.240 0.810205 168 | #> exercise1 0.3063727 0.5360193 0.572 0.567697 169 | #> exercise2 0.3550789 0.5592886 0.635 0.525603 170 | #> active1 -0.9460683 0.4105673 -2.304 0.021338 * 171 | #> active2 -0.2707615 0.6851128 -0.395 0.692745 172 | #> age 0.3495673 0.1648157 2.121 0.034084 * 173 | #> I(age * age) -0.0060652 0.0017347 -3.496 0.000485 *** 174 | #> smokeintensity 0.0482197 0.0518339 0.930 0.352375 175 | #> I(smokeintensity * smokeintensity) -0.0009597 0.0009409 -1.020 0.307878 176 | #> smokeyrs 0.1418662 0.0943836 1.503 0.133023 177 | #> I(smokeyrs * smokeyrs) -0.0018076 0.0015458 -1.169 0.242437 178 | #> wt71 0.0393011 0.0836422 0.470 0.638514 179 | #> I(wt71 * wt71) -0.0009787 0.0005255 -1.862 0.062751 . 180 | #> qsmk1:age 0.0123138 0.0670159 0.184 0.854238 181 | #> qsmk1:smokeintensity 0.0448028 0.0360169 1.244 0.213712 182 | #> qsmk1:smokeyrs -0.0235529 0.0654333 -0.360 0.718931 183 | #> qsmk1:wt71 0.0291350 0.0276439 1.054 0.292075 184 | #> --- 185 | #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 186 | #> 187 | #> (Dispersion parameter for gaussian family taken to be 53.62833) 188 | #> 189 | #> Null deviance: 97176 on 1565 degrees of freedom 190 | #> Residual deviance: 82695 on 1542 degrees of freedom 191 | #> (3132 observations deleted due to missingness) 192 | #> AIC: 10706 193 | #> 194 | #> Number of Fisher Scoring iterations: 2 195 | #> 196 | #> Average treatment effect of qsmk: 197 | #> Estimate 198 | #> Observed effect 2.638300 199 | #> Counterfactual (treated) 5.243703 200 | #> Counterfactual (untreated) 1.751003 201 | #> Risk difference 3.492700 202 | #> Risk ratio 2.994686 203 | #> 204 | ``` 205 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # CausalModels 3 | 4 | 5 | 6 | 7 | The goal of CausalModels is to provide a survey of fundamental causal 8 | inference models in one single location. While there are many packages 9 | for these types of models, CausalModels brings them all to one place 10 | with a simple user experience. The package uses a format that is 11 | familiar to users using well known statistical and machine learning 12 | models. While causal inference models require careful consideration of 13 | variables to correctly infer a causal effect, CausalModels uses simple 14 | code while requiring the user to make these considerations. This enables 15 | efficient and thoughtful research using causal inference. As of May 30, 16 | 2022, the package has been [published on 17 | CRAN](https://cran.r-project.org/package=CausalModels). 
18 | 19 | ## Change Log 20 | 21 | ### Version 0.2.1 - 2025/04/27 22 | 23 | #### Fixed 24 | 25 | - generic treatment names bug (#7) 26 | 27 | ### Version 0.2.0 - 2022/10/27 28 | 29 | #### Added 30 | 31 | - one parameter g-estimation 32 | 33 | #### Fixed 34 | 35 | - Typo in ipweighting documentation 36 | 37 | ## Installation 38 | 39 | You can install the development version of CausalModels from 40 | [GitHub](https://github.com/) with: 41 | 42 | ``` r 43 | # install.packages("devtools") 44 | devtools::install_github("ander428/CausalModels") 45 | ``` 46 | 47 | Since the package has been published on CRAN, the production version can 48 | be installed with: 49 | 50 | ``` r 51 | install.packages("CausalModels") 52 | ``` 53 | 54 | ## Example 55 | 56 | This is a basic example which shows you how to solve a common problem: 57 | 58 | ``` r 59 | library(CausalModels) 60 | library(causaldata) 61 | 62 | data(nhefs) 63 | 64 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),] 65 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 66 | 67 | confounders <- c("sex", "race", "age", "education", "smokeintensity", 68 | "smokeyrs", "exercise", "active", "wt71") 69 | 70 | # initialize package 71 | ?init_params 72 | init_params(wt82_71, qsmk, 73 | covariates = confounders, 74 | data = nhefs.nmv, simple = F) 75 | #> Successfully initialized! 76 | #> 77 | #> Summary: 78 | #> 79 | #> Outcome - wt82_71 80 | #> Treatment - qsmk 81 | #> Covariates - [ sex, race, age, education, smokeintensity, smokeyrs, exercise, active, wt71 ] 82 | #> 83 | #> Size - 1566 x 67 84 | #> 85 | #> Default formula for outcome models: 86 | #> wt82_71 ~ qsmk + sex + race + education + exercise + active + age + (qsmk * age) + I(age * age) + smokeintensity + (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + (qsmk * wt71) + I(wt71 * wt71) 87 | #> 88 | #> Default formula for propensity models: 89 | #> qsmk ~ sex + race + education + exercise + active + age + I(age * age) + smokeintensity + I(smokeintensity * smokeintensity) + smokeyrs + I(smokeyrs * smokeyrs) + wt71 + I(wt71 * wt71) 90 | 91 | # mode the causal effect of qsmk on wt82_71 92 | model <- standardization(nhefs.nmv) 93 | print(model) 94 | #> 95 | #> Call: glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 96 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 97 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 98 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 99 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 100 | #> 101 | #> Coefficients: 102 | #> (Intercept) qsmk1 103 | #> -0.9699812 0.5509460 104 | #> sex1 race1 105 | #> -1.4371844 0.5868376 106 | #> education2 education3 107 | #> 0.8174769 0.5824119 108 | #> education4 education5 109 | #> 1.5240890 -0.1792422 110 | #> exercise1 exercise2 111 | #> 0.3063727 0.3550789 112 | #> active1 active2 113 | #> -0.9460683 -0.2707615 114 | #> age I(age * age) 115 | #> 0.3495673 -0.0060652 116 | #> smokeintensity I(smokeintensity * smokeintensity) 117 | #> 0.0482197 -0.0009597 118 | #> smokeyrs I(smokeyrs * smokeyrs) 119 | #> 0.1418662 -0.0018076 120 | #> wt71 I(wt71 * wt71) 121 | #> 0.0393011 -0.0009787 122 | #> qsmk1:age qsmk1:smokeintensity 123 | #> 0.0123138 0.0448028 124 | #> qsmk1:smokeyrs qsmk1:wt71 125 | #> -0.0235529 0.0291350 126 | #> 127 | #> Degrees of Freedom: 1565 Total (i.e. 
Null); 1542 Residual 128 | #> (3132 observations deleted due to missingness) 129 | #> Null Deviance: 97180 130 | #> Residual Deviance: 82690 AIC: 10710 131 | #> 132 | #> Average treatment effect of qsmk: 133 | #> 3.4927 134 | summary(model) 135 | #> 136 | #> Call: 137 | #> glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 138 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 139 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 140 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 141 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 142 | #> 143 | #> Deviance Residuals: 144 | #> Min 1Q Median 3Q Max 145 | #> -41.913 -4.168 -0.314 3.869 44.573 146 | #> 147 | #> Coefficients: 148 | #> Estimate Std. Error t value Pr(>|t|) 149 | #> (Intercept) -0.9699812 4.3673208 -0.222 0.824266 150 | #> qsmk1 0.5509460 2.8229123 0.195 0.845286 151 | #> sex1 -1.4371844 0.4693195 -3.062 0.002235 ** 152 | #> race1 0.5868376 0.5828368 1.007 0.314158 153 | #> education2 0.8174769 0.6085125 1.343 0.179339 154 | #> education3 0.5824119 0.5575569 1.045 0.296382 155 | #> education4 1.5240890 0.8351981 1.825 0.068221 . 156 | #> education5 -0.1792422 0.7462118 -0.240 0.810205 157 | #> exercise1 0.3063727 0.5360193 0.572 0.567697 158 | #> exercise2 0.3550789 0.5592886 0.635 0.525603 159 | #> active1 -0.9460683 0.4105673 -2.304 0.021338 * 160 | #> active2 -0.2707615 0.6851128 -0.395 0.692745 161 | #> age 0.3495673 0.1648157 2.121 0.034084 * 162 | #> I(age * age) -0.0060652 0.0017347 -3.496 0.000485 *** 163 | #> smokeintensity 0.0482197 0.0518339 0.930 0.352375 164 | #> I(smokeintensity * smokeintensity) -0.0009597 0.0009409 -1.020 0.307878 165 | #> smokeyrs 0.1418662 0.0943836 1.503 0.133023 166 | #> I(smokeyrs * smokeyrs) -0.0018076 0.0015458 -1.169 0.242437 167 | #> wt71 0.0393011 0.0836422 0.470 0.638514 168 | #> I(wt71 * wt71) -0.0009787 0.0005255 -1.862 0.062751 . 169 | #> qsmk1:age 0.0123138 0.0670159 0.184 0.854238 170 | #> qsmk1:smokeintensity 0.0448028 0.0360169 1.244 0.213712 171 | #> qsmk1:smokeyrs -0.0235529 0.0654333 -0.360 0.718931 172 | #> qsmk1:wt71 0.0291350 0.0276439 1.054 0.292075 173 | #> --- 174 | #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 175 | #> 176 | #> (Dispersion parameter for gaussian family taken to be 53.62833) 177 | #> 178 | #> Null deviance: 97176 on 1565 degrees of freedom 179 | #> Residual deviance: 82695 on 1542 degrees of freedom 180 | #> (3132 observations deleted due to missingness) 181 | #> AIC: 10706 182 | #> 183 | #> Number of Fisher Scoring iterations: 2 184 | #> 185 | #> Average treatment effect of qsmk: 186 | #> Estimate 187 | #> Observed effect 2.638300 188 | #> Counterfactual (treated) 5.243703 189 | #> Counterfactual (untreated) 1.751003 190 | #> Risk difference 3.492700 191 | #> Risk ratio 2.994686 192 | #> 193 | ``` 194 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 1 note 4 | 5 | * This is a new release. 
6 | -------------------------------------------------------------------------------- /man/doubly_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/doubly_robust.R 3 | \name{doubly_robust} 4 | \alias{doubly_robust} 5 | \title{Doubly Robust Model} 6 | \usage{ 7 | doubly_robust( 8 | data, 9 | out.mod = NULL, 10 | p.mod = NULL, 11 | f = NA, 12 | family = gaussian(), 13 | simple = pkg.env$simple, 14 | scores = NA, 15 | p.f = NA, 16 | p.simple = pkg.env$simple, 17 | p.family = binomial(), 18 | p.scores = NA, 19 | n.boot = 50, 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{data}{a data frame containing the variables in the model. 25 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 26 | 27 | \item{out.mod}{(optional) a regression model that predicts the outcome. NOTE: the model given 28 | must be compatible with the \code{\link[stats:predict]{predict}} generic function.} 29 | 30 | \item{p.mod}{(optional) a propensity model that predicts the probability of treatment. NOTE: the model given 31 | must be compatible with the \code{\link[stats:predict]{predict}} generic function.} 32 | 33 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 34 | 35 | \item{family}{the family to be used in the general linear model. 36 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 37 | 38 | \item{simple}{a boolean indicator to build default formula with interactions. 39 | If true, interactions will be excluded. If false, interactions will be included. By 40 | default, simple is set to false.} 41 | 42 | \item{scores}{(optional) use calculated outcome estimates.} 43 | 44 | \item{p.f}{(optional) an object of class "formula" that overrides the default formula for the denominator of the IP 45 | weighting function.} 46 | 47 | \item{p.simple}{a boolean indicator to build default formula with interactions for the propensity models. 48 | If true, interactions will be excluded. If false, interactions will be included. By 49 | default, simple is set to false. 50 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 51 | 52 | \item{p.family}{the family to be used in the underlying propensity model. 53 | By default, this is set to \code{\link[stats:gaussian]{binomial}}.} 54 | 55 | \item{p.scores}{(optional) use calculated propensity scores.} 56 | 57 | \item{n.boot}{an integer value that indicates number of bootstrap iterations to calculate standard error.} 58 | 59 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 60 | } 61 | \value{ 62 | \code{doubly_robust} returns an object of \code{\link[base:class]{class}} "doubly_robust". 63 | 64 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 65 | the underlying \code{glm} model. 
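For intuition, the two sets of predictions are typically combined in the textbook augmented-IPW (doubly robust) form of the risk difference. The sketch below is purely illustrative pseudo-R with hypothetical argument names; it is not taken from the package's internal implementation.

``` r
# Illustrative AIPW-style risk difference (not the package source).
# y: observed outcome, a: binary treatment coded 0/1,
# y1.hat, y0.hat: outcome-model predictions under treatment and no treatment,
# p: estimated propensity scores P(A = 1 | covariates).
aipw_ate <- function(y, a, y1.hat, y0.hat, p) {
  mu1 <- mean(a * (y - y1.hat) / p + y1.hat)             # augmented mean under treatment
  mu0 <- mean((1 - a) * (y - y0.hat) / (1 - p) + y0.hat) # augmented mean under no treatment
  mu1 - mu0
}
```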
66 | 67 | An object of class \code{"doubly_robust"} is a list containing the following: 68 | 69 | \item{out.call}{the matched call of the outcome model.} 70 | \item{p.call}{the matched call of the propensity model.} 71 | \item{out.model}{the underlying outcome model.} 72 | \item{p.model}{the underlying propensity model.} 73 | \item{y_hat}{the estimated outcome values.} 74 | \item{p.scores}{the estimated propensity scores.} 75 | \item{ATE}{the estimated average treatment effect (risk difference).} 76 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 77 | \item{data}{the data frame used to train the model.} 78 | } 79 | \description{ 80 | \code{`doubly_robust`} trains both an outcome model and a propensity model to generate predictions 81 | for the outcome and probability of treatment respectively. By default, the model uses 82 | \code{\link[=standardization]{standardization}} and \code{\link[=propensity_scores]{propensity_scores}} to form a 83 | doubly-robust model between standardization and IP weighting. Alternatively, any outcome and treatment 84 | models can be provided instead, but must be compatible with the \code{\link[stats:predict]{predict}} generic function in R. 85 | Since many propensity models may not predict probabilities without additional arguments into the 86 | predict function, the predictions themselves can be given for both the outcome and propensity scores. 87 | } 88 | \examples{ 89 | library(causaldata) 90 | data(nhefs) 91 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 92 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 93 | 94 | confounders <- c( 95 | "sex", "race", "age", "education", "smokeintensity", 96 | "smokeyrs", "exercise", "active", "wt71" 97 | ) 98 | 99 | init_params(wt82_71, qsmk, 100 | covariates = confounders, 101 | data = nhefs.nmv 102 | ) 103 | 104 | # model using all defaults 105 | model <- doubly_robust(data = nhefs.nmv) 106 | summary(model) 107 | 108 | # use alternative outcome model 109 | out.mod <- propensity_matching(data = nhefs.nmv) 110 | db.model <- doubly_robust( 111 | out.mod = out.mod, 112 | data = nhefs.nmv 113 | ) 114 | db.model 115 | 116 | # give calculated outcome predictions and give formula for propensity scores 117 | db.model <- doubly_robust( 118 | scores = predict(out.mod), 119 | p.f = qsmk ~ sex + race + age, 120 | data = nhefs.nmv 121 | ) 122 | db.model 123 | 124 | } 125 | -------------------------------------------------------------------------------- /man/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ander428/CausalModels/4a81259939f9be47a294354f4b17f0652799a54a/man/figures/README-pressure-1.png -------------------------------------------------------------------------------- /man/gestimation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gestimation.R 3 | \name{gestimation} 4 | \alias{gestimation} 5 | \title{One Parameter G-Estimation of Structural Nested Mean Models} 6 | \usage{ 7 | gestimation( 8 | data, 9 | grid, 10 | ids = list(), 11 | f = NA, 12 | family = binomial(), 13 | simple = pkg.env$simple, 14 | p.f = NA, 15 | p.simple = pkg.env$simple, 16 | p.family = binomial(), 17 | p.scores = NA, 18 | SW = TRUE, 19 | n.boot = 100, 20 | type = "one.grid", 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{data}{a data frame containing the variables in the model. 
26 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 27 | 28 | \item{grid}{a list of possible \eqn{\beta} values that will be used in the grid search.} 29 | 30 | \item{ids}{(optional) see documentation for \code{\link[geepack:geeglm]{geeglm}}. By default rownames of the data will be used.} 31 | 32 | \item{f}{(optional) an object of class "formula" that overrides the default parameter. NOTE: for g-estimation this should be 33 | a propensity formula.} 34 | 35 | \item{family}{the family to be used in the general linear model. 36 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 37 | 38 | \item{simple}{(optional) a boolean indicator to build default formula with interactions for the g-estimation model. 39 | If true, interactions will be excluded. If false, interactions will be included. By default, simple is set to false. 40 | NOTE: \eqn{\beta} will be appended to the end of the formula} 41 | 42 | \item{p.f}{(optional) an object of class "formula" that overrides the default formula for the denominator of the IP 43 | weighting function.} 44 | 45 | \item{p.simple}{(optional) a boolean indicator to build default formula with interactions for the propensity models. 46 | If true, interactions will be excluded. If false, interactions will be included. By 47 | default, simple is set to false. 48 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 49 | 50 | \item{p.family}{the family to be used in the underlying propensity model. 51 | By default, this is set to \code{\link[stats:binomial]{binomial}}.} 52 | 53 | \item{p.scores}{(optional) use calculated propensity scores for the weights. If using standardized weights, 54 | the numerator will still be modeled.} 55 | 56 | \item{SW}{a boolean indicator to indicate the use of standardized weights. By default, this is set to true.} 57 | 58 | \item{n.boot}{(optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 59 | If no value is given, the standard error from the underlying linear model will be used. NOTE: when type is 'one.grid' 60 | bootstrapping is not performed. By default, this is set to 100.} 61 | 62 | \item{type}{the type of g-estimation to perform. It must be one of \code{"one.grid"} or \code{"one.linear"} for a 63 | one parameter grid and linear mean model estimation respectively.} 64 | 65 | \item{...}{additional arguments that may be passed to the underlying model.} 66 | } 67 | \value{ 68 | \code{gestimation} returns an object of \code{\link[base:class]{class} "gestimation"}. 69 | 70 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 71 | the underlying \code{glm} or \code{geeglm} model. 72 | 73 | An object of class \code{"gestimation"} is a list containing the following: 74 | 75 | \item{call}{the matched call.} 76 | \item{formula}{the formula used in the model.} 77 | \item{model}{the underlying glm model. If the model performed a grid search, this will be renamed 'best.model'} 78 | \item{weights}{the estimated IP weights.} 79 | \item{type}{returns the value used for the 'type' parameter.} 80 | \item{ATE}{the estimated average treatment effect (risk difference).} 81 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 82 | } 83 | \description{ 84 | `gestimation` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 85 | weights. 
The weights can either be standardized weights or non-standardized weights. A grid search is done on \eqn{\alpha} 86 | to construct the best \eqn{\beta} coefficient in the structural nested mean model. Alternatively, a linear mean model can be used 87 | for a closed form estimator. 88 | } 89 | \examples{ 90 | library(causaldata) 91 | data(nhefs) 92 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 93 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 94 | 95 | confounders <- c( 96 | "sex", "race", "age", "education", "smokeintensity", 97 | "smokeyrs", "exercise", "active", "wt71" 98 | ) 99 | 100 | init_params(wt82_71, qsmk, 101 | covariates = confounders, 102 | data = nhefs.nmv 103 | ) 104 | 105 | gest.model <- gestimation(nhefs.nmv, type = "one.linear", n.boot = 150) 106 | gest.model$ATE.summary 107 | } 108 | -------------------------------------------------------------------------------- /man/init_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initialize.R 3 | \name{init_params} 4 | \alias{init_params} 5 | \title{Initialize CausalModels Package} 6 | \usage{ 7 | init_params(outcome, treatment, covariates, data, simple = FALSE) 8 | } 9 | \arguments{ 10 | \item{outcome}{the outcome variable of interest (must be continuous).} 11 | 12 | \item{treatment}{the treatment with the causal effect of interest on the outcome.} 13 | 14 | \item{covariates}{a list/vector of covariate names to be use for confounding adjustment.} 15 | 16 | \item{data}{a data frame containing the variables in the model.} 17 | 18 | \item{simple}{a boolean indicator to build default formula with interactions. 19 | If true, interactions will be excluded. If false, interactions will be included. By 20 | default, simple is set to false.} 21 | } 22 | \value{ 23 | No return value. Called for parameter initialization. 24 | } 25 | \description{ 26 | This function is required to be run first before any other function can run. 27 | This will set within the package the global outcome, treatment, and covariate functions for each model to use. 28 | } 29 | -------------------------------------------------------------------------------- /man/ipweighting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ipweighting.R 3 | \name{ipweighting} 4 | \alias{ipweighting} 5 | \title{Parametric IP Weighting} 6 | \usage{ 7 | ipweighting( 8 | data, 9 | f = NA, 10 | family = gaussian(), 11 | p.f = NA, 12 | p.simple = pkg.env$simple, 13 | p.family = binomial(), 14 | p.scores = NA, 15 | SW = TRUE, 16 | n.boot = 0, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{data}{a data frame containing the variables in the model. 22 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 23 | 24 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 25 | 26 | \item{family}{the family to be used in the general linear model. 27 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 28 | 29 | \item{p.f}{(optional) an object of class "formula" that overrides the default formula for the denominator of the IP 30 | weighting function.} 31 | 32 | \item{p.simple}{a boolean indicator to build default formula with interactions for the propensity models. 33 | If true, interactions will be excluded. If false, interactions will be included. 
By 34 | default, simple is set to false. 35 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 36 | 37 | \item{p.family}{the family to be used in the underlying propensity model. 38 | By default, this is set to \code{\link[stats:binomial]{binomial}}.} 39 | 40 | \item{p.scores}{(optional) use calculated propensity scores for the weights. If using standardized weights, 41 | the numerator will still be modeled.} 42 | 43 | \item{SW}{a boolean indicator to indicate the use of standardized weights. By default, this is set to true.} 44 | 45 | \item{n.boot}{(optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 46 | If no value is given, the standard error from the underlying linear model will be used.} 47 | 48 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 49 | } 50 | \value{ 51 | \code{ipweighting} returns an object of \code{\link[base:class]{class} "ipweighting"}. 52 | 53 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 54 | the underlying \code{glm} model. 55 | 56 | An object of class \code{"ipweighting"} is a list containing the following: 57 | 58 | \item{call}{the matched call.} 59 | \item{formula}{the formula used in the model.} 60 | \item{model}{the underlying glm model.} 61 | \item{weights}{the estimated IP weights.} 62 | \item{ATE}{the estimated average treatment effect (risk difference).} 63 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 64 | } 65 | \description{ 66 | `ipweighting` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 67 | weights. The weights can either be standardized weights or non-standardized weights. The weights are used to train a 68 | general linear model whose coefficient for treatment represents the average treatment effect on the additive scale. 69 | } 70 | \examples{ 71 | library(causaldata) 72 | data(nhefs) 73 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 74 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 75 | 76 | confounders <- c( 77 | "sex", "race", "age", "education", "smokeintensity", 78 | "smokeyrs", "exercise", "active", "wt71" 79 | ) 80 | 81 | init_params(wt82_71, qsmk, 82 | covariates = confounders, 83 | data = nhefs.nmv 84 | ) 85 | 86 | # model using all defaults 87 | model <- ipweighting(data = nhefs.nmv) 88 | summary(model) 89 | 90 | # Model using calculated propensity scores and manual outcome formula 91 | p.scores <- propensity_scores(nhefs.nmv)$p.scores 92 | model <- ipweighting(wt82_71 ~ qsmk, p.scores = p.scores, data = nhefs.nmv) 93 | summary(model) 94 | 95 | } 96 | -------------------------------------------------------------------------------- /man/iv_est.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/iv_est.R 3 | \name{iv_est} 4 | \alias{iv_est} 5 | \title{Standard Instrumental Variable Estimator} 6 | \usage{ 7 | iv_est(IV, data, n.boot = 50) 8 | } 9 | \arguments{ 10 | \item{IV}{the instrumental variable to be used in the conditional means. Must be a factor with no more than 2 levels. 11 | It is assumed the second level is the positive level, i.e., the binary equivalent of the second factor level should be 1 12 | and the first should be 0.} 13 | 14 | \item{data}{a data frame containing the variables in the model. 
15 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 16 | 17 | \item{n.boot}{an integer value that indicates number of bootstrap iterations to calculate standard error.} 18 | } 19 | \value{ 20 | \code{iv_est} returns a data frame containing the standard IV estimate, standard error, and Wald 95% CI. 21 | } 22 | \description{ 23 | `iv_est` calculates the standard IV estimand using the conditional means on a given instrumental variable. 24 | } 25 | \examples{ 26 | library(causaldata) 27 | data(nhefs) 28 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 29 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 30 | 31 | confounders <- c( 32 | "sex", "race", "age", "education", "smokeintensity", 33 | "smokeyrs", "exercise", "active", "wt71" 34 | ) 35 | nhefs.iv <- nhefs[which(!is.na(nhefs$wt82) & !is.na(nhefs$price82)), ] 36 | nhefs.iv$highprice <- as.factor(ifelse(nhefs.iv$price82 >= 1.5, 1, 0)) 37 | nhefs.iv$qsmk <- as.factor(nhefs.iv$qsmk) 38 | init_params(wt82_71, qsmk, 39 | covariates = confounders, 40 | data = nhefs.iv) 41 | 42 | iv_est("highprice", nhefs.iv) 43 | } 44 | -------------------------------------------------------------------------------- /man/outcome_regression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outcome_regression.R 3 | \name{outcome_regression} 4 | \alias{outcome_regression} 5 | \title{Outcome Regression} 6 | \usage{ 7 | outcome_regression( 8 | data, 9 | f = NA, 10 | simple = pkg.env$simple, 11 | family = gaussian(), 12 | contrasts = list(), 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{a data frame containing the variables in the model. 18 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 19 | 20 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 21 | 22 | \item{simple}{a boolean indicator to build default formula with interactions. 23 | If true, interactions will be excluded. If false, interactions will be included. By 24 | default, simple is set to false.} 25 | 26 | \item{family}{the family to be used in the general linear model. 27 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 28 | NOTE: if this is changed, the assumptions about the model output may be incorrect and may not provide 29 | accurate treatment effects.} 30 | 31 | \item{contrasts}{a list of continuous covariates and values in the model to be included in the contrast matrix 32 | (e.g. \code{list(age = c(18, 25, 40), weight = c(90, 159))}).} 33 | 34 | \item{...}{additional arguments that may be passed to the underlying \code{\link[multcomp:glht]{glht}} model.} 35 | } 36 | \value{ 37 | \code{outcome_regression} returns an object of \code{\link[base:class]{class} "outcome_regression"} 38 | 39 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 40 | the underlying \code{glht} model. 41 | 42 | An object of class \code{"outcome_regression"} is a list containing the following: 43 | 44 | \item{call}{the matched call.} 45 | \item{formula}{the formula used in the model.} 46 | \item{model}{the underlying glht model.} 47 | \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 48 | \item{ATE.summary}{a more detailed summary of the ATE estimations from glht. } 49 | } 50 | \description{ 51 | `outcome_regression` builds a linear model using all covariates. 
The treatment effects are stratified 52 | within the levels of the covariates. The model will automatically provide all discrete covariates in a contrast matrix. 53 | To view estimated change in treatment effect from continuous variables, a list called \code{contrasts}, needs to be given 54 | with specific values to estimate. A vector of values can be given for any particular continuous variable. 55 | } 56 | \examples{ 57 | library(causaldata) 58 | library(multcomp) 59 | 60 | data(nhefs) 61 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 62 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 63 | 64 | confounders <- c( 65 | "sex", "race", "age", "education", "smokeintensity", 66 | "smokeyrs", "exercise", "active", "wt71" 67 | ) 68 | 69 | init_params(wt82_71, qsmk, 70 | covariates = confounders, 71 | data = nhefs.nmv 72 | ) 73 | 74 | out.mod <- outcome_regression(nhefs.nmv, contrasts = list( 75 | age = c(21, 55), 76 | smokeintensity = c(5, 20, 40) 77 | )) 78 | print(out.mod) 79 | summary(out.mod) 80 | head(data.frame(preds = predict(out.mod))) 81 | 82 | } 83 | -------------------------------------------------------------------------------- /man/propensity_matching.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/propensity_matching.R 3 | \name{propensity_matching} 4 | \alias{propensity_matching} 5 | \title{Propensity Matching} 6 | \usage{ 7 | propensity_matching( 8 | data, 9 | f = NA, 10 | simple = pkg.env$simple, 11 | p.scores = NA, 12 | p.simple = pkg.env$simple, 13 | type = "strata", 14 | grp.width = 0.1, 15 | quant = TRUE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{a data frame containing the variables in the model. 21 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 22 | 23 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 24 | 25 | \item{simple}{a boolean indicator to build default formula with interactions. 26 | If true, interactions will be excluded. If false, interactions will be included. By 27 | default, simple is set to false.} 28 | 29 | \item{p.scores}{(optional) use calculated propensity scores for matching. Otherwise, propensity scores 30 | will be automatically modeled.} 31 | 32 | \item{p.simple}{a boolean indicator to build default formula with interactions for the propensity models. 33 | If true, interactions will be excluded. If false, interactions will be included. By 34 | default, simple is set to false.} 35 | 36 | \item{type}{a string representing the type of propensity model to be used. By default, the function will stratify. Standardization with 37 | propensity scores may also be used. The value given for \code{type} must be in \code{c("strata", "stdm")}.} 38 | 39 | \item{grp.width}{a decimal value to specify the range to stratify the propensity scores. If option \code{quant} is set to true, 40 | this will represent the spread of percentiles. If false, it will represent the spread of raw values of propensity 41 | scores. Must be a decimal between 0 and 1. By default, this is set to 0.1. This option is ignored for standardization.} 42 | 43 | \item{quant}{a boolean indicator to specify the type of stratification. If true (default), the model will stratify by 44 | percentiles. If false, the scores will be grouped by a range of their raw values. 
This option is ignored for standardization.} 45 | 46 | \item{...}{additional arguments that may be passed to the underlying \code{\link[=propensity_scores]{propensity_scores}} function.} 47 | } 48 | \value{ 49 | \code{propensity_matching} returns an object of \code{\link[base:class]{class} "propensity_matching"} 50 | 51 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with the underlying \code{glht} or 52 | \code{standardization} model. 53 | 54 | An object of class \code{"propensity_matching"} is a list containing the following: 55 | 56 | \item{call}{the matched call.} 57 | \item{formula}{the formula used in the model.} 58 | \item{model}{either the underlying \code{glht} or \code{standardization} model.} 59 | \item{p.scores}{the estimated propensity scores} 60 | \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 61 | \item{ATE.summary}{either a data frame containing the \code{glht} or \code{standardization} summary. } 62 | } 63 | \description{ 64 | `propensity_matching` uses either stratification or standardization to model an outcome 65 | conditional on the propensity scores. In stratification, the model will break the propensity scores 66 | into groups and output a \code{\link[multcomp:glht]{glht}} model based off a contrast matrix which 67 | estimates the change in average causal effect within groups of propensity scores. In standardization, 68 | the model will output a \code{\link[=standardization]{standardization}} model that conditions on the 69 | propensity strata rather than the covariates. The model can also predict the expected outcome. 70 | } 71 | \examples{ 72 | library(causaldata) 73 | library(multcomp) 74 | 75 | data(nhefs) 76 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 77 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 78 | 79 | confounders <- c( 80 | "sex", "race", "age", "education", "smokeintensity", 81 | "smokeyrs", "exercise", "active", "wt71" 82 | ) 83 | 84 | init_params(wt82_71, qsmk, 85 | covariates = confounders, 86 | data = nhefs.nmv 87 | ) 88 | 89 | pm.model <- propensity_matching(nhefs.nmv) 90 | pm.model$ATE.summary 91 | summary(pm.model) 92 | head(data.frame(preds = predict(pm.model))) 93 | 94 | } 95 | -------------------------------------------------------------------------------- /man/propensity_scores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/propensity_scores.R 3 | \name{propensity_scores} 4 | \alias{propensity_scores} 5 | \title{Propensity Scores} 6 | \usage{ 7 | propensity_scores( 8 | data, 9 | f = NA, 10 | simple = pkg.env$simple, 11 | family = binomial(), 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{data}{a data frame containing the variables in the model. 17 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 18 | 19 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 20 | 21 | \item{simple}{a boolean indicator to build default formula with interactions. 22 | If true, interactions will be excluded. If false, interactions will be included. By 23 | default, simple is set to false.} 24 | 25 | \item{family}{the family to be used in the general linear model. 
26 | By default, this is set to \code{\link[stats:binomial]{binomial}} 27 | NOTE: if this is changed, the outcome of the model may not be the probabilities and the results will not be valid.} 28 | 29 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 30 | } 31 | \value{ 32 | \code{propensity_scores} returns an object of \code{\link[base:class]{class} "propensity_scores"} 33 | 34 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 35 | the underlying \code{glm} model. 36 | 37 | An object of class \code{"propensity_scores"} is a list containing the following: 38 | 39 | \item{call}{the matched call.} 40 | \item{formula}{the formula used in the model.} 41 | \item{model}{the underlying glm model.} 42 | \item{p.scores}{the estimated propensity scores.} 43 | } 44 | \description{ 45 | `propensity_scores` builds a logistic regression with the target as the treatment variable 46 | and the covariates as the independent variables. 47 | } 48 | \examples{ 49 | library(causaldata) 50 | data(nhefs) 51 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 52 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 53 | 54 | confounders <- c( 55 | "sex", "race", "age", "education", "smokeintensity", 56 | "smokeyrs", "exercise", "active", "wt71" 57 | ) 58 | 59 | init_params(wt82_71, qsmk, 60 | covariates = confounders, 61 | data = nhefs.nmv 62 | ) 63 | 64 | p.score <- propensity_scores(nhefs.nmv) 65 | p.score 66 | 67 | } 68 | -------------------------------------------------------------------------------- /man/standardization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardization.R 3 | \name{standardization} 4 | \alias{standardization} 5 | \title{Parametric Standardization} 6 | \usage{ 7 | standardization( 8 | data, 9 | f = NA, 10 | family = gaussian(), 11 | simple = pkg.env$simple, 12 | n.boot = 50, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{a data frame containing the variables in the model. 18 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 19 | 20 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 21 | 22 | \item{family}{the family to be used in the general linear model. 23 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 24 | 25 | \item{simple}{a boolean indicator to build default formula with interactions. 26 | If true, interactions will be excluded. If false, interactions will be included. By 27 | default, simple is set to false. 28 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 29 | 30 | \item{n.boot}{an integer value that indicates number of bootstrap iterations to calculate standard error.} 31 | 32 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 33 | } 34 | \value{ 35 | \code{standardization} returns an object of \code{\link[base:class]{class} "standardization"}. 36 | 37 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 38 | the underlying \code{glm} model. 
39 | 40 | An object of class \code{"standardization"} is a list containing the following: 41 | 42 | \item{call}{the matched call.} 43 | \item{formula}{the formula used in the model.} 44 | \item{model}{the underlying glm model.} 45 | \item{ATE}{a data frame containing estimates of the treatment effect 46 | of the observed, counterfactuals, and risk metrics.} 47 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 48 | } 49 | \description{ 50 | `standardization` uses a standard \code{\link[stats:glm]{glm}} linear model to perform parametric standardization 51 | by adjusting bias through including all confounders as covariates. The model will calculate during training both the risk difference 52 | and the risk ratio. Both can be accessed from the model as well as estimates of the counterfactuals of treatment. 53 | } 54 | \examples{ 55 | library(causaldata) 56 | 57 | data(nhefs) 58 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 59 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 60 | 61 | confounders <- c( 62 | "sex", "race", "age", "education", "smokeintensity", 63 | "smokeyrs", "exercise", "active", "wt71" 64 | ) 65 | 66 | init_params(wt82_71, qsmk, 67 | covariates = confounders, 68 | data = nhefs.nmv 69 | ) 70 | 71 | # model using all defaults 72 | model <- standardization(data = nhefs.nmv) 73 | print(model) 74 | summary(model) 75 | print(model$ATE.summary) 76 | print(model$ATE.summary$Estimate[[2]] - 77 | model$ATE.summary$Estimate[[3]]) # manually calculate risk difference 78 | 79 | } 80 | -------------------------------------------------------------------------------- /renv.lock: -------------------------------------------------------------------------------- 1 | { 2 | "R": { 3 | "Version": "4.1.1", 4 | "Repositories": [ 5 | { 6 | "Name": "CRAN", 7 | "URL": "https://cran.rstudio.com" 8 | } 9 | ] 10 | }, 11 | "Packages": { 12 | "renv": { 13 | "Package": "renv", 14 | "Version": "0.15.4", 15 | "Source": "Repository", 16 | "Repository": "CRAN", 17 | "Hash": "c1078316e1d4f70275fc1ea60c0bc431", 18 | "Requirements": [] 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /renv/.gitignore: -------------------------------------------------------------------------------- 1 | library/ 2 | local/ 3 | cellar/ 4 | lock/ 5 | python/ 6 | staging/ 7 | -------------------------------------------------------------------------------- /renv/activate.R: -------------------------------------------------------------------------------- 1 | 2 | local({ 3 | 4 | # the requested version of renv 5 | version <- "0.15.4" 6 | 7 | # the project directory 8 | project <- getwd() 9 | 10 | # figure out whether the autoloader is enabled 11 | enabled <- local({ 12 | 13 | # first, check config option 14 | override <- getOption("renv.config.autoloader.enabled") 15 | if (!is.null(override)) 16 | return(override) 17 | 18 | # next, check environment variables 19 | # TODO: prefer using the configuration one in the future 20 | envvars <- c( 21 | "RENV_CONFIG_AUTOLOADER_ENABLED", 22 | "RENV_AUTOLOADER_ENABLED", 23 | "RENV_ACTIVATE_PROJECT" 24 | ) 25 | 26 | for (envvar in envvars) { 27 | envval <- Sys.getenv(envvar, unset = NA) 28 | if (!is.na(envval)) 29 | return(tolower(envval) %in% c("true", "t", "1")) 30 | } 31 | 32 | # enable by default 33 | TRUE 34 | 35 | }) 36 | 37 | if (!enabled) 38 | return(FALSE) 39 | 40 | # avoid recursion 41 | if (identical(getOption("renv.autoloader.running"), TRUE)) { 42 | warning("ignoring recursive attempt to run renv 
autoloader") 43 | return(invisible(TRUE)) 44 | } 45 | 46 | # signal that we're loading renv during R startup 47 | options(renv.autoloader.running = TRUE) 48 | on.exit(options(renv.autoloader.running = NULL), add = TRUE) 49 | 50 | # signal that we've consented to use renv 51 | options(renv.consent = TRUE) 52 | 53 | # load the 'utils' package eagerly -- this ensures that renv shims, which 54 | # mask 'utils' packages, will come first on the search path 55 | library(utils, lib.loc = .Library) 56 | 57 | # unload renv if it's already been laoded 58 | if ("renv" %in% loadedNamespaces()) 59 | unloadNamespace("renv") 60 | 61 | # load bootstrap tools 62 | `%||%` <- function(x, y) { 63 | if (is.environment(x) || length(x)) x else y 64 | } 65 | 66 | bootstrap <- function(version, library) { 67 | 68 | # attempt to download renv 69 | tarball <- tryCatch(renv_bootstrap_download(version), error = identity) 70 | if (inherits(tarball, "error")) 71 | stop("failed to download renv ", version) 72 | 73 | # now attempt to install 74 | status <- tryCatch(renv_bootstrap_install(version, tarball, library), error = identity) 75 | if (inherits(status, "error")) 76 | stop("failed to install renv ", version) 77 | 78 | } 79 | 80 | renv_bootstrap_tests_running <- function() { 81 | getOption("renv.tests.running", default = FALSE) 82 | } 83 | 84 | renv_bootstrap_repos <- function() { 85 | 86 | # check for repos override 87 | repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) 88 | if (!is.na(repos)) 89 | return(repos) 90 | 91 | # check for lockfile repositories 92 | repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) 93 | if (!inherits(repos, "error") && length(repos)) 94 | return(repos) 95 | 96 | # if we're testing, re-use the test repositories 97 | if (renv_bootstrap_tests_running()) 98 | return(getOption("renv.tests.repos")) 99 | 100 | # retrieve current repos 101 | repos <- getOption("repos") 102 | 103 | # ensure @CRAN@ entries are resolved 104 | repos[repos == "@CRAN@"] <- getOption( 105 | "renv.repos.cran", 106 | "https://cloud.r-project.org" 107 | ) 108 | 109 | # add in renv.bootstrap.repos if set 110 | default <- c(FALLBACK = "https://cloud.r-project.org") 111 | extra <- getOption("renv.bootstrap.repos", default = default) 112 | repos <- c(repos, extra) 113 | 114 | # remove duplicates that might've snuck in 115 | dupes <- duplicated(repos) | duplicated(names(repos)) 116 | repos[!dupes] 117 | 118 | } 119 | 120 | renv_bootstrap_repos_lockfile <- function() { 121 | 122 | lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") 123 | if (!file.exists(lockpath)) 124 | return(NULL) 125 | 126 | lockfile <- tryCatch(renv_json_read(lockpath), error = identity) 127 | if (inherits(lockfile, "error")) { 128 | warning(lockfile) 129 | return(NULL) 130 | } 131 | 132 | repos <- lockfile$R$Repositories 133 | if (length(repos) == 0) 134 | return(NULL) 135 | 136 | keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) 137 | vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1)) 138 | names(vals) <- keys 139 | 140 | return(vals) 141 | 142 | } 143 | 144 | renv_bootstrap_download <- function(version) { 145 | 146 | # if the renv version number has 4 components, assume it must 147 | # be retrieved via github 148 | nv <- numeric_version(version) 149 | components <- unclass(nv)[[1]] 150 | 151 | # if this appears to be a development version of 'renv', we'll 152 | # try to restore from github 153 | dev <- length(components) == 4L 154 | 155 | # begin collecting different methods for finding 
renv 156 | methods <- c( 157 | renv_bootstrap_download_tarball, 158 | if (dev) 159 | renv_bootstrap_download_github 160 | else c( 161 | renv_bootstrap_download_cran_latest, 162 | renv_bootstrap_download_cran_archive 163 | ) 164 | ) 165 | 166 | for (method in methods) { 167 | path <- tryCatch(method(version), error = identity) 168 | if (is.character(path) && file.exists(path)) 169 | return(path) 170 | } 171 | 172 | stop("failed to download renv ", version) 173 | 174 | } 175 | 176 | renv_bootstrap_download_impl <- function(url, destfile) { 177 | 178 | mode <- "wb" 179 | 180 | # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 181 | fixup <- 182 | Sys.info()[["sysname"]] == "Windows" && 183 | substring(url, 1L, 5L) == "file:" 184 | 185 | if (fixup) 186 | mode <- "w+b" 187 | 188 | utils::download.file( 189 | url = url, 190 | destfile = destfile, 191 | mode = mode, 192 | quiet = TRUE 193 | ) 194 | 195 | } 196 | 197 | renv_bootstrap_download_cran_latest <- function(version) { 198 | 199 | spec <- renv_bootstrap_download_cran_latest_find(version) 200 | 201 | message("* Downloading renv ", version, " ... ", appendLF = FALSE) 202 | 203 | type <- spec$type 204 | repos <- spec$repos 205 | 206 | info <- tryCatch( 207 | utils::download.packages( 208 | pkgs = "renv", 209 | destdir = tempdir(), 210 | repos = repos, 211 | type = type, 212 | quiet = TRUE 213 | ), 214 | condition = identity 215 | ) 216 | 217 | if (inherits(info, "condition")) { 218 | message("FAILED") 219 | return(FALSE) 220 | } 221 | 222 | # report success and return 223 | message("OK (downloaded ", type, ")") 224 | info[1, 2] 225 | 226 | } 227 | 228 | renv_bootstrap_download_cran_latest_find <- function(version) { 229 | 230 | # check whether binaries are supported on this system 231 | binary <- 232 | getOption("renv.bootstrap.binary", default = TRUE) && 233 | !identical(.Platform$pkgType, "source") && 234 | !identical(getOption("pkgType"), "source") && 235 | Sys.info()[["sysname"]] %in% c("Darwin", "Windows") 236 | 237 | types <- c(if (binary) "binary", "source") 238 | 239 | # iterate over types + repositories 240 | for (type in types) { 241 | for (repos in renv_bootstrap_repos()) { 242 | 243 | # retrieve package database 244 | db <- tryCatch( 245 | as.data.frame( 246 | utils::available.packages(type = type, repos = repos), 247 | stringsAsFactors = FALSE 248 | ), 249 | error = identity 250 | ) 251 | 252 | if (inherits(db, "error")) 253 | next 254 | 255 | # check for compatible entry 256 | entry <- db[db$Package %in% "renv" & db$Version %in% version, ] 257 | if (nrow(entry) == 0) 258 | next 259 | 260 | # found it; return spec to caller 261 | spec <- list(entry = entry, type = type, repos = repos) 262 | return(spec) 263 | 264 | } 265 | } 266 | 267 | # if we got here, we failed to find renv 268 | fmt <- "renv %s is not available from your declared package repositories" 269 | stop(sprintf(fmt, version)) 270 | 271 | } 272 | 273 | renv_bootstrap_download_cran_archive <- function(version) { 274 | 275 | name <- sprintf("renv_%s.tar.gz", version) 276 | repos <- renv_bootstrap_repos() 277 | urls <- file.path(repos, "src/contrib/Archive/renv", name) 278 | destfile <- file.path(tempdir(), name) 279 | 280 | message("* Downloading renv ", version, " ... 
", appendLF = FALSE) 281 | 282 | for (url in urls) { 283 | 284 | status <- tryCatch( 285 | renv_bootstrap_download_impl(url, destfile), 286 | condition = identity 287 | ) 288 | 289 | if (identical(status, 0L)) { 290 | message("OK") 291 | return(destfile) 292 | } 293 | 294 | } 295 | 296 | message("FAILED") 297 | return(FALSE) 298 | 299 | } 300 | 301 | renv_bootstrap_download_tarball <- function(version) { 302 | 303 | # if the user has provided the path to a tarball via 304 | # an environment variable, then use it 305 | tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) 306 | if (is.na(tarball)) 307 | return() 308 | 309 | # allow directories 310 | info <- file.info(tarball, extra_cols = FALSE) 311 | if (identical(info$isdir, TRUE)) { 312 | name <- sprintf("renv_%s.tar.gz", version) 313 | tarball <- file.path(tarball, name) 314 | } 315 | 316 | # bail if it doesn't exist 317 | if (!file.exists(tarball)) 318 | return() 319 | 320 | fmt <- "* Bootstrapping with tarball at path '%s'." 321 | msg <- sprintf(fmt, tarball) 322 | message(msg) 323 | 324 | tarball 325 | 326 | } 327 | 328 | renv_bootstrap_download_github <- function(version) { 329 | 330 | enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") 331 | if (!identical(enabled, "TRUE")) 332 | return(FALSE) 333 | 334 | # prepare download options 335 | pat <- Sys.getenv("GITHUB_PAT") 336 | if (nzchar(Sys.which("curl")) && nzchar(pat)) { 337 | fmt <- "--location --fail --header \"Authorization: token %s\"" 338 | extra <- sprintf(fmt, pat) 339 | saved <- options("download.file.method", "download.file.extra") 340 | options(download.file.method = "curl", download.file.extra = extra) 341 | on.exit(do.call(base::options, saved), add = TRUE) 342 | } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { 343 | fmt <- "--header=\"Authorization: token %s\"" 344 | extra <- sprintf(fmt, pat) 345 | saved <- options("download.file.method", "download.file.extra") 346 | options(download.file.method = "wget", download.file.extra = extra) 347 | on.exit(do.call(base::options, saved), add = TRUE) 348 | } 349 | 350 | message("* Downloading renv ", version, " from GitHub ... ", appendLF = FALSE) 351 | 352 | url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) 353 | name <- sprintf("renv_%s.tar.gz", version) 354 | destfile <- file.path(tempdir(), name) 355 | 356 | status <- tryCatch( 357 | renv_bootstrap_download_impl(url, destfile), 358 | condition = identity 359 | ) 360 | 361 | if (!identical(status, 0L)) { 362 | message("FAILED") 363 | return(FALSE) 364 | } 365 | 366 | message("OK") 367 | return(destfile) 368 | 369 | } 370 | 371 | renv_bootstrap_install <- function(version, tarball, library) { 372 | 373 | # attempt to install it into project library 374 | message("* Installing renv ", version, " ... 
", appendLF = FALSE) 375 | dir.create(library, showWarnings = FALSE, recursive = TRUE) 376 | 377 | # invoke using system2 so we can capture and report output 378 | bin <- R.home("bin") 379 | exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" 380 | r <- file.path(bin, exe) 381 | 382 | args <- c( 383 | "--vanilla", "CMD", "INSTALL", "--no-multiarch", 384 | "-l", shQuote(path.expand(library)), 385 | shQuote(path.expand(tarball)) 386 | ) 387 | 388 | output <- system2(r, args, stdout = TRUE, stderr = TRUE) 389 | message("Done!") 390 | 391 | # check for successful install 392 | status <- attr(output, "status") 393 | if (is.numeric(status) && !identical(status, 0L)) { 394 | header <- "Error installing renv:" 395 | lines <- paste(rep.int("=", nchar(header)), collapse = "") 396 | text <- c(header, lines, output) 397 | writeLines(text, con = stderr()) 398 | } 399 | 400 | status 401 | 402 | } 403 | 404 | renv_bootstrap_platform_prefix <- function() { 405 | 406 | # construct version prefix 407 | version <- paste(R.version$major, R.version$minor, sep = ".") 408 | prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") 409 | 410 | # include SVN revision for development versions of R 411 | # (to avoid sharing platform-specific artefacts with released versions of R) 412 | devel <- 413 | identical(R.version[["status"]], "Under development (unstable)") || 414 | identical(R.version[["nickname"]], "Unsuffered Consequences") 415 | 416 | if (devel) 417 | prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") 418 | 419 | # build list of path components 420 | components <- c(prefix, R.version$platform) 421 | 422 | # include prefix if provided by user 423 | prefix <- renv_bootstrap_platform_prefix_impl() 424 | if (!is.na(prefix) && nzchar(prefix)) 425 | components <- c(prefix, components) 426 | 427 | # build prefix 428 | paste(components, collapse = "/") 429 | 430 | } 431 | 432 | renv_bootstrap_platform_prefix_impl <- function() { 433 | 434 | # if an explicit prefix has been supplied, use it 435 | prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) 436 | if (!is.na(prefix)) 437 | return(prefix) 438 | 439 | # if the user has requested an automatic prefix, generate it 440 | auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) 441 | if (auto %in% c("TRUE", "True", "true", "1")) 442 | return(renv_bootstrap_platform_prefix_auto()) 443 | 444 | # empty string on failure 445 | "" 446 | 447 | } 448 | 449 | renv_bootstrap_platform_prefix_auto <- function() { 450 | 451 | prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) 452 | if (inherits(prefix, "error") || prefix %in% "unknown") { 453 | 454 | msg <- paste( 455 | "failed to infer current operating system", 456 | "please file a bug report at https://github.com/rstudio/renv/issues", 457 | sep = "; " 458 | ) 459 | 460 | warning(msg) 461 | 462 | } 463 | 464 | prefix 465 | 466 | } 467 | 468 | renv_bootstrap_platform_os <- function() { 469 | 470 | sysinfo <- Sys.info() 471 | sysname <- sysinfo[["sysname"]] 472 | 473 | # handle Windows + macOS up front 474 | if (sysname == "Windows") 475 | return("windows") 476 | else if (sysname == "Darwin") 477 | return("macos") 478 | 479 | # check for os-release files 480 | for (file in c("/etc/os-release", "/usr/lib/os-release")) 481 | if (file.exists(file)) 482 | return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) 483 | 484 | # check for redhat-release files 485 | if (file.exists("/etc/redhat-release")) 486 | return(renv_bootstrap_platform_os_via_redhat_release()) 487 | 488 | 
"unknown" 489 | 490 | } 491 | 492 | renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { 493 | 494 | # read /etc/os-release 495 | release <- utils::read.table( 496 | file = file, 497 | sep = "=", 498 | quote = c("\"", "'"), 499 | col.names = c("Key", "Value"), 500 | comment.char = "#", 501 | stringsAsFactors = FALSE 502 | ) 503 | 504 | vars <- as.list(release$Value) 505 | names(vars) <- release$Key 506 | 507 | # get os name 508 | os <- tolower(sysinfo[["sysname"]]) 509 | 510 | # read id 511 | id <- "unknown" 512 | for (field in c("ID", "ID_LIKE")) { 513 | if (field %in% names(vars) && nzchar(vars[[field]])) { 514 | id <- vars[[field]] 515 | break 516 | } 517 | } 518 | 519 | # read version 520 | version <- "unknown" 521 | for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { 522 | if (field %in% names(vars) && nzchar(vars[[field]])) { 523 | version <- vars[[field]] 524 | break 525 | } 526 | } 527 | 528 | # join together 529 | paste(c(os, id, version), collapse = "-") 530 | 531 | } 532 | 533 | renv_bootstrap_platform_os_via_redhat_release <- function() { 534 | 535 | # read /etc/redhat-release 536 | contents <- readLines("/etc/redhat-release", warn = FALSE) 537 | 538 | # infer id 539 | id <- if (grepl("centos", contents, ignore.case = TRUE)) 540 | "centos" 541 | else if (grepl("redhat", contents, ignore.case = TRUE)) 542 | "redhat" 543 | else 544 | "unknown" 545 | 546 | # try to find a version component (very hacky) 547 | version <- "unknown" 548 | 549 | parts <- strsplit(contents, "[[:space:]]")[[1L]] 550 | for (part in parts) { 551 | 552 | nv <- tryCatch(numeric_version(part), error = identity) 553 | if (inherits(nv, "error")) 554 | next 555 | 556 | version <- nv[1, 1] 557 | break 558 | 559 | } 560 | 561 | paste(c("linux", id, version), collapse = "-") 562 | 563 | } 564 | 565 | renv_bootstrap_library_root_name <- function(project) { 566 | 567 | # use project name as-is if requested 568 | asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") 569 | if (asis) 570 | return(basename(project)) 571 | 572 | # otherwise, disambiguate based on project's path 573 | id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) 574 | paste(basename(project), id, sep = "-") 575 | 576 | } 577 | 578 | renv_bootstrap_library_root <- function(project) { 579 | 580 | prefix <- renv_bootstrap_profile_prefix() 581 | 582 | path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) 583 | if (!is.na(path)) 584 | return(paste(c(path, prefix), collapse = "/")) 585 | 586 | path <- renv_bootstrap_library_root_impl(project) 587 | if (!is.null(path)) { 588 | name <- renv_bootstrap_library_root_name(project) 589 | return(paste(c(path, prefix, name), collapse = "/")) 590 | } 591 | 592 | renv_bootstrap_paths_renv("library", project = project) 593 | 594 | } 595 | 596 | renv_bootstrap_library_root_impl <- function(project) { 597 | 598 | root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) 599 | if (!is.na(root)) 600 | return(root) 601 | 602 | type <- renv_bootstrap_project_type(project) 603 | if (identical(type, "package")) { 604 | userdir <- renv_bootstrap_user_dir() 605 | return(file.path(userdir, "library")) 606 | } 607 | 608 | } 609 | 610 | renv_bootstrap_validate_version <- function(version) { 611 | 612 | loadedversion <- utils::packageDescription("renv", fields = "Version") 613 | if (version == loadedversion) 614 | return(TRUE) 615 | 616 | # assume four-component versions are from GitHub; three-component 617 | # versions are from CRAN 618 | components <- 
strsplit(loadedversion, "[.-]")[[1]] 619 | remote <- if (length(components) == 4L) 620 | paste("rstudio/renv", loadedversion, sep = "@") 621 | else 622 | paste("renv", loadedversion, sep = "@") 623 | 624 | fmt <- paste( 625 | "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", 626 | "Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", 627 | "Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", 628 | sep = "\n" 629 | ) 630 | 631 | msg <- sprintf(fmt, loadedversion, version, remote) 632 | warning(msg, call. = FALSE) 633 | 634 | FALSE 635 | 636 | } 637 | 638 | renv_bootstrap_hash_text <- function(text) { 639 | 640 | hashfile <- tempfile("renv-hash-") 641 | on.exit(unlink(hashfile), add = TRUE) 642 | 643 | writeLines(text, con = hashfile) 644 | tools::md5sum(hashfile) 645 | 646 | } 647 | 648 | renv_bootstrap_load <- function(project, libpath, version) { 649 | 650 | # try to load renv from the project library 651 | if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) 652 | return(FALSE) 653 | 654 | # warn if the version of renv loaded does not match 655 | renv_bootstrap_validate_version(version) 656 | 657 | # load the project 658 | renv::load(project) 659 | 660 | TRUE 661 | 662 | } 663 | 664 | renv_bootstrap_profile_load <- function(project) { 665 | 666 | # if RENV_PROFILE is already set, just use that 667 | profile <- Sys.getenv("RENV_PROFILE", unset = NA) 668 | if (!is.na(profile) && nzchar(profile)) 669 | return(profile) 670 | 671 | # check for a profile file (nothing to do if it doesn't exist) 672 | path <- renv_bootstrap_paths_renv("profile", profile = FALSE) 673 | if (!file.exists(path)) 674 | return(NULL) 675 | 676 | # read the profile, and set it if it exists 677 | contents <- readLines(path, warn = FALSE) 678 | if (length(contents) == 0L) 679 | return(NULL) 680 | 681 | # set RENV_PROFILE 682 | profile <- contents[[1L]] 683 | if (!profile %in% c("", "default")) 684 | Sys.setenv(RENV_PROFILE = profile) 685 | 686 | profile 687 | 688 | } 689 | 690 | renv_bootstrap_profile_prefix <- function() { 691 | profile <- renv_bootstrap_profile_get() 692 | if (!is.null(profile)) 693 | return(file.path("profiles", profile, "renv")) 694 | } 695 | 696 | renv_bootstrap_profile_get <- function() { 697 | profile <- Sys.getenv("RENV_PROFILE", unset = "") 698 | renv_bootstrap_profile_normalize(profile) 699 | } 700 | 701 | renv_bootstrap_profile_set <- function(profile) { 702 | profile <- renv_bootstrap_profile_normalize(profile) 703 | if (is.null(profile)) 704 | Sys.unsetenv("RENV_PROFILE") 705 | else 706 | Sys.setenv(RENV_PROFILE = profile) 707 | } 708 | 709 | renv_bootstrap_profile_normalize <- function(profile) { 710 | 711 | if (is.null(profile) || profile %in% c("", "default")) 712 | return(NULL) 713 | 714 | profile 715 | 716 | } 717 | 718 | renv_bootstrap_path_absolute <- function(path) { 719 | 720 | substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( 721 | substr(path, 1L, 1L) %in% c(letters, LETTERS) && 722 | substr(path, 2L, 3L) %in% c(":/", ":\\") 723 | ) 724 | 725 | } 726 | 727 | renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { 728 | renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") 729 | root <- if (renv_bootstrap_path_absolute(renv)) NULL else project 730 | prefix <- if (profile) renv_bootstrap_profile_prefix() 731 | components <- c(root, renv, prefix, ...) 
732 | paste(components, collapse = "/") 733 | } 734 | 735 | renv_bootstrap_project_type <- function(path) { 736 | 737 | descpath <- file.path(path, "DESCRIPTION") 738 | if (!file.exists(descpath)) 739 | return("unknown") 740 | 741 | desc <- tryCatch( 742 | read.dcf(descpath, all = TRUE), 743 | error = identity 744 | ) 745 | 746 | if (inherits(desc, "error")) 747 | return("unknown") 748 | 749 | type <- desc$Type 750 | if (!is.null(type)) 751 | return(tolower(type)) 752 | 753 | package <- desc$Package 754 | if (!is.null(package)) 755 | return("package") 756 | 757 | "unknown" 758 | 759 | } 760 | 761 | renv_bootstrap_user_dir <- function() { 762 | dir <- renv_bootstrap_user_dir_impl() 763 | path.expand(chartr("\\", "/", dir)) 764 | } 765 | 766 | renv_bootstrap_user_dir_impl <- function() { 767 | 768 | # use local override if set 769 | override <- getOption("renv.userdir.override") 770 | if (!is.null(override)) 771 | return(override) 772 | 773 | # use R_user_dir if available 774 | tools <- asNamespace("tools") 775 | if (is.function(tools$R_user_dir)) 776 | return(tools$R_user_dir("renv", "cache")) 777 | 778 | # try using our own backfill for older versions of R 779 | envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") 780 | for (envvar in envvars) { 781 | root <- Sys.getenv(envvar, unset = NA) 782 | if (!is.na(root)) 783 | return(file.path(root, "R/renv")) 784 | } 785 | 786 | # use platform-specific default fallbacks 787 | if (Sys.info()[["sysname"]] == "Windows") 788 | file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") 789 | else if (Sys.info()[["sysname"]] == "Darwin") 790 | "~/Library/Caches/org.R-project.R/R/renv" 791 | else 792 | "~/.cache/R/renv" 793 | 794 | } 795 | 796 | 797 | renv_json_read <- function(file = NULL, text = NULL) { 798 | 799 | text <- paste(text %||% read(file), collapse = "\n") 800 | 801 | # find strings in the JSON 802 | pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' 803 | locs <- gregexpr(pattern, text)[[1]] 804 | 805 | # if any are found, replace them with placeholders 806 | replaced <- text 807 | strings <- character() 808 | replacements <- character() 809 | 810 | if (!identical(c(locs), -1L)) { 811 | 812 | # get the string values 813 | starts <- locs 814 | ends <- locs + attr(locs, "match.length") - 1L 815 | strings <- substring(text, starts, ends) 816 | 817 | # only keep those requiring escaping 818 | strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) 819 | 820 | # compute replacements 821 | replacements <- sprintf('"\032%i\032"', seq_along(strings)) 822 | 823 | # replace the strings 824 | mapply(function(string, replacement) { 825 | replaced <<- sub(string, replacement, replaced, fixed = TRUE) 826 | }, strings, replacements) 827 | 828 | } 829 | 830 | # transform the JSON into something the R parser understands 831 | transformed <- replaced 832 | transformed <- gsub("[[{]", "list(", transformed) 833 | transformed <- gsub("[]}]", ")", transformed) 834 | transformed <- gsub(":", "=", transformed, fixed = TRUE) 835 | text <- paste(transformed, collapse = "\n") 836 | 837 | # parse it 838 | json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] 839 | 840 | # construct map between source strings, replaced strings 841 | map <- as.character(parse(text = strings)) 842 | names(map) <- as.character(parse(text = replacements)) 843 | 844 | # convert to list 845 | map <- as.list(map) 846 | 847 | # remap strings in object 848 | remapped <- renv_json_remap(json, map) 849 | 850 | # evaluate 851 | eval(remapped, envir = baseenv()) 852 | 853 | } 854 
| 855 | renv_json_remap <- function(json, map) { 856 | 857 | # fix names 858 | if (!is.null(names(json))) { 859 | lhs <- match(names(json), names(map), nomatch = 0L) 860 | rhs <- match(names(map), names(json), nomatch = 0L) 861 | names(json)[rhs] <- map[lhs] 862 | } 863 | 864 | # fix values 865 | if (is.character(json)) 866 | return(map[[json]] %||% json) 867 | 868 | # handle true, false, null 869 | if (is.name(json)) { 870 | text <- as.character(json) 871 | if (text == "true") 872 | return(TRUE) 873 | else if (text == "false") 874 | return(FALSE) 875 | else if (text == "null") 876 | return(NULL) 877 | } 878 | 879 | # recurse 880 | if (is.recursive(json)) { 881 | for (i in seq_along(json)) { 882 | json[i] <- list(renv_json_remap(json[[i]], map)) 883 | } 884 | } 885 | 886 | json 887 | 888 | } 889 | 890 | # load the renv profile, if any 891 | renv_bootstrap_profile_load(project) 892 | 893 | # construct path to library root 894 | root <- renv_bootstrap_library_root(project) 895 | 896 | # construct library prefix for platform 897 | prefix <- renv_bootstrap_platform_prefix() 898 | 899 | # construct full libpath 900 | libpath <- file.path(root, prefix) 901 | 902 | # attempt to load 903 | if (renv_bootstrap_load(project, libpath, version)) 904 | return(TRUE) 905 | 906 | # load failed; inform user we're about to bootstrap 907 | prefix <- paste("# Bootstrapping renv", version) 908 | postfix <- paste(rep.int("-", 77L - nchar(prefix)), collapse = "") 909 | header <- paste(prefix, postfix) 910 | message(header) 911 | 912 | # perform bootstrap 913 | bootstrap(version, libpath) 914 | 915 | # exit early if we're just testing bootstrap 916 | if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) 917 | return(TRUE) 918 | 919 | # try again to load 920 | if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { 921 | message("* Successfully installed and loaded renv ", version, ".") 922 | return(renv::load()) 923 | } 924 | 925 | # failed to download or load renv; warn the user 926 | msg <- c( 927 | "Failed to find an renv installation: the project will not be loaded.", 928 | "Use `renv::activate()` to re-initialize the project." 929 | ) 930 | 931 | warning(paste(msg, collapse = "\n"), call. = FALSE) 932 | 933 | }) 934 | -------------------------------------------------------------------------------- /renv/settings.dcf: -------------------------------------------------------------------------------- 1 | bioconductor.version: 2 | external.libraries: 3 | ignored.packages: 4 | package.dependency.fields: Imports, Depends, LinkingTo 5 | r.version: 6 | snapshot.type: implicit 7 | use.cache: TRUE 8 | vcs.ignore.cellar: TRUE 9 | vcs.ignore.library: TRUE 10 | vcs.ignore.local: TRUE 11 | --------------------------------------------------------------------------------
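The estimator help files above (propensity_scores.Rd, propensity_matching.Rd, standardization.Rd) all start from the same nhefs setup, so their examples can be chained. The sketch below is an illustrative assumption about typical combined usage, not a shipped example from the package: it fits the propensity model once, reuses its scores through the documented p.scores argument of propensity_matching (with both documented type values), and compares the resulting ATE summaries against plain parametric standardization. The object names (p.score, pm.strata, pm.stdm, std.mod) are hypothetical.

library(CausalModels)
library(causaldata)
library(multcomp)

data(nhefs)
nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ]
nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)

confounders <- c(
  "sex", "race", "age", "education", "smokeintensity",
  "smokeyrs", "exercise", "active", "wt71"
)

# shared setup used by every estimator in the package
init_params(wt82_71, qsmk, covariates = confounders, data = nhefs.nmv)

# fit the propensity model once and reuse its estimated scores
p.score <- propensity_scores(nhefs.nmv)

# stratification on the propensity score (default type = "strata")
pm.strata <- propensity_matching(nhefs.nmv, p.scores = p.score$p.scores)

# standardization conditional on propensity strata (type = "stdm")
pm.stdm <- propensity_matching(nhefs.nmv, p.scores = p.score$p.scores, type = "stdm")

# parametric standardization adjusting directly for the covariates
std.mod <- standardization(data = nhefs.nmv)

# compare average treatment effect summaries across the three approaches
pm.strata$ATE.summary
pm.stdm$ATE.summary
std.mod$ATE.summary

Because all three target the same average treatment effect of qsmk on wt82_71, their point estimates should be broadly similar when the underlying models are reasonably specified; large disagreements usually indicate model misspecification rather than a property of the package.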