├── .Rbuildignore
├── .Rprofile
├── .gitignore
├── .travis.yml
├── CRAN-SUBMISSION
├── CausalModels.Rproj
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
│   ├── doubly_robust.R
│   ├── gestimation.R
│   ├── helper_funcs.R
│   ├── imports.R
│   ├── initialize.R
│   ├── ipweighting.R
│   ├── iv_est.R
│   ├── outcome_regression.R
│   ├── propensity_matching.R
│   ├── propensity_scores.R
│   ├── standardization.R
│   └── zzz.R
├── README.Rmd
├── README.md
├── cran-comments.md
├── man
│   ├── doubly_robust.Rd
│   ├── figures
│   │   └── README-pressure-1.png
│   ├── gestimation.Rd
│   ├── init_params.Rd
│   ├── ipweighting.Rd
│   ├── iv_est.Rd
│   ├── outcome_regression.Rd
│   ├── propensity_matching.Rd
│   ├── propensity_scores.Rd
│   └── standardization.Rd
├── renv.lock
└── renv
    ├── .gitignore
    ├── activate.R
    └── settings.dcf

/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^renv$
2 | ^renv\.lock$
3 | ^.*\.Rproj$
4 | ^\.Rproj\.user$
5 | ^cran-comments\.md$
6 | ^\.travis\.yml$
7 | ^README\.Rmd$
8 | ^CRAN-SUBMISSION$
--------------------------------------------------------------------------------
/.Rprofile:
--------------------------------------------------------------------------------
1 | source("renv/activate.R")
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
2 |
3 | language: R
4 | cache: packages
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.2.1
2 | Date: 2025-04-24 23:48:43 UTC
3 | SHA: 9c9ee22b6640a3a40ddb7e24353333fe7fd8be39
--------------------------------------------------------------------------------
/CausalModels.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: CausalModels
2 | Type: Package
3 | Title: Causal Inference Modeling for Estimation of Causal Effects
4 | Version: 0.2.1
5 | Authors@R:
6 |     c(person(given = "Joshua",
7 |              family = "Anderson",
8 |              role = c("aut", "cre", "cph"),
9 |              email = "jwanderson198@gmail.com"),
10 |       person(given = "Cyril",
11 |              family = "Rakovski",
12 |              role = "rev",
13 |              email = "rakovski@chapman.edu"),
14 |       person(given = "Yesha",
15 |              family = "Patel",
16 |              role = "rev"),
17 |       person(given = "Erin",
18 |              family = "Lee",
19 |              role = "rev"))
20 | Maintainer: Joshua Anderson
21 | Description:
22 |     Provides an array of statistical models common in causal inference such as
standardization, IP weighting, propensity matching, outcome regression, and doubly-robust 24 | estimators. Estimates of the average treatment effects from each model are given with the 25 | standard error and a 95% Wald confidence interval (Hernan, Robins (2020) ). 26 | License: GPL-3 27 | URL: https://github.com/ander428/CausalModels 28 | BugReports: https://github.com/ander428/CausalModels/issues 29 | Encoding: UTF-8 30 | LazyData: true 31 | RoxygenNote: 7.1.2 32 | Language: en-US 33 | Imports: 34 | stats, 35 | causaldata, 36 | boot, 37 | multcomp, 38 | geepack 39 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(predict,doubly_robust) 4 | S3method(predict,gestimation) 5 | S3method(predict,ipweighting) 6 | S3method(predict,outcome_regression) 7 | S3method(predict,propensity_matching) 8 | S3method(predict,propensity_scores) 9 | S3method(predict,standardization) 10 | S3method(print,doubly_robust) 11 | S3method(print,gestimation) 12 | S3method(print,ipweighting) 13 | S3method(print,outcome_regression) 14 | S3method(print,propensity_matching) 15 | S3method(print,propensity_scores) 16 | S3method(print,standardization) 17 | S3method(print,summary.doubly_robust) 18 | S3method(print,summary.gestimation) 19 | S3method(print,summary.ipweighting) 20 | S3method(print,summary.standardization) 21 | S3method(summary,doubly_robust) 22 | S3method(summary,gestimation) 23 | S3method(summary,ipweighting) 24 | S3method(summary,outcome_regression) 25 | S3method(summary,propensity_matching) 26 | S3method(summary,propensity_scores) 27 | S3method(summary,standardization) 28 | export(doubly_robust) 29 | export(gestimation) 30 | export(init_params) 31 | export(ipweighting) 32 | export(iv_est) 33 | export(outcome_regression) 34 | export(propensity_matching) 35 | export(propensity_scores) 36 | export(standardization) 37 | import(boot) 38 | import(causaldata) 39 | import(geepack) 40 | import(multcomp) 41 | import(stats) 42 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # CausalModels 0.1.0 2 | 3 | - Added a `NEWS.md` file to track changes to the package. 4 | 5 | ### Version 0.2.0 - 2022/10/27 6 | 7 | #### Added 8 | 9 | - one parameter g-estimation 10 | 11 | #### Fixed 12 | 13 | - Typo in ipweighting documentation 14 | 15 | ### Version 0.2.1 - 2025/04/27 16 | 17 | #### Fixed 18 | 19 | - generic treatment names bug 20 | -------------------------------------------------------------------------------- /R/doubly_robust.R: -------------------------------------------------------------------------------- 1 | #' @title Doubly Robust Model 2 | #' @description \code{`doubly_robust`} trains both an outcome model and a propensity model to generate predictions 3 | #' for the outcome and probability of treatment respectively. By default, the model uses 4 | #' \code{\link[=standardization]{standardization}} and \code{\link[=propensity_scores]{propensity_scores}} to form a 5 | #' doubly-robust model between standardization and IP weighting. Alternatively, any outcome and treatment 6 | #' models can be provided instead, but must be compatible with the \code{\link[stats:predict]{predict}} generic function in R. 
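As a rough, hypothetical sketch of that interface (the fitted objects out.fit and p.fit below are illustrative names, not part of the package; the nhefs.nmv data and variable names come from the examples further down this file, and init_params() is assumed to have been run as shown there): a custom outcome model can be passed via out.mod, while a propensity model whose predict method needs type = "response" to return probabilities can instead have its fitted probabilities supplied directly.

# hypothetical user-supplied fits; any model that works with predict() will do
out.fit <- glm(wt82_71 ~ qsmk + age + sex + race, data = nhefs.nmv)
p.fit <- glm(qsmk ~ age + sex + race, family = binomial(), data = nhefs.nmv)

db <- doubly_robust(
  out.mod = out.fit,                              # outcome predictions taken via predict(out.fit)
  p.scores = predict(p.fit, type = "response"),   # supply treatment probabilities directly
  data = nhefs.nmv
)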
7 | #' Since many propensity models may not predict probabilities without additional arguments into the 8 | #' predict function, the predictions themselves can be given for both the outcome and propensity scores. 9 | #' 10 | #' @param data a data frame containing the variables in the model. 11 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 12 | #' @param out.mod (optional) a regression model that predicts the outcome. NOTE: the model given 13 | #' must be compatible with the \code{\link[stats:predict]{predict}} generic function. 14 | #' @param p.mod (optional) a propensity model that predicts the probability of treatment. NOTE: the model given 15 | #' must be compatible with the \code{\link[stats:predict]{predict}} generic function. 16 | #' @param f (optional) an object of class "formula" that overrides the default parameter 17 | #' @param family the family to be used in the general linear model. 18 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 19 | #' @param simple a boolean indicator to build default formula with interactions. 20 | #' If true, interactions will be excluded. If false, interactions will be included. By 21 | #' default, simple is set to false. 22 | #' @param scores (optional) use calculated outcome estimates. 23 | #' @param p.f (optional) an object of class "formula" that overrides the default formula for the denominator of the IP 24 | #' weighting function. 25 | #' @param p.simple a boolean indicator to build default formula with interactions for the propensity models. 26 | #' If true, interactions will be excluded. If false, interactions will be included. By 27 | #' default, simple is set to false. 28 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect. 29 | #' @param p.family the family to be used in the underlying propensity model. 30 | #' By default, this is set to \code{\link[stats:gaussian]{binomial}}. 31 | #' @param p.scores (optional) use calculated propensity scores. 32 | #' @param n.boot an integer value that indicates number of bootstrap iterations to calculate standard error. 33 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model. 34 | #' 35 | #' @returns \code{doubly_robust} returns an object of \code{\link[base:class]{class}} "doubly_robust". 36 | #' 37 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 38 | #' the underlying \code{glm} model. 39 | #' 40 | #' An object of class \code{"doubly_robust"} is a list containing the following: 41 | #' 42 | #' \item{out.call}{the matched call of the outcome model.} 43 | #' \item{p.call}{the matched call of the propensity model.} 44 | #' \item{out.model}{the underlying outcome model.} 45 | #' \item{p.model}{the underlying propensity model.} 46 | #' \item{y_hat}{the estimated outcome values.} 47 | #' \item{p.scores}{the estimated propensity scores.} 48 | #' \item{ATE}{the estimated average treatment effect (risk difference).} 49 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. 
} 50 | #' \item{data}{the data frame used to train the model.} 51 | #' 52 | #' @export 53 | #' 54 | #' @examples 55 | #' library(causaldata) 56 | #' data(nhefs) 57 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 58 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 59 | #' 60 | #' confounders <- c( 61 | #' "sex", "race", "age", "education", "smokeintensity", 62 | #' "smokeyrs", "exercise", "active", "wt71" 63 | #' ) 64 | #' 65 | #' init_params(wt82_71, qsmk, 66 | #' covariates = confounders, 67 | #' data = nhefs.nmv 68 | #' ) 69 | #' 70 | #' # model using all defaults 71 | #' model <- doubly_robust(data = nhefs.nmv) 72 | #' summary(model) 73 | #' 74 | #' # use alternative outcome model 75 | #' out.mod <- propensity_matching(data = nhefs.nmv) 76 | #' db.model <- doubly_robust( 77 | #' out.mod = out.mod, 78 | #' data = nhefs.nmv 79 | #' ) 80 | #' db.model 81 | #' 82 | #' # give calculated outcome predictions and give formula for propensity scores 83 | #' db.model <- doubly_robust( 84 | #' scores = predict(out.mod), 85 | #' p.f = qsmk ~ sex + race + age, 86 | #' data = nhefs.nmv 87 | #' ) 88 | #' db.model 89 | #' 90 | doubly_robust <- function(data, out.mod = NULL, p.mod = NULL, f = NA, 91 | family = gaussian(), simple = pkg.env$simple, scores = NA, 92 | p.f = NA, p.simple = pkg.env$simple, p.family = binomial(), 93 | p.scores = NA, n.boot = 50, ...) { 94 | check_init() 95 | 96 | # grab function parameters 97 | params <- as.list(match.call()[-1]) 98 | 99 | if (anyNA(scores)) { 100 | # generate standardization if outcome model not specified 101 | if (is.null(out.mod)) { 102 | out.mod <- standardization(f = f, data = data, simple = simple, family = family) 103 | } 104 | 105 | scores <- predict(out.mod) 106 | } 107 | 108 | 109 | if (anyNA(p.scores)) { 110 | # generate propensity model if treatment model not specified 111 | if (is.null(p.mod)) { 112 | p.mod <- propensity_scores(f = p.f, data = data, family = p.family, simple = p.simple) 113 | } 114 | 115 | p.scores <- predict(p.mod) 116 | } 117 | 118 | boot_result <- boot(data = data, statistic = function(data, indices) { 119 | data <- data[indices, ] 120 | scores <- scores[indices] 121 | p.scores <- p.scores[indices] 122 | return(doubly_robust_est(scores, p.scores, data)) 123 | }, R = n.boot) 124 | 125 | # calculate 95% CI 126 | beta <- boot_result$t0 127 | SE <- sd(boot_result$t) 128 | ATE <- data.frame( 129 | "ATE" = beta, 130 | "SE" = SE, 131 | conf_int(beta, SE), 132 | check.names = FALSE 133 | ) 134 | 135 | output <- list( 136 | "out.call" = out.mod$call, "p.call" = p.mod$call, "out.model" = out.mod, 137 | "p.model" = p.mod, "y_hat" = scores, "p.scores" = p.scores, 138 | "ATE" = beta, "ATE.summary" = ATE, "data" = data 139 | ) 140 | 141 | class(output) <- "doubly_robust" 142 | return(output) 143 | } 144 | 145 | #' @export 146 | print.doubly_robust <- function(x, ...) { 147 | cat("Outcome Model\r\n\r\n") 148 | cat("Call:\r\n") 149 | print(x$out.call, ...) 150 | cat("\r\nPredictions:\r\n") 151 | print(summary(x$y_hat)) 152 | cat("\r\nPropensity Model\r\n\r\n") 153 | cat("Call:\r\n") 154 | print(x$p.call, ...) 155 | cat("\r\nPredictions:\r\n") 156 | print(summary(x$p.scores)) 157 | cat("\r\n") 158 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 159 | cat("Estimate - ", x$ATE, "\r\n") 160 | cat("SE - ", x$ATE.summary$SE, "\r\n") 161 | cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n") 162 | } 163 | 164 | #' @export 165 | summary.doubly_robust <- function(object, ...) 
{ 166 | s1 <- summary(object$out.model, ...) 167 | s1$ATE <- object$ATE 168 | s2 <- summary(object$p.model, ...) 169 | s2$ATE <- object$ATE 170 | s <- list("out.summary" = s1, "p.summary" = s2) 171 | class(s) <- "summary.doubly_robust" 172 | return(s) 173 | } 174 | 175 | #' @export 176 | print.summary.doubly_robust <- function(x, ...) { 177 | s1 <- x$out.summary 178 | s2 <- x$p.summary 179 | cat("Outcome Model Summary\r\n") 180 | print(s1, ...) 181 | cat("Propensity Model Summary\r\n") 182 | print(s2, ...) 183 | } 184 | 185 | #' @export 186 | predict.doubly_robust <- function(object, ...) { 187 | return(doubly_robust_est(object$y_hat, object$p.scores, object$data)) 188 | } 189 | 190 | doubly_robust_est <- function(S, W, data) { 191 | n <- length(S) 192 | tr <- as.numeric(levels(data[[pkg.env$treatment]]))[data[[pkg.env$treatment]]] 193 | 194 | return(sum(S - ((tr * (data[[pkg.env$outcome]] - S)) / W)) / n) 195 | } 196 | -------------------------------------------------------------------------------- /R/gestimation.R: -------------------------------------------------------------------------------- 1 | 2 | #' @title One Parameter G-Estimation of Structural Nested Mean Models 3 | #' @description `gestimation` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 4 | #' weights. The weights can either be standardized weights or non-standardized weights. A grid search is done on \eqn{\alpha} 5 | #' to construct the best \eqn{\beta} coefficient in the structural nested mean model. Alternatively, a linear mean model can be used 6 | #' for a closed form estimator. 7 | #' 8 | #' @param data a data frame containing the variables in the model. 9 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 10 | #' @param grid a list of possible \eqn{\beta} values that will be used in the grid search. 11 | #' @param ids (optional) see documentation for \code{\link[geepack:geeglm]{geeglm}}. By default rownames of the data will be used. 12 | #' @param f (optional) an object of class "formula" that overrides the default parameter. NOTE: for g-estimation this should be 13 | #' a propensity formula. 14 | #' @param family the family to be used in the general linear model. 15 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 16 | #' @param simple (optional) a boolean indicator to build default formula with interactions for the g-estimation model. 17 | #' If true, interactions will be excluded. If false, interactions will be included. By default, simple is set to false. 18 | #' NOTE: \eqn{\beta} will be appended to the end of the formula 19 | #' @param p.f (optional) an object of class "formula" that overrides the default formula for the denominator of the IP 20 | #' weighting function. 21 | #' @param p.simple (optional) a boolean indicator to build default formula with interactions for the propensity models. 22 | #' If true, interactions will be excluded. If false, interactions will be included. By 23 | #' default, simple is set to false. 24 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect. 25 | #' @param p.family the family to be used in the underlying propensity model. 26 | #' By default, this is set to \code{\link[stats:binomial]{binomial}}. 27 | #' @param p.scores (optional) use calculated propensity scores for the weights. If using standardized weights, 28 | #' the numerator will still be modeled. 
29 | #' @param SW a boolean indicator to indicate the use of standardized weights. By default, this is set to true. 30 | #' @param n.boot (optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 31 | #' If no value is given, the standard error from the underlying linear model will be used. NOTE: when type is 'one.grid' 32 | #' bootstrapping is not performed. By default, this is set to 100. 33 | #' @param type the type of g-estimation to perform. It must be one of \code{"one.grid"} or \code{"one.linear"} for a 34 | #' one parameter grid and linear mean model estimation respectively. 35 | #' @param ... additional arguments that may be passed to the underlying model. 36 | #' 37 | #' @returns \code{gestimation} returns an object of \code{\link[base:class]{class} "gestimation"}. 38 | #' 39 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 40 | #' the underlying \code{glm} or \code{geeglm} model. 41 | #' 42 | #' An object of class \code{"gestimation"} is a list containing the following: 43 | #' 44 | #' \item{call}{the matched call.} 45 | #' \item{formula}{the formula used in the model.} 46 | #' \item{model}{the underlying glm model. If the model performed a grid search, this will be renamed 'best.model'} 47 | #' \item{weights}{the estimated IP weights.} 48 | #' \item{type}{returns the value used for the 'type' parameter.} 49 | #' \item{ATE}{the estimated average treatment effect (risk difference).} 50 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 51 | #' 52 | #' 53 | #' @examples 54 | #' library(causaldata) 55 | #' data(nhefs) 56 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 57 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 58 | #' 59 | #' confounders <- c( 60 | #' "sex", "race", "age", "education", "smokeintensity", 61 | #' "smokeyrs", "exercise", "active", "wt71" 62 | #' ) 63 | #' 64 | #' init_params(wt82_71, qsmk, 65 | #' covariates = confounders, 66 | #' data = nhefs.nmv 67 | #' ) 68 | #' 69 | #' gest.model <- gestimation(nhefs.nmv, type = "one.linear", n.boot = 150) 70 | #' gest.model$ATE.summary 71 | 72 | #' @export 73 | gestimation <- function(data, grid, ids = list(), f = NA, family = binomial(), simple = pkg.env$simple, 74 | p.f = NA, p.simple = pkg.env$simple, p.family = binomial(), p.scores = NA, 75 | SW = TRUE, n.boot = 100, type = "one.grid", ...) 
{ 76 | check_init() 77 | 78 | # grab function parameters 79 | params <- as.list(match.call()[-1]) 80 | 81 | # if no formula provided 82 | if (is.na(as.character(f))[1]) { 83 | # override simple 84 | if (simple != pkg.env$simple) { 85 | f <- build_formula( 86 | out = pkg.env$treatment, cov = pkg.env$covariates, 87 | data = data, simple = p.simple 88 | ) 89 | } 90 | # use default 91 | else { 92 | f <- formula(pkg.env$f_tr) 93 | } 94 | } 95 | 96 | # if user does not give propensity scores 97 | if (anyNA(p.scores)) { 98 | if (!is.na(as.character(p.f))[1]) { 99 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 100 | } 101 | # if no given propensity formula 102 | else { 103 | if (p.simple != pkg.env$simple) { 104 | p.f <- build_formula( 105 | out = pkg.env$treatment, cov = pkg.env$covariates, 106 | data = data, simple = p.simple 107 | ) 108 | } 109 | # use default 110 | else { 111 | p.f <- formula(pkg.env$f_tr) 112 | } 113 | 114 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 115 | } 116 | } 117 | 118 | # if user does give propensity scores 119 | else { 120 | if (!is.na(as.character(p.f))[1]) { 121 | message("Ignoring given propensity formula since propensity scores have been given.") 122 | } 123 | message("Using given propensity scores.") 124 | } 125 | 126 | # use standardized weights 127 | if (SW) { 128 | numer_scores <- propensity_scores(as.formula(paste(pkg.env$treatment, "~1")), family = binomial(), data = data)$p.scores 129 | 130 | data$weights <- numer_scores / p.scores 131 | } else { 132 | data$weights <- 1 / p.scores 133 | } 134 | 135 | # perform grid search 136 | if (type == "one.grid") { 137 | if (length(grid) == 0) { 138 | errorCondition("'grid' parameter must have be a list with at least one value.") 139 | } 140 | 141 | coefs <- cbind(rep(NA, length(grid)), rep(NA, length(grid)), rep(NA, length(grid)), rep(NA, length(grid))) 142 | colnames(coefs) <- list("Effect", "Estimate", "Std. Error", "Pr(>|W|)") 143 | data$weights <- rep(1, nrow(data)) 144 | 145 | # validate rownames 146 | if (length(ids) != nrow(data)) { 147 | message("IDs list does not match number of rows in data. 
Using rownames by default.") 148 | data$ids <- rownames(data) 149 | } else { 150 | data$ids <- ids 151 | } 152 | 153 | f_terms <- as.character(f) 154 | f <- as.formula(paste(f_terms[[2]], f_terms[[1]], f_terms[[3]], "+", "beta")) 155 | 156 | i <- 0 157 | model <- NA 158 | models <- list() 159 | 160 | for (val in grid) { 161 | i <- i + 1 162 | data[[pkg.env$treatment]] <- as.numeric(as.character(data[[pkg.env$treatment]])) # geeglm requires numeric response 163 | data$beta <- data[[pkg.env$outcome]] - (val * data[[pkg.env$treatment]] - 1) 164 | 165 | model <- geepack::geeglm(f, family = family, data = data, weights = weights, id = ids, corstr = "independence") 166 | models[[i]] <- model 167 | estimate <- summary(model)$coefficients["beta", "Estimate"] 168 | 169 | coefs[i, 1] <- val 170 | coefs[i, 2] <- estimate 171 | coefs[i, 3] <- summary(model)$coefficients["beta", "Std.err"] 172 | coefs[i, 4] <- summary(model)$coefficients["beta", "Pr(>|W|)"] 173 | } 174 | 175 | result <- as.data.frame(coefs) 176 | rownames(result) <- paste("Effect =", result$Effect) 177 | 178 | conf_int_idx <- which(result$`Pr(>|W|)` > 0.05) 179 | 180 | ATE.summary <- data.frame( 181 | Beta = result$Effect[[which.min(abs(result$Estimate))]], 182 | SE = NA, 183 | `2.5 %` = result$Effect[[min(conf_int_idx)]], 184 | `97.5 %` = result$Effect[[max(conf_int_idx)]], 185 | check.names = FALSE 186 | ) 187 | # colnames(ATE.summary) <- c("Beta", "SE", "2.5 %", "97.5 %") 188 | 189 | output <- list( 190 | "call" = model$call, "formula" = f, "best.model" = models[[which.min(abs(result$Estimate))]], 191 | "weights" = data$weights, "result" = result[-1], type = "one.grid", 192 | "ATE" = ATE.summary$Beta, "ATE.summary" = ATE.summary 193 | ) 194 | 195 | class(output) <- "gestimation" 196 | return(output) 197 | } 198 | 199 | # one parameter linear mean model 200 | else if (type == "one.linear") { 201 | data[[pkg.env$treatment]] <- as.numeric(as.character(data[[pkg.env$treatment]])) 202 | 203 | model_func <- function(data, indices, f, family, weights, ...) { 204 | if (!anyNA(indices)) { 205 | data <- data[indices, ] 206 | } 207 | 208 | model <- glm(f, data = data, weights = weights, family = family) 209 | preds <- predict(model, data, type = "response") 210 | estimate <- sum(data$weights * data[[pkg.env$outcome]] * (data[[pkg.env$treatment]] - preds)) / 211 | sum(data$weights * data[[pkg.env$treatment]] * (data[[pkg.env$treatment]] - preds)) 212 | return(list("model" = model, "ATE" = estimate)) 213 | } 214 | 215 | # build model 216 | result <- model_func(data = data, indices = NA, f = f, family = family, weights = data$weights, ...) 217 | model <- result$model 218 | beta <- 0 219 | SE <- 0 220 | ATE <- result$ATE 221 | 222 | if (n.boot > 1) { 223 | # build bootstrapped estimates 224 | boot_result <- boot( 225 | data = data, R = n.boot, f = f, family = family, weights = data$weights, 226 | statistic = function(data, indices, f, family, ...) { 227 | model_func(data, indices, f, family, weights, ...)$ATE 228 | }, ... 
229 | ) 230 | 231 | # calculate 95% CI 232 | beta <- boot_result$t0 233 | SE <- sd(boot_result$t) 234 | } 235 | 236 | # calculate causal stats 237 | ATE.summary <- data.frame( 238 | "Beta" = beta, 239 | "SE" = SE, 240 | conf_int(beta, SE), 241 | check.names = FALSE 242 | ) 243 | 244 | output <- list( 245 | "call" = model$call, "formula" = f, "model" = model, type = "one.linear", 246 | "weights" = data$weights, "ATE" = ATE.summary$Beta, "ATE.summary" = ATE.summary 247 | ) 248 | 249 | class(output) <- "gestimation" 250 | return(output) 251 | } else { 252 | errorCondition("Invalid model type. Must be one of ('one.grid', 'one.linear')") 253 | } 254 | } 255 | 256 | #' @export 257 | print.gestimation <- function(x, ...) { 258 | if (x$type == "one.grid") { 259 | cat("Best Model:") 260 | cat("\r\n") 261 | print(x$best.model, ...) 262 | } else { 263 | print(x$model, ...) 264 | } 265 | 266 | cat("\r\n") 267 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 268 | cat("Estimate - ", x$ATE, "\r\n") 269 | cat("SE - ", x$ATE.summary$SE, "\r\n") 270 | cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n") 271 | } 272 | 273 | #' @export 274 | summary.gestimation <- function(object, ...) { 275 | model <- ifelse(object$type == "one.grid", object$best.model, object$model) 276 | s <- summary(model, ...) 277 | s$ATE <- object$ATE.summary 278 | class(s) <- "summary.gestimation" 279 | return(s) 280 | } 281 | 282 | #' @export 283 | print.summary.gestimation <- function(x, ...) { 284 | class(x) <- "summary.glm" 285 | print(x, ...) 286 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 287 | print(x$ATE, row.names = FALSE) 288 | cat("\r\n") 289 | } 290 | 291 | #' @export 292 | predict.gestimation <- function(object, ...) 
{ 293 | model <- ifelse(object$type == "one.grid", object$best.model, object$model) 294 | return(predict(model, ...)) 295 | } 296 | -------------------------------------------------------------------------------- /R/helper_funcs.R: -------------------------------------------------------------------------------- 1 | 2 | build_formula <- function(out, cov = list(), tr = NA, data, simple = FALSE) { 3 | # set treatment variable value 4 | tr <- ifelse(!is.na(tr), as.character(tr), "") 5 | 6 | # separate continuous and distrete covariates 7 | num_vars <- unlist(lapply(data[cov], is.numeric)) 8 | cont <- cov[num_vars] 9 | disc <- cov[!num_vars] 10 | glm 11 | # collapse discrete covariates with + 12 | disc_val <- paste(unlist(disc), collapse = " + ") 13 | disc_val <- ifelse(tr != "" && disc_val != "", paste("+", disc_val), disc_val) # add + before if prev term exists 14 | 15 | # return basic formula with no interactions 16 | if (simple) { 17 | cont_val <- paste(unlist(cont), collapse = " + ") # collapse all terms with + 18 | cont_val <- ifelse(disc_val != "" || tr != "", # add + before if prev term exists 19 | paste("+", cont_val), cont_val 20 | ) 21 | 22 | # return formula that doesn't include a treatment 23 | if (tr == "") { 24 | return(as.formula(paste(out, "~", disc_val, cont_val, collapse = " "))) 25 | } 26 | # return a formula with a treatment 27 | else { 28 | return(as.formula(paste(out, "~", tr, disc_val, cont_val, collapse = " "))) 29 | } 30 | } 31 | # if treatment exists, add interations between treatment and continuous 32 | # add cubic terms for each continuous variable 33 | else { 34 | # helper function to write out interactions between two vars 35 | print_interact <- function(x, y, use.I = TRUE) { 36 | if (!use.I) { 37 | return(paste("(", x, "*", y, ")", collapse = "")) 38 | } 39 | 40 | return(paste("I(", x, "*", y, ")", collapse = "")) 41 | } 42 | 43 | cont_val <- "" 44 | # only execute if there are continuous covariates 45 | if (!anyNA(cont) && !identical(cont, character(0))) { 46 | # logic for a single continuous covariate 47 | if (length(cont) == 1) { 48 | tr_interact <- ifelse(tr != "", paste("+", print_interact(tr, cont, FALSE)), "") 49 | cont_val <- paste(cont, tr_interact, "+", print_interact(cont, cont)) 50 | } 51 | # logic for a set of continuous covariates 52 | else { 53 | i <- 1 54 | for (var in cont) { 55 | tr_interact <- ifelse(tr != "", paste("+", print_interact(tr, var, FALSE)), "") 56 | plus_op <- ifelse(i == 1, "", "+") # if first value, don't add plus 57 | cont_val <- paste(cont_val, plus_op, var, tr_interact, "+", print_interact(var, var)) 58 | i <- i + 1 59 | } 60 | } 61 | cont_val <- ifelse(tr != "" || disc_val != "", paste("+", cont_val), cont_val) # add + to front if prev terms exist 62 | } 63 | } 64 | if (tr == "") { 65 | return(as.formula(paste(out, "~", disc_val, cont_val, collapse = " "))) 66 | } else { 67 | return(as.formula(paste(out, "~", tr, disc_val, cont_val, collapse = " "))) 68 | } 69 | } 70 | 71 | check_init <- function() { 72 | if (!pkg.env$init) { 73 | stop("Parameters not initialized. Please run init_params(...) 
before using any other functions") 74 | } 75 | } 76 | 77 | 78 | conf_int <- function(x, se) { 79 | lb <- x - qnorm(0.975) * se 80 | ub <- x + qnorm(0.975) * se 81 | return(list("2.5 %" = lb, "97.5 %" = ub)) 82 | } 83 | 84 | 85 | contrast_matrix <- function(model, rows, names) { 86 | mat <- matrix(0, nrow = rows, ncol = length(coef(model))) 87 | colnames(mat) <- names(coef(model)) 88 | rownames(mat) <- names 89 | return(mat) 90 | } 91 | -------------------------------------------------------------------------------- /R/imports.R: -------------------------------------------------------------------------------- 1 | #' @import stats 2 | #' @import causaldata 3 | #' @import boot 4 | #' @import multcomp 5 | #' @import geepack 6 | NULL 7 | -------------------------------------------------------------------------------- /R/initialize.R: -------------------------------------------------------------------------------- 1 | #' @title Initialize CausalModels Package 2 | #' @description This function is required to be run first before any other function can run. 3 | #' This will set within the package the global outcome, treatment, and covariate functions for each model to use. 4 | #' 5 | #' @param outcome the outcome variable of interest (must be continuous). 6 | #' @param treatment the treatment with the causal effect of interest on the outcome. 7 | #' @param covariates a list/vector of covariate names to be use for confounding adjustment. 8 | #' @param data a data frame containing the variables in the model. 9 | #' @param simple a boolean indicator to build default formula with interactions. 10 | #' If true, interactions will be excluded. If false, interactions will be included. By 11 | #' default, simple is set to false. 12 | #' 13 | #' @returns No return value. Called for parameter initialization. 
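Since init_params() has no examples block of its own, here is a minimal initialization sketch, copied from the examples used throughout the other functions in this package (nhefs data from the causaldata package):

library(causaldata)
data(nhefs)
nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ]   # drop rows with a missing outcome
nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)       # treatment must be a two-level factor

confounders <- c(
  "sex", "race", "age", "education", "smokeintensity",
  "smokeyrs", "exercise", "active", "wt71"
)

# outcome must be numeric; this sets the defaults used by every other model function
init_params(wt82_71, qsmk, covariates = confounders, data = nhefs.nmv)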
14 | #' 15 | #' @export 16 | #' 17 | init_params <- function(outcome, treatment, covariates, data, simple = FALSE) { 18 | params <- as.list(match.call()[-1]) 19 | cov <- params$cov 20 | 21 | if (!is.numeric(data[[as.character(params$outcome)]])) { 22 | assign("init", FALSE, pkg.env) 23 | stop("Outcome variable must be numeric!") 24 | } 25 | 26 | if (!is.factor(data[[as.character(params$treatment)]])) { 27 | assign("init", FALSE, pkg.env) 28 | stop("Treatment must be of type factor!") 29 | } else if (length(levels(data[[as.character(params$treatment)]])) > 2) { 30 | assign("init", FALSE, pkg.env) 31 | stop(paste("Treatment must be binary!", as.character(params$treatment), "has more than two levels!")) 32 | } 33 | 34 | tryCatch(data[as.character(params$covariates)[-1]], error = function(e) { 35 | assign("init", TRUE, pkg.env) 36 | stop(e) 37 | }) 38 | 39 | assign("outcome", as.character(params$outcome), pkg.env) 40 | assign("treatment", as.character(params$treatment), pkg.env) 41 | 42 | # if covariates are given as strings 43 | if (length(as.character(params$covariates)[-1]) == 0) { 44 | cov <- covariates 45 | assign("covariates", cov, pkg.env) 46 | } 47 | # if covariates are not given as strings 48 | else { 49 | assign("covariates", as.character(params$covariates)[-1], pkg.env) 50 | } 51 | 52 | assign("simple", simple, pkg.env) 53 | assign("p_simple", simple, pkg.env) 54 | 55 | f_out <- build_formula( 56 | out = as.character(params$outcome), 57 | tr = as.character(params$treatment), 58 | cov = pkg.env$covariates, 59 | data = data, simple = simple 60 | ) 61 | f_tr <- build_formula( 62 | out = as.character(params$treatment), 63 | cov = pkg.env$covariates, 64 | data = data, simple = simple 65 | ) 66 | assign("f_out", f_out, pkg.env) 67 | assign("f_tr", f_tr, pkg.env) 68 | 69 | assign("init", TRUE, pkg.env) 70 | 71 | cat("Successfully initialized!\r\n\r\n") 72 | 73 | cat("Summary:\r\n\r\n") 74 | cat(paste("Outcome -", pkg.env$outcome, "\r\n")) 75 | cat(paste("Treatment -", pkg.env$treatment, "\r\n")) 76 | cat(paste("Covariates -", "[", paste(pkg.env$covariates, collapse = ", "), "]", "\r\n\r\n")) 77 | cat(paste("Size -", nrow(data), "x", ncol(data), "\r\n\r\n")) 78 | cat(paste("Default formula for outcome models:", "\r\n")) 79 | cat(paste(deparse(f_out, width.cutoff = 500), "\r\n\r\n", collapse = "")) 80 | cat(paste("Default formula for propensity models:", "\r\n")) 81 | cat(paste(deparse(f_tr, width.cutoff = 500), "\r\n", collapse = "")) 82 | } 83 | -------------------------------------------------------------------------------- /R/ipweighting.R: -------------------------------------------------------------------------------- 1 | #' @title Parametric IP Weighting 2 | #' @description `ipweighting` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 3 | #' weights. The weights can either be standardized weights or non-standardized weights. The weights are used to train a 4 | #' general linear model whose coefficient for treatment represents the average treatment effect on the additive scale. 5 | #' 6 | #' @param data a data frame containing the variables in the model. 7 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 8 | #' @param f (optional) an object of class "formula" that overrides the default parameter 9 | #' @param family the family to be used in the general linear model. 10 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 
11 | #' @param p.f (optional) an object of class "formula" that overrides the default formula for the denominator of the IP 12 | #' weighting function. 13 | #' @param p.simple a boolean indicator to build default formula with interactions for the propensity models. 14 | #' If true, interactions will be excluded. If false, interactions will be included. By 15 | #' default, simple is set to false. 16 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect. 17 | #' @param p.family the family to be used in the underlying propensity model. 18 | #' By default, this is set to \code{\link[stats:binomial]{binomial}}. 19 | #' @param p.scores (optional) use calculated propensity scores for the weights. If using standardized weights, 20 | #' the numerator will still be modeled. 21 | #' @param SW a boolean indicator to indicate the use of standardized weights. By default, this is set to true. 22 | #' @param n.boot (optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 23 | #' If no value is given, the standard error from the underlying linear model will be used. 24 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model. 25 | #' 26 | #' @returns \code{ipweighting} returns an object of \code{\link[base:class]{class} "ipweighting"}. 27 | #' 28 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 29 | #' the underlying \code{glm} model. 30 | #' 31 | #' An object of class \code{"ipweighting"} is a list containing the following: 32 | #' 33 | #' \item{call}{the matched call.} 34 | #' \item{formula}{the formula used in the model.} 35 | #' \item{model}{the underlying glm model.} 36 | #' \item{weights}{the estimated IP weights.} 37 | #' \item{ATE}{the estimated average treatment effect (risk difference).} 38 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 39 | #' 40 | #' @export 41 | #' 42 | #' @examples 43 | #' library(causaldata) 44 | #' data(nhefs) 45 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 46 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 47 | #' 48 | #' confounders <- c( 49 | #' "sex", "race", "age", "education", "smokeintensity", 50 | #' "smokeyrs", "exercise", "active", "wt71" 51 | #' ) 52 | #' 53 | #' init_params(wt82_71, qsmk, 54 | #' covariates = confounders, 55 | #' data = nhefs.nmv 56 | #' ) 57 | #' 58 | #' # model using all defaults 59 | #' model <- ipweighting(data = nhefs.nmv) 60 | #' summary(model) 61 | #' 62 | #' # Model using calculated propensity scores and manual outcome formula 63 | #' p.scores <- propensity_scores(nhefs.nmv)$p.scores 64 | #' model <- ipweighting(wt82_71 ~ qsmk, p.scores = p.scores, data = nhefs.nmv) 65 | #' summary(model) 66 | #' 67 | ipweighting <- function(data, f = NA, family = gaussian(), p.f = NA, p.simple = pkg.env$simple, 68 | p.family = binomial(), p.scores = NA, SW = TRUE, n.boot = 0, ...) 
{ 69 | check_init() 70 | data$weights <- rep(1, nrow(data)) 71 | 72 | # grab function parameters 73 | params <- as.list(match.call()[-1]) 74 | 75 | # if user gives an outcome formula 76 | if (is.na(as.character(f))[1]) { 77 | f <- as.formula(paste(pkg.env$outcome, "~", pkg.env$treatment)) 78 | } 79 | 80 | # if user does not give propensity scores 81 | if (anyNA(p.scores)) { 82 | if (!is.na(as.character(p.f))[1]) { 83 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 84 | } 85 | # if no given propensity formula 86 | else { 87 | if (p.simple != pkg.env$simple) { 88 | p.f <- build_formula( 89 | out = pkg.env$treatment, cov = pkg.env$covariates, 90 | data = data, simple = p.simple 91 | ) 92 | } 93 | # use default 94 | else { 95 | p.f <- formula(pkg.env$f_tr) 96 | } 97 | 98 | p.scores <- propensity_scores(p.f, data = data, family = p.family)$p.scores 99 | } 100 | } 101 | # if user does give propensity scores 102 | else { 103 | if (!is.na(as.character(p.f))[1]) { 104 | message("Ignoring given propensity formula since propensity scores have been given.") 105 | } 106 | message("Using given propensity scores.") 107 | } 108 | 109 | if (SW) { 110 | numer_scores <- propensity_scores(as.formula(paste(pkg.env$treatment, "~1")), family = binomial(), data = data)$p.scores 111 | 112 | data$weights <- numer_scores / p.scores 113 | } else { 114 | data$weights <- 1 / p.scores 115 | } 116 | 117 | model_func <- function(data, indices, f, family, weights, ...) { 118 | if (!anyNA(indices)) { 119 | data <- data[indices, ] 120 | } 121 | 122 | model <- glm(f, weights = weights, data = data, family = family, ...) 123 | model$call$formula <- formula(f) # manually set model formula to prevent "formula = formula" 124 | 125 | return(list("model" = model, "ATE" = coef(model)[[2]])) 126 | } 127 | 128 | # build model 129 | result <- model_func(data = data, indices = NA, f = f, family = family, weights = data$weights, ...) 130 | model <- result$model 131 | beta <- 0 132 | SE <- 0 133 | ATE <- list() 134 | if (n.boot > 1) { 135 | # build bootstrapped estimates 136 | boot_result <- boot( 137 | data = data, R = n.boot, f = f, family = family, weights = data$weights, 138 | statistic = function(data, indices, f, family, weights, ...) { 139 | model_func(data, indices, f, family, weights, ...)$ATE 140 | }, ... 141 | ) 142 | 143 | # calculate 95% CI 144 | beta <- boot_result$t0 145 | SE <- sd(boot_result$t) 146 | ATE <- data.frame( 147 | "Beta" = beta, 148 | "SE" = SE, 149 | conf_int(beta, SE), 150 | check.names = FALSE 151 | ) 152 | } else { 153 | # calculate causal stats 154 | beta <- coef(model)[[2]] 155 | SE <- coef(summary(model))[2, 2] 156 | ATE <- data.frame( 157 | "Beta" = beta, 158 | "SE" = SE, 159 | conf_int(beta, SE), 160 | check.names = FALSE 161 | ) 162 | } 163 | 164 | 165 | output <- list( 166 | "call" = model$call, "formula" = model$call$formula, "model" = model, 167 | "weights" = data$weights, "ATE" = beta, "ATE.summary" = ATE 168 | ) 169 | 170 | class(output) <- "ipweighting" 171 | return(output) 172 | } 173 | 174 | #' @export 175 | print.ipweighting <- function(x, ...) { 176 | print(x$model, ...) 177 | cat("\r\n") 178 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 179 | cat("Estimate - ", x$ATE, "\r\n") 180 | cat("SE - ", x$ATE.summary$SE, "\r\n") 181 | cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n") 182 | } 183 | 184 | #' @export 185 | summary.ipweighting <- function(object, ...) { 186 | s <- summary(object$model, ...) 
187 | s$ATE <- object$ATE.summary 188 | class(s) <- "summary.ipweighting" 189 | return(s) 190 | } 191 | 192 | #' @export 193 | print.summary.ipweighting <- function(x, ...) { 194 | class(x) <- "summary.glm" 195 | print(x, ...) 196 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 197 | print(x$ATE, row.names = FALSE) 198 | cat("\r\n") 199 | } 200 | 201 | #' @export 202 | predict.ipweighting <- function(object, ...) { 203 | return(predict(object$model, ...)) 204 | } 205 | -------------------------------------------------------------------------------- /R/iv_est.R: -------------------------------------------------------------------------------- 1 | #' @title Standard Instrumental Variable Estimator 2 | #' @description `iv_est` calculates the standard IV estimand using the conditional means on a given instrumental variable. 3 | #' 4 | #' @param IV the instrumental variable to be used in the conditional means. Must be a factor with no more than 2 levels. 5 | #' It is assumed the second level is the positive level, i.e., the binary equivalent of the second factor level should be 1 6 | #' and the first should be 0. 7 | #' @param data a data frame containing the variables in the model. 8 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 9 | #' @param n.boot an integer value that indicates number of bootstrap iterations to calculate standard error. 10 | #' 11 | #' @returns \code{iv_est} returns a data frame containing the standard IV estimate, standard error, and Wald 95% CI. 12 | #' 13 | #' @export 14 | #' 15 | #' @examples 16 | #' library(causaldata) 17 | #' data(nhefs) 18 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 19 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 20 | #' 21 | #' confounders <- c( 22 | #' "sex", "race", "age", "education", "smokeintensity", 23 | #' "smokeyrs", "exercise", "active", "wt71" 24 | #' ) 25 | #' nhefs.iv <- nhefs[which(!is.na(nhefs$wt82) & !is.na(nhefs$price82)), ] 26 | #' nhefs.iv$highprice <- as.factor(ifelse(nhefs.iv$price82 >= 1.5, 1, 0)) 27 | #' nhefs.iv$qsmk <- as.factor(nhefs.iv$qsmk) 28 | 29 | #' init_params(wt82_71, qsmk, 30 | #' covariates = confounders, 31 | #' data = nhefs.iv) 32 | #' 33 | #' iv_est("highprice", nhefs.iv) 34 | 35 | iv_est <- function(IV, data, n.boot = 50) { 36 | check_init() 37 | 38 | # grab function parameters 39 | params <- as.list(match.call()[-1]) 40 | IV_levels <- levels(data[[IV]]) 41 | 42 | IV <- as.character(params$IV) 43 | if (!is.factor(data[[IV]])) { 44 | stop("Instrumental variable must be a factor") 45 | } else if (length(IV_levels) > 2) { 46 | stop("Instrumental variable must be binary") 47 | } 48 | 49 | est_func <- function(data, indices, ...) 
{ 50 | data <- data[indices, ] 51 | # manual calculation of standard IV Estimator 52 | numer_1 <- mean(data[data[[IV]] == IV_levels[[2]], ][[pkg.env$outcome]], na.rm = TRUE) 53 | numer_0 <- mean(data[data[[IV]] == IV_levels[[1]], ][[pkg.env$outcome]], na.rm = TRUE) 54 | denom_1 <- mean(as.numeric(data[data[[IV]] == IV_levels[[2]], ][[pkg.env$treatment]]), na.rm = TRUE) 55 | denom_0 <- mean(as.numeric(data[data[[IV]] == IV_levels[[1]], ][[pkg.env$treatment]]), na.rm = TRUE) 56 | 57 | return((numer_1 - numer_0) / (denom_1 - denom_0)) 58 | } 59 | 60 | boot_result <- boot(data = data, statistic = est_func, R = n.boot) 61 | 62 | # calculate 95% CI 63 | beta <- boot_result$t0 64 | SE <- sd(boot_result$t) 65 | ATE <- data.frame( 66 | "ATE" = beta, 67 | "SE" = SE, 68 | conf_int(beta, SE), 69 | check.names = FALSE 70 | ) 71 | 72 | return(ATE) 73 | } 74 | -------------------------------------------------------------------------------- /R/outcome_regression.R: -------------------------------------------------------------------------------- 1 | #' @title Outcome Regression 2 | #' @description `outcome_regression` builds a linear model using all covariates. The treatment effects are stratified 3 | #' within the levels of the covariates. The model will automatically provide all discrete covariates in a contrast matrix. 4 | #' To view estimated change in treatment effect from continuous variables, a list called \code{contrasts}, needs to be given 5 | #' with specific values to estimate. A vector of values can be given for any particular continuous variable. 6 | #' 7 | #' @param data a data frame containing the variables in the model. 8 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 9 | #' @param f (optional) an object of class "formula" that overrides the default parameter 10 | #' @param simple a boolean indicator to build default formula with interactions. 11 | #' If true, interactions will be excluded. If false, interactions will be included. By 12 | #' default, simple is set to false. 13 | #' @param family the family to be used in the general linear model. 14 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 15 | #' NOTE: if this is changed, the assumptions about the model output may be incorrect and may not provide 16 | #' accurate treatment effects. 17 | #' @param contrasts a list of continuous covariates and values in the model to be included in the contrast matrix 18 | #' (e.g. \code{list(age = c(18, 25, 40), weight = c(90, 159))}). 19 | #' @param ... additional arguments that may be passed to the underlying \code{\link[multcomp:glht]{glht}} model. 20 | #' 21 | #' @returns \code{outcome_regression} returns an object of \code{\link[base:class]{class} "outcome_regression"} 22 | #' 23 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 24 | #' the underlying \code{glht} model. 25 | #' 26 | #' An object of class \code{"outcome_regression"} is a list containing the following: 27 | #' 28 | #' \item{call}{the matched call.} 29 | #' \item{formula}{the formula used in the model.} 30 | #' \item{model}{the underlying glht model.} 31 | #' \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 32 | #' \item{ATE.summary}{a more detailed summary of the ATE estimations from glht. 
} 33 | #' 34 | #' @export 35 | #' 36 | #' @examples 37 | #' library(causaldata) 38 | #' library(multcomp) 39 | #' 40 | #' data(nhefs) 41 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 42 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 43 | #' 44 | #' confounders <- c( 45 | #' "sex", "race", "age", "education", "smokeintensity", 46 | #' "smokeyrs", "exercise", "active", "wt71" 47 | #' ) 48 | #' 49 | #' init_params(wt82_71, qsmk, 50 | #' covariates = confounders, 51 | #' data = nhefs.nmv 52 | #' ) 53 | #' 54 | #' out.mod <- outcome_regression(nhefs.nmv, contrasts = list( 55 | #' age = c(21, 55), 56 | #' smokeintensity = c(5, 20, 40) 57 | #' )) 58 | #' print(out.mod) 59 | #' summary(out.mod) 60 | #' head(data.frame(preds = predict(out.mod))) 61 | #' 62 | outcome_regression <- function(data, f = NA, simple = pkg.env$simple, 63 | family = gaussian(), contrasts = list(), ...) { 64 | check_init() 65 | 66 | # grab function parameters 67 | params <- as.list(match.call()[-1]) 68 | 69 | # if no formula provided 70 | if (is.na(as.character(f))[1]) { 71 | # override simple 72 | if (simple != pkg.env$simple) { 73 | f <- build_formula( 74 | out = pkg.env$outcome, tr = pkg.env$treatment, 75 | cov = pkg.env$covariates, data = data, simple = simple 76 | ) 77 | } 78 | # use default 79 | else { 80 | f <- formula(pkg.env$f_out) 81 | } 82 | } 83 | 84 | model <- NA 85 | call <- NA 86 | ATE <- NA 87 | ATE.summary <- NA 88 | 89 | model <- glm(f, data = data, family = family, ...) 90 | model$call$formula <- f 91 | 92 | # grab and divide model matrix 93 | temp_map <- data.frame(model.matrix(model)) 94 | num_vars <- sapply(temp_map, function(x) { 95 | length(unique(x)) > 2 96 | }) 97 | disc <- temp_map[!num_vars] 98 | disc <- disc[, -c(1, ncol(disc))] # exclude intercept and treatment 99 | has_interaction <- unlist(lapply(names(num_vars), function(x) { 100 | grepl(pkg.env$treatment, 101 | x, 102 | fixed = TRUE 103 | ) 104 | })) 105 | continuous <- temp_map[has_interaction][-1] 106 | 107 | 108 | 109 | # if there are no interactions, use original variables 110 | if (ncol(continuous) == 0) { 111 | continuous <- temp_map[num_vars] 112 | } 113 | # if only sum interactions, use original for those without one 114 | else { 115 | no_interaction <- list() 116 | for (i in 1:length(num_vars)) { 117 | no_interaction[i] <- (!has_interaction[[i]] && num_vars[[i]] && 118 | !any(grepl(names(num_vars)[[i]], names(continuous)))) 119 | } 120 | continuous <- cbind(continuous, temp_map[unlist(no_interaction)]) 121 | } 122 | 123 | continuous_names <- names(continuous) 124 | 125 | cont_input <- list() 126 | if(length(names(disc)) > 0) { 127 | for (i in 1:length(names(disc))) { 128 | cont_input[[i]] <- paste("Effect of", pkg.env$treatment, "at", names(disc[i])) 129 | } 130 | } 131 | 132 | 133 | if (length(contrasts) > 0) { 134 | j <- length(cont_input) + 1 135 | for (i in 1:length(contrasts)) { 136 | cont_input[[j]] <- paste("Effect of", pkg.env$treatment, "at", names(contrasts[i]), "of", contrasts[[i]]) 137 | j <- j + 1 138 | } 139 | } 140 | else if (length(cont_input) > 0) { 141 | warning("No contrasts included in analysis. 
This will exclude results from any continuous covariates.") 142 | } 143 | 144 | cont_input <- unlist(cont_input) 145 | 146 | cont_mat <- contrast_matrix(model, length(cont_input), cont_input) 147 | 148 | sum_cont <- 0 149 | for (cont in contrasts) { 150 | sum_cont <- sum_cont + length(cont) 151 | } 152 | 153 | j <- 1 154 | cont_mat[1:length(cont_input), paste0(pkg.env$treatment, levels(data[[pkg.env$treatment]])[2])] <- 1 155 | 156 | for (i in 1:length(names(disc))) { 157 | cont_mat[j, names(disc[i])] <- 1 158 | j <- j + 1 159 | } 160 | 161 | if (length(contrasts) > 0) { 162 | for (i in 1:length(contrasts)) { 163 | name <- names(contrasts[i]) 164 | for (val in contrasts[[i]]) { 165 | if (paste(pkg.env$treatment, levels(data[[pkg.env$treatment]])[2], 166 | ":", name, 167 | sep = "" 168 | ) %in% colnames(cont_mat)) { 169 | cont_mat[j, paste(pkg.env$treatment, levels(data[[pkg.env$treatment]])[2], ":", name, sep = "")] <- val 170 | } else if (paste(name, ":", pkg.env$treatment, 171 | levels(data[[pkg.env$treatment]])[2], 172 | sep = "" 173 | ) %in% colnames(cont_mat)) { 174 | cont_mat[j, paste(name, ":", pkg.env$treatment, levels(data[[pkg.env$treatment]])[2], sep = "")] <- val 175 | } else { 176 | cont_mat[j, name] <- val 177 | } 178 | j <- j + 1 179 | } 180 | } 181 | } 182 | 183 | 184 | 185 | model <- glht(model, linfct = cont_mat) 186 | call <- model$model$call 187 | 188 | # summarize model 189 | sum_model <- summary(model)$test 190 | 191 | results <- data.frame( 192 | "Estimate" = sum_model$coefficients, 193 | "Std. Error" = sum_model$sigma, 194 | conf_int(sum_model$coefficients, sum_model$sigma), 195 | check.names = FALSE 196 | ) 197 | 198 | ATE.summary <- summary(model) 199 | ATE <- results 200 | 201 | output <- list( 202 | "call" = call, "formula" = call$formula, "model" = model, 203 | "ATE" = ATE, "ATE.summary" = ATE.summary 204 | ) 205 | 206 | class(output) <- "outcome_regression" 207 | return(output) 208 | } 209 | 210 | #' @export 211 | print.outcome_regression <- function(x, ...) { 212 | cat("\r\n") 213 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 214 | cat("\r\n") 215 | print(x$ATE) 216 | } 217 | 218 | #' @export 219 | summary.outcome_regression <- function(object, ...) { 220 | summary(object$model, ...) 221 | } 222 | 223 | #' @export 224 | predict.outcome_regression <- function(object, ...) { 225 | return(predict(object$model$model, ...)) 226 | } 227 | -------------------------------------------------------------------------------- /R/propensity_matching.R: -------------------------------------------------------------------------------- 1 | #' @title Propensity Matching 2 | #' @description `propensity_matching` uses either stratification or standardization to model an outcome 3 | #' conditional on the propensity scores. In stratification, the model will break the propensity scores 4 | #' into groups and output a \code{\link[multcomp:glht]{glht}} model based off a contrast matrix which 5 | #' estimates the change in average causal effect within groups of propensity scores. In standardization, 6 | #' the model will output a \code{\link[=standardization]{standardization}} model that conditions on the 7 | #' propensity strata rather than the covariates. The model can also predict the expected outcome. 8 | #' 9 | #' @param data a data frame containing the variables in the model. 10 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 
11 | #' @param f (optional) an object of class "formula" that overrides the default parameter 12 | #' @param simple a boolean indicator to build default formula with interactions. 13 | #' If true, interactions will be excluded. If false, interactions will be included. By 14 | #' default, simple is set to false. 15 | #' @param p.scores (optional) use calculated propensity scores for matching. Otherwise, propensity scores 16 | #' will be automatically modeled. 17 | #' @param p.simple a boolean indicator to build default formula with interactions for the propensity models. 18 | #' If true, interactions will be excluded. If false, interactions will be included. By 19 | #' default, simple is set to false. 20 | #' @param type a string representing the type of propensity model to be used. By default, the function will stratify. Standardization with 21 | #' propensity scores may also be used. The value given for \code{type} must be in \code{c("strata", "stdm")}. 22 | #' @param grp.width a decimal value to specify the range to stratify the propensity scores. If option \code{quant} is set to true, 23 | #' this will represent the spread of percentiles. If false, it will represent the spread of raw values of propensity 24 | #' scores. Must be a decimal between 0 and 1. By default, this is set to 0.1. This option is ignored for standardization. 25 | #' @param quant a boolean indicator to specify the type of stratification. If true (default), the model will stratify by 26 | #' percentiles. If false, the scores will be grouped by a range of their raw values. This option is ignored for standardization. 27 | #' @param ... additional arguments that may be passed to the underlying \code{\link[=propensity_scores]{propensity_scores}} function. 28 | #' 29 | #' @returns \code{propensity_matching} returns an object of \code{\link[base:class]{class} "propensity_matching"} 30 | #' 31 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with the underlying \code{glht} or 32 | #' \code{standardization} model. 33 | #' 34 | #' An object of class \code{"propensity_matching"} is a list containing the following: 35 | #' 36 | #' \item{call}{the matched call.} 37 | #' \item{formula}{the formula used in the model.} 38 | #' \item{model}{either the underlying \code{glht} or \code{standardization} model.} 39 | #' \item{p.scores}{the estimated propensity scores} 40 | #' \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 41 | #' \item{ATE.summary}{either a data frame containing the \code{glht} or \code{standardization} summary. } 42 | #' 43 | #' @export 44 | #' 45 | #' @examples 46 | #' library(causaldata) 47 | #' library(multcomp) 48 | #' 49 | #' data(nhefs) 50 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 51 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 52 | #' 53 | #' confounders <- c( 54 | #' "sex", "race", "age", "education", "smokeintensity", 55 | #' "smokeyrs", "exercise", "active", "wt71" 56 | #' ) 57 | #' 58 | #' init_params(wt82_71, qsmk, 59 | #' covariates = confounders, 60 | #' data = nhefs.nmv 61 | #' ) 62 | #' 63 | #' pm.model <- propensity_matching(nhefs.nmv) 64 | #' pm.model$ATE.summary 65 | #' summary(pm.model) 66 | #' head(data.frame(preds = predict(pm.model))) 67 | #' 68 | propensity_matching <- function(data, f = NA, simple = pkg.env$simple, p.scores = NA, p.simple = pkg.env$simple, 69 | type = "strata", grp.width = 0.1, quant = TRUE, ...) 
{
70 |   check_init()
71 |
72 |   # grab function parameters
73 |   params <- as.list(match.call()[-1])
74 |
75 |   # check valid model type
76 |   if (!type %in% c("strata", "stdm")) {
77 |     stop("Invalid model type! Must be one of the following values: 'strata' or 'stdm'")
78 |   }
79 |
80 |   if (grp.width <= 0 || grp.width >= 1) {
81 |     stop("Invalid parameter! You must set 0 < grp.width < 1")
82 |   }
83 |
84 |   # if no given propensity scores
85 |   if (anyNA(p.scores)) {
86 |     # if no formula provided
87 |     if (is.na(as.character(f))[1]) {
88 |       # override simple
89 |       if (p.simple != pkg.env$simple) {
90 |         f <- build_formula(
91 |           out = pkg.env$treatment, cov = pkg.env$covariates,
92 |           data = data, simple = p.simple
93 |         )
94 |       }
95 |       # use default
96 |       else {
97 |         f <- formula(pkg.env$f_tr)
98 |       }
99 |     }
100 |
101 |     p.scores <- propensity_scores(f = f, data = data, ...)$p.scores
102 |   }
103 |
104 |   # initialize output objects
105 |   model <- NA
106 |   call <- NA
107 |   ATE <- NA
108 |   ATE.summary <- NA
109 |
110 |   # using stratification
111 |   if (type == "strata") {
112 |     p.grp <- list()
113 |     lookup <- NA
114 |     if (quant) { # use percentiles
115 |       # group by propensity percentiles w/ width = grp.width
116 |       quants <- c(quantile(p.scores, probs = seq(0, 1, grp.width)))
117 |       p.grp <- cut(p.scores, breaks = quants, include.lowest = TRUE)
118 |
119 |       # create lookup table for groups
120 |       lookup <- data.frame("n" = table(p.grp), names(quants)[-1])
121 |       colnames(lookup) <- c("breaks", "n", "percentile")
122 |       lookup$grp.name <- paste("p.grp", seq(1:(length(quants) - 1)), sep = "")
123 |
124 |       levels(p.grp) <- 1:nrow(lookup) # rename levels
125 |       lookup <- lookup[c("grp.name", "breaks", "n", "percentile")]
126 |     } else { # use raw groupings
127 |       # group by propensity breaks w/ width = grp.width
128 |       quants <- seq(0, 1, grp.width)
129 |       p.grp <- cut(p.scores, breaks = quants, include.lowest = TRUE)
130 |
131 |       # create lookup table for groups
132 |       lookup <- data.frame("n" = table(p.grp))
133 |       colnames(lookup) <- c("breaks", "n")
134 |       lookup$grp.name <- paste("p.grp", seq(1:(length(quants) - 1)), sep = "")
135 |
136 |       levels(p.grp) <- 1:nrow(lookup) # rename levels
137 |       lookup <- lookup[c("grp.name", "breaks", "n")]
138 |     }
139 |
140 |     # build linear model to make estimates in the contrast matrix
141 |     model.f <- as.formula(paste(pkg.env$outcome, "~", pkg.env$treatment, "* p.grp"))
142 |     model <- glm(model.f, data = data)
143 |     model$call$formula <- model.f
144 |
145 |     # build contrast matrix of all propensity groups
146 |     cont_mat <- contrast_matrix(
147 |       model, nrow(lookup),
148 |       c(paste("Effect of", pkg.env$treatment, "for p.score in", lookup$breaks))
149 |     )
150 |
151 |
152 |     # set all treatment values to 1
153 |     cont_mat[1:nrow(lookup), names(model$coefficients)[2]] <- 1
154 |
155 |     # try to fill the diag of the matrix with 1s
156 |     tryCatch(
157 |       for (i in 2:nrow(lookup)) {
158 |         cont_mat[i, paste(names(model$coefficients)[2], ":", lookup$grp.name[[i]], sep = "")] <- 1
159 |       },
160 |       # if this fails, there are likely one or more groups with <= 1 samples
161 |       error = function(e) {
162 |         stop("Unable to stratify propensity scores. This is likely due to a lack of positivity in the groups.
163 | Try setting 'grp.width' to a larger value.") 164 | } 165 | ) 166 | 167 | # build model for contrast matrix 168 | model <- glht(model, cont_mat) 169 | call <- model$model$call 170 | 171 | # summarize model 172 | sum_model <- summary(model)$test 173 | 174 | results <- data.frame( 175 | "Estimate" = sum_model$coefficients, 176 | "Std. Error" = sum_model$sigma, 177 | conf_int(sum_model$coefficients, sum_model$sigma), 178 | check.names = FALSE 179 | ) 180 | 181 | ATE.summary <- cbind(lookup, results) 182 | ATE <- results 183 | } 184 | 185 | # using standardization 186 | else if (type == "stdm") { 187 | model.f <- build_formula( 188 | out = pkg.env$outcome, tr = pkg.env$treatment, 189 | cov = c("p.scores"), simple = simple, data = cbind(data, p.scores) 190 | ) 191 | model <- standardization(f = model.f, data = cbind(data, p.scores)) 192 | 193 | call <- model$call 194 | call$formula <- model.f 195 | ATE <- model$ATE 196 | ATE.summary <- model$ATE.summary 197 | } 198 | 199 | output <- list( 200 | "call" = call, "formula" = call$formula, "model" = model, "p.scores" = p.scores, 201 | "ATE" = ATE, "ATE.summary" = ATE.summary, "type" = type 202 | ) 203 | 204 | class(output) <- "propensity_matching" 205 | return(output) 206 | } 207 | 208 | #' @export 209 | print.propensity_matching <- function(x, ...) { 210 | if (x$type == "strata") { 211 | cat("\r\n") 212 | cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "") 213 | cat("\r\n") 214 | print(x$ATE) 215 | } else if (x$type == "stdm") { 216 | print(x$model, ...) 217 | } 218 | } 219 | 220 | #' @export 221 | summary.propensity_matching <- function(object, ...) { 222 | if (object$type == "strata" || object$type == "stdm") { 223 | summary(object$model, ...) 224 | } 225 | } 226 | 227 | #' @export 228 | predict.propensity_matching <- function(object, ...) { 229 | if (object$type == "strata") { 230 | return(predict(object$model$model, ...)) 231 | } else if (object$type == "stdm") { 232 | return(predict(object$model, ...)) 233 | } 234 | } 235 | -------------------------------------------------------------------------------- /R/propensity_scores.R: -------------------------------------------------------------------------------- 1 | #' @title Propensity Scores 2 | #' @description `propensity_scores` builds a logistic regression with the target as the treatment variable 3 | #' and the covariates as the independent variables. 4 | #' 5 | #' @param data a data frame containing the variables in the model. 6 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 7 | #' @param f (optional) an object of class "formula" that overrides the default parameter 8 | #' @param simple a boolean indicator to build default formula with interactions. 9 | #' If true, interactions will be excluded. If false, interactions will be included. By 10 | #' default, simple is set to false. 11 | #' @param family the family to be used in the general linear model. 12 | #' By default, this is set to \code{\link[stats:binomial]{binomial}} 13 | #' NOTE: if this is changed, the outcome of the model may not be the probabilities and the results will not be valid. 14 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model. 15 | #' 16 | #' @returns \code{propensity_scores} returns an object of \code{\link[base:class]{class} "propensity_scores"} 17 | #' 18 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 19 | #' the underlying \code{glm} model. 
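Because several estimators in this package accept precomputed scores, the propensity model only needs to be fit once. A minimal usage sketch (assuming the NHEFS setup from the examples below has already been run through `init_params`):

``` r
# fit the propensity model once and extract the scores
p.scores <- propensity_scores(nhefs.nmv)$p.scores

# the same scores can then be reused by other estimators via their p.scores argument
ip.model <- ipweighting(data = nhefs.nmv, p.scores = p.scores)
pm.model <- propensity_matching(data = nhefs.nmv, p.scores = p.scores)
```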
20 | #' 21 | #' An object of class \code{"propensity_scores"} is a list containing the following: 22 | #' 23 | #' \item{call}{the matched call.} 24 | #' \item{formula}{the formula used in the model.} 25 | #' \item{model}{the underlying glm model.} 26 | #' \item{p.scores}{the estimated propensity scores.} 27 | #' 28 | #' @export 29 | #' 30 | #' @examples 31 | #' library(causaldata) 32 | #' data(nhefs) 33 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 34 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 35 | #' 36 | #' confounders <- c( 37 | #' "sex", "race", "age", "education", "smokeintensity", 38 | #' "smokeyrs", "exercise", "active", "wt71" 39 | #' ) 40 | #' 41 | #' init_params(wt82_71, qsmk, 42 | #' covariates = confounders, 43 | #' data = nhefs.nmv 44 | #' ) 45 | #' 46 | #' p.score <- propensity_scores(nhefs.nmv) 47 | #' p.score 48 | #' 49 | propensity_scores <- function(data, f = NA, simple = pkg.env$simple, family = binomial(), ...) { 50 | check_init() 51 | 52 | # grab function parameters 53 | params <- as.list(match.call()[-1]) 54 | 55 | # if no formula provided 56 | if (is.na(as.character(f))[1]) { 57 | # override simple 58 | if (simple != pkg.env$simple) { 59 | f <- build_formula( 60 | out = pkg.env$treatment, cov = pkg.env$covariates, 61 | data = data, simple = simple 62 | ) 63 | } 64 | # use default 65 | else { 66 | f <- formula(pkg.env$f_tr) 67 | } 68 | } 69 | 70 | # build model and generate scores 71 | model <- glm(f, data = data, family = family, ...) 72 | 73 | scores <- predict(model, type = "response") 74 | 75 | # probability of treatment for the untreated 76 | # scores[which(data[[pkg.env$treatment]] == 0)] = 1 - scores[which(data[[pkg.env$treatment]] == 0)] 77 | 78 | model$call$formula <- formula(f) # manually set model formula to prevent "formula = formula" 79 | output <- list("call" = model$call, "formula" = model$call$formula, "model" = model, "p.scores" = scores) 80 | 81 | class(output) <- "propensity_scores" 82 | return(output) 83 | } 84 | 85 | #' @export 86 | print.propensity_scores <- function(x, ...) { 87 | print(x$model, ...) 88 | } 89 | 90 | #' @export 91 | summary.propensity_scores <- function(object, ...) { 92 | summary(object$model, ...) 93 | } 94 | 95 | #' @export 96 | predict.propensity_scores <- function(object, ...) { 97 | return(predict(object$model, type = "response", ...)) 98 | } 99 | -------------------------------------------------------------------------------- /R/standardization.R: -------------------------------------------------------------------------------- 1 | #' @title Parametric Standardization 2 | #' @description `standardization` uses a standard \code{\link[stats:glm]{glm}} linear model to perform parametric standardization 3 | #' by adjusting bias through including all confounders as covariates. The model will calculate during training both the risk difference 4 | #' and the risk ratio. Both can be accessed from the model as well as estimates of the counterfactuals of treatment. 5 | #' 6 | #' @param data a data frame containing the variables in the model. 7 | #' This should be the same data used in \code{\link[=init_params]{init_params}}. 8 | #' @param f (optional) an object of class "formula" that overrides the default parameter 9 | #' @param family the family to be used in the general linear model. 10 | #' By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 11 | #' @param simple a boolean indicator to build default formula with interactions. 12 | #' If true, interactions will be excluded. 
If false, interactions will be included. By
13 | #' default, simple is set to false.
14 | #' NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.
15 | #' @param n.boot an integer value that indicates number of bootstrap iterations to calculate standard error.
16 | #' @param ... additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.
17 | #'
18 | #' @returns \code{standardization} returns an object of \code{\link[base:class]{class} "standardization"}.
19 | #'
20 | #' The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with
21 | #' the underlying \code{glm} model.
22 | #'
23 | #' An object of class \code{"standardization"} is a list containing the following:
24 | #'
25 | #' \item{call}{the matched call.}
26 | #' \item{formula}{the formula used in the model.}
27 | #' \item{model}{the underlying glm model.}
28 | #' \item{ATE}{a data frame containing estimates of the treatment effect
29 | #' of the observed, counterfactuals, and risk metrics.}
30 | #' \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. }
31 | #'
32 | #' @export
33 | #'
34 | #' @examples
35 | #' library(causaldata)
36 | #'
37 | #' data(nhefs)
38 | #' nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ]
39 | #' nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)
40 | #'
41 | #' confounders <- c(
42 | #'   "sex", "race", "age", "education", "smokeintensity",
43 | #'   "smokeyrs", "exercise", "active", "wt71"
44 | #' )
45 | #'
46 | #' init_params(wt82_71, qsmk,
47 | #'   covariates = confounders,
48 | #'   data = nhefs.nmv
49 | #' )
50 | #'
51 | #' # model using all defaults
52 | #' model <- standardization(data = nhefs.nmv)
53 | #' print(model)
54 | #' summary(model)
55 | #' print(model$ATE.summary)
56 | #' print(model$ATE$Estimate[[2]] -
57 | #'   model$ATE$Estimate[[3]]) # manually calculate risk difference
58 | #'
59 | standardization <- function(data, f = NA, family = gaussian(), simple = pkg.env$simple, n.boot = 50, ...) {
60 |   check_init()
61 |
62 |   # grab function parameters
63 |   params <- as.list(match.call()[-1])
64 |
65 |   # if no formula provided
66 |   if (is.na(as.character(f))[1]) {
67 |     # override simple
68 |     if (simple != pkg.env$simple) {
69 |       f <- build_formula(
70 |         out = pkg.env$outcome, tr = pkg.env$treatment,
71 |         cov = pkg.env$covariates,
72 |         data = data, simple = simple
73 |       )
74 |     }
75 |     # use default
76 |     else {
77 |       f <- formula(pkg.env$f_out)
78 |     }
79 |   }
80 |
81 |   # make three copies of the dataset
82 |   cp <- data
83 |   cp$label <- "observed"
84 |   tr0 <- cp
85 |   tr0$label <- "cf_untreated"
86 |   tr0[pkg.env$treatment] <- 0
87 |   tr0[pkg.env$outcome] <- NA
88 |   tr1 <- cp
89 |   tr1$label <- "cf_treated"
90 |   tr1[pkg.env$treatment] <- 1
91 |   tr1[pkg.env$outcome] <- NA
92 |
93 |   combined_data <- rbind(cp, tr0, tr1) # combine copies
94 |
95 |   model_func <- function(data, indices, f, family, ...) {
96 |     if (!anyNA(indices)) {
97 |       data <- data[indices, ]
98 |     }
99 |
100 |     # build model using all three copies
101 |     model <- glm(formula = f, data = data, family = family, ...)
102 |     data$Y_hat <- predict(model, data)
103 |
104 |     # calculate means in each group
105 |     means <- c(
106 |       mean(data$Y_hat[data$label == "observed"]), # estimated outcome of the observed
107 |       mean(data$Y_hat[data$label == "cf_treated"]), # estimated counterfactual of the treated
108 |       mean(data$Y_hat[data$label == "cf_untreated"]), # estimated counterfactual of the untreated
109 |       mean(data$Y_hat[data$label == "cf_treated"]) - # estimated risk difference
110 |         mean(data$Y_hat[data$label == "cf_untreated"]),
111 |       mean(data$Y_hat[data$label == "cf_treated"]) / # estimated risk ratio
112 |         mean(data$Y_hat[data$label == "cf_untreated"])
113 |     )
114 |     return(list("model" = model, "means" = means))
115 |   }
116 |
117 |   # build model and bootstrapped estimates
118 |   result <- model_func(data = combined_data, indices = NA, f = f, family = family, ...)
119 |   boot_result <- boot(
120 |     data = combined_data, R = n.boot, f = f, family = family,
121 |     statistic = function(data, indices, f, family, ...) {
122 |       model_func(data, indices, f, family, ...)$means[[4]]
123 |     }, ...
124 |   )
125 |
126 |   # calculate 95% CI
127 |   beta <- boot_result$t0
128 |   SE <- sd(boot_result$t)
129 |   ATE.summary <- data.frame(
130 |     "Beta" = beta,
131 |     "SE" = SE,
132 |     conf_int(beta, SE),
133 |     check.names = FALSE
134 |   )
135 |
136 |   model <- result$model
137 |   ATE <- data.frame(Estimate = result$means)
138 |   rownames(ATE) <- c(
139 |     "Observed effect", "Counterfactual (treated)",
140 |     "Counterfactual (untreated)", "Risk difference",
141 |     "Risk ratio"
142 |   )
143 |
144 |   model$call$formula <- formula(f) # manually set model formula to prevent "formula = formula"
145 |   output <- list(
146 |     "call" = model$call, "formula" = model$call$formula,
147 |     "model" = model, "ATE" = ATE, "ATE.summary" = ATE.summary
148 |   )
149 |   class(output) <- "standardization"
150 |   return(output)
151 | }
152 |
153 | #' @export
154 | print.standardization <- function(x, ...) {
155 |   print(x$model, ...)
156 |   cat("\r\n")
157 |   cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "")
158 |   cat("Estimate - ", x$ATE$Estimate[[4]], "\r\n")
159 |   cat("SE - ", x$ATE.summary$SE, "\r\n")
160 |   cat("95% CI - (", x$ATE.summary$`2.5 %`, ", ", x$ATE.summary$`97.5 %`, ")", "\r\n")
161 | }
162 |
163 | #' @export
164 | summary.standardization <- function(object, ...) {
165 |   s <- summary(object$model, ...)
166 |   s$ATE <- object$ATE.summary
167 |   class(s) <- "summary.standardization"
168 |   return(s)
169 | }
170 |
171 | #' @export
172 | print.summary.standardization <- function(x, ...) {
173 |   class(x) <- "summary.glm"
174 |   print(x, ...)
175 |   cat("Average treatment effect of ", pkg.env$treatment, ":", "\r\n", sep = "")
176 |   print(x$ATE, row.names = FALSE)
177 |   cat("\r\n")
178 | }
179 |
180 | #' @export
181 | predict.standardization <- function(object, ...)
{ 182 | return(predict(object$model, ...)) 183 | } 184 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | # function to be executed on package load 2 | .onLoad <- function(libname, pkgname) { 3 | assign("pkg.env", new.env(), topenv()) 4 | assign("init", FALSE, pkg.env) 5 | } 6 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | ```{r, include = FALSE} 6 | knitr::opts_chunk$set( 7 | collapse = TRUE, 8 | comment = "#>", 9 | fig.path = "man/figures/README-", 10 | out.width = "100%" 11 | ) 12 | ``` 13 | 14 | # CausalModels 15 | 16 | 17 | 18 | 19 | The goal of CausalModels is to provide a survey of fundamental causal 20 | inference models in one single location. While there are many packages 21 | for these types of models, CausalModels brings them all to one place 22 | with a simple user experience. The package uses a format that is familiar 23 | to users using well known statistical and machine learning models. While causal inference 24 | models require careful consideration of variables to correctly infer a causal effect, 25 | CausalModels uses simple code while requiring the user to make these considerations. This 26 | enables efficient and thoughtful research using causal inference. 27 | As of May 30, 2022, the package has been 28 | [published on CRAN](https://cran.r-project.org/package=CausalModels). 29 | 30 | ## Change Log 31 | 32 | ### Version 0.2.1 - 2025/04/27 33 | 34 | #### Fixed 35 | 36 | - generic treatment names bug (#7) 37 | 38 | ### Version 0.2.0 - 2022/10/27 39 | 40 | #### Added 41 | 42 | - one parameter g-estimation 43 | 44 | #### Fixed 45 | 46 | - Typo in ipweighting documentation 47 | 48 | 49 | ## Installation 50 | 51 | You can install the development version of CausalModels from 52 | [GitHub](https://github.com/) with: 53 | 54 | ``` r 55 | # install.packages("devtools") 56 | devtools::install_github("ander428/CausalModels") 57 | ``` 58 | 59 | Since the package has been published on CRAN, the production version can be installed with: 60 | 61 | ``` r 62 | install.packages("CausalModels") 63 | ``` 64 | 65 | ## Example 66 | 67 | This is a basic example which shows you how to solve a common problem: 68 | 69 | ``` r 70 | library(CausalModels) 71 | library(causaldata) 72 | 73 | data(nhefs) 74 | 75 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),] 76 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 77 | 78 | confounders <- c("sex", "race", "age", "education", "smokeintensity", 79 | "smokeyrs", "exercise", "active", "wt71") 80 | 81 | # initialize package 82 | ?init_params 83 | init_params(wt82_71, qsmk, 84 | covariates = confounders, 85 | data = nhefs.nmv, simple = F) 86 | #> Successfully initialized! 
87 | #> 88 | #> Summary: 89 | #> 90 | #> Outcome - wt82_71 91 | #> Treatment - qsmk 92 | #> Covariates - [ sex, race, age, education, smokeintensity, smokeyrs, exercise, active, wt71 ] 93 | #> 94 | #> Size - 1566 x 67 95 | #> 96 | #> Default formula for outcome models: 97 | #> wt82_71 ~ qsmk + sex + race + education + exercise + active + age + (qsmk * age) + I(age * age) + smokeintensity + (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + (qsmk * wt71) + I(wt71 * wt71) 98 | #> 99 | #> Default formula for propensity models: 100 | #> qsmk ~ sex + race + education + exercise + active + age + I(age * age) + smokeintensity + I(smokeintensity * smokeintensity) + smokeyrs + I(smokeyrs * smokeyrs) + wt71 + I(wt71 * wt71) 101 | 102 | # mode the causal effect of qsmk on wt82_71 103 | model <- standardization(nhefs.nmv) 104 | print(model) 105 | #> 106 | #> Call: glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 107 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 108 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 109 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 110 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 111 | #> 112 | #> Coefficients: 113 | #> (Intercept) qsmk1 114 | #> -0.9699812 0.5509460 115 | #> sex1 race1 116 | #> -1.4371844 0.5868376 117 | #> education2 education3 118 | #> 0.8174769 0.5824119 119 | #> education4 education5 120 | #> 1.5240890 -0.1792422 121 | #> exercise1 exercise2 122 | #> 0.3063727 0.3550789 123 | #> active1 active2 124 | #> -0.9460683 -0.2707615 125 | #> age I(age * age) 126 | #> 0.3495673 -0.0060652 127 | #> smokeintensity I(smokeintensity * smokeintensity) 128 | #> 0.0482197 -0.0009597 129 | #> smokeyrs I(smokeyrs * smokeyrs) 130 | #> 0.1418662 -0.0018076 131 | #> wt71 I(wt71 * wt71) 132 | #> 0.0393011 -0.0009787 133 | #> qsmk1:age qsmk1:smokeintensity 134 | #> 0.0123138 0.0448028 135 | #> qsmk1:smokeyrs qsmk1:wt71 136 | #> -0.0235529 0.0291350 137 | #> 138 | #> Degrees of Freedom: 1565 Total (i.e. Null); 1542 Residual 139 | #> (3132 observations deleted due to missingness) 140 | #> Null Deviance: 97180 141 | #> Residual Deviance: 82690 AIC: 10710 142 | #> 143 | #> Average treatment effect of qsmk: 144 | #> 3.4927 145 | summary(model) 146 | #> 147 | #> Call: 148 | #> glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 149 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 150 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 151 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 152 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 153 | #> 154 | #> Deviance Residuals: 155 | #> Min 1Q Median 3Q Max 156 | #> -41.913 -4.168 -0.314 3.869 44.573 157 | #> 158 | #> Coefficients: 159 | #> Estimate Std. Error t value Pr(>|t|) 160 | #> (Intercept) -0.9699812 4.3673208 -0.222 0.824266 161 | #> qsmk1 0.5509460 2.8229123 0.195 0.845286 162 | #> sex1 -1.4371844 0.4693195 -3.062 0.002235 ** 163 | #> race1 0.5868376 0.5828368 1.007 0.314158 164 | #> education2 0.8174769 0.6085125 1.343 0.179339 165 | #> education3 0.5824119 0.5575569 1.045 0.296382 166 | #> education4 1.5240890 0.8351981 1.825 0.068221 . 
167 | #> education5 -0.1792422 0.7462118 -0.240 0.810205 168 | #> exercise1 0.3063727 0.5360193 0.572 0.567697 169 | #> exercise2 0.3550789 0.5592886 0.635 0.525603 170 | #> active1 -0.9460683 0.4105673 -2.304 0.021338 * 171 | #> active2 -0.2707615 0.6851128 -0.395 0.692745 172 | #> age 0.3495673 0.1648157 2.121 0.034084 * 173 | #> I(age * age) -0.0060652 0.0017347 -3.496 0.000485 *** 174 | #> smokeintensity 0.0482197 0.0518339 0.930 0.352375 175 | #> I(smokeintensity * smokeintensity) -0.0009597 0.0009409 -1.020 0.307878 176 | #> smokeyrs 0.1418662 0.0943836 1.503 0.133023 177 | #> I(smokeyrs * smokeyrs) -0.0018076 0.0015458 -1.169 0.242437 178 | #> wt71 0.0393011 0.0836422 0.470 0.638514 179 | #> I(wt71 * wt71) -0.0009787 0.0005255 -1.862 0.062751 . 180 | #> qsmk1:age 0.0123138 0.0670159 0.184 0.854238 181 | #> qsmk1:smokeintensity 0.0448028 0.0360169 1.244 0.213712 182 | #> qsmk1:smokeyrs -0.0235529 0.0654333 -0.360 0.718931 183 | #> qsmk1:wt71 0.0291350 0.0276439 1.054 0.292075 184 | #> --- 185 | #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 186 | #> 187 | #> (Dispersion parameter for gaussian family taken to be 53.62833) 188 | #> 189 | #> Null deviance: 97176 on 1565 degrees of freedom 190 | #> Residual deviance: 82695 on 1542 degrees of freedom 191 | #> (3132 observations deleted due to missingness) 192 | #> AIC: 10706 193 | #> 194 | #> Number of Fisher Scoring iterations: 2 195 | #> 196 | #> Average treatment effect of qsmk: 197 | #> Estimate 198 | #> Observed effect 2.638300 199 | #> Counterfactual (treated) 5.243703 200 | #> Counterfactual (untreated) 1.751003 201 | #> Risk difference 3.492700 202 | #> Risk ratio 2.994686 203 | #> 204 | ``` 205 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # CausalModels 3 | 4 | 5 | 6 | 7 | The goal of CausalModels is to provide a survey of fundamental causal 8 | inference models in one single location. While there are many packages 9 | for these types of models, CausalModels brings them all to one place 10 | with a simple user experience. The package uses a format that is 11 | familiar to users using well known statistical and machine learning 12 | models. While causal inference models require careful consideration of 13 | variables to correctly infer a causal effect, CausalModels uses simple 14 | code while requiring the user to make these considerations. This enables 15 | efficient and thoughtful research using causal inference. As of May 30, 16 | 2022, the package has been [published on 17 | CRAN](https://cran.r-project.org/package=CausalModels). 
18 | 19 | ## Change Log 20 | 21 | ### Version 0.2.1 - 2025/04/27 22 | 23 | #### Fixed 24 | 25 | - generic treatment names bug (#7) 26 | 27 | ### Version 0.2.0 - 2022/10/27 28 | 29 | #### Added 30 | 31 | - one parameter g-estimation 32 | 33 | #### Fixed 34 | 35 | - Typo in ipweighting documentation 36 | 37 | ## Installation 38 | 39 | You can install the development version of CausalModels from 40 | [GitHub](https://github.com/) with: 41 | 42 | ``` r 43 | # install.packages("devtools") 44 | devtools::install_github("ander428/CausalModels") 45 | ``` 46 | 47 | Since the package has been published on CRAN, the production version can 48 | be installed with: 49 | 50 | ``` r 51 | install.packages("CausalModels") 52 | ``` 53 | 54 | ## Example 55 | 56 | This is a basic example which shows you how to solve a common problem: 57 | 58 | ``` r 59 | library(CausalModels) 60 | library(causaldata) 61 | 62 | data(nhefs) 63 | 64 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)),] 65 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 66 | 67 | confounders <- c("sex", "race", "age", "education", "smokeintensity", 68 | "smokeyrs", "exercise", "active", "wt71") 69 | 70 | # initialize package 71 | ?init_params 72 | init_params(wt82_71, qsmk, 73 | covariates = confounders, 74 | data = nhefs.nmv, simple = F) 75 | #> Successfully initialized! 76 | #> 77 | #> Summary: 78 | #> 79 | #> Outcome - wt82_71 80 | #> Treatment - qsmk 81 | #> Covariates - [ sex, race, age, education, smokeintensity, smokeyrs, exercise, active, wt71 ] 82 | #> 83 | #> Size - 1566 x 67 84 | #> 85 | #> Default formula for outcome models: 86 | #> wt82_71 ~ qsmk + sex + race + education + exercise + active + age + (qsmk * age) + I(age * age) + smokeintensity + (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + (qsmk * wt71) + I(wt71 * wt71) 87 | #> 88 | #> Default formula for propensity models: 89 | #> qsmk ~ sex + race + education + exercise + active + age + I(age * age) + smokeintensity + I(smokeintensity * smokeintensity) + smokeyrs + I(smokeyrs * smokeyrs) + wt71 + I(wt71 * wt71) 90 | 91 | # mode the causal effect of qsmk on wt82_71 92 | model <- standardization(nhefs.nmv) 93 | print(model) 94 | #> 95 | #> Call: glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 96 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 97 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 98 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 99 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 100 | #> 101 | #> Coefficients: 102 | #> (Intercept) qsmk1 103 | #> -0.9699812 0.5509460 104 | #> sex1 race1 105 | #> -1.4371844 0.5868376 106 | #> education2 education3 107 | #> 0.8174769 0.5824119 108 | #> education4 education5 109 | #> 1.5240890 -0.1792422 110 | #> exercise1 exercise2 111 | #> 0.3063727 0.3550789 112 | #> active1 active2 113 | #> -0.9460683 -0.2707615 114 | #> age I(age * age) 115 | #> 0.3495673 -0.0060652 116 | #> smokeintensity I(smokeintensity * smokeintensity) 117 | #> 0.0482197 -0.0009597 118 | #> smokeyrs I(smokeyrs * smokeyrs) 119 | #> 0.1418662 -0.0018076 120 | #> wt71 I(wt71 * wt71) 121 | #> 0.0393011 -0.0009787 122 | #> qsmk1:age qsmk1:smokeintensity 123 | #> 0.0123138 0.0448028 124 | #> qsmk1:smokeyrs qsmk1:wt71 125 | #> -0.0235529 0.0291350 126 | #> 127 | #> Degrees of Freedom: 1565 Total (i.e. 
Null); 1542 Residual 128 | #> (3132 observations deleted due to missingness) 129 | #> Null Deviance: 97180 130 | #> Residual Deviance: 82690 AIC: 10710 131 | #> 132 | #> Average treatment effect of qsmk: 133 | #> 3.4927 134 | summary(model) 135 | #> 136 | #> Call: 137 | #> glm(formula = wt82_71 ~ qsmk + sex + race + education + exercise + 138 | #> active + age + (qsmk * age) + I(age * age) + smokeintensity + 139 | #> (qsmk * smokeintensity) + I(smokeintensity * smokeintensity) + 140 | #> smokeyrs + (qsmk * smokeyrs) + I(smokeyrs * smokeyrs) + wt71 + 141 | #> (qsmk * wt71) + I(wt71 * wt71), family = family, data = combined_data) 142 | #> 143 | #> Deviance Residuals: 144 | #> Min 1Q Median 3Q Max 145 | #> -41.913 -4.168 -0.314 3.869 44.573 146 | #> 147 | #> Coefficients: 148 | #> Estimate Std. Error t value Pr(>|t|) 149 | #> (Intercept) -0.9699812 4.3673208 -0.222 0.824266 150 | #> qsmk1 0.5509460 2.8229123 0.195 0.845286 151 | #> sex1 -1.4371844 0.4693195 -3.062 0.002235 ** 152 | #> race1 0.5868376 0.5828368 1.007 0.314158 153 | #> education2 0.8174769 0.6085125 1.343 0.179339 154 | #> education3 0.5824119 0.5575569 1.045 0.296382 155 | #> education4 1.5240890 0.8351981 1.825 0.068221 . 156 | #> education5 -0.1792422 0.7462118 -0.240 0.810205 157 | #> exercise1 0.3063727 0.5360193 0.572 0.567697 158 | #> exercise2 0.3550789 0.5592886 0.635 0.525603 159 | #> active1 -0.9460683 0.4105673 -2.304 0.021338 * 160 | #> active2 -0.2707615 0.6851128 -0.395 0.692745 161 | #> age 0.3495673 0.1648157 2.121 0.034084 * 162 | #> I(age * age) -0.0060652 0.0017347 -3.496 0.000485 *** 163 | #> smokeintensity 0.0482197 0.0518339 0.930 0.352375 164 | #> I(smokeintensity * smokeintensity) -0.0009597 0.0009409 -1.020 0.307878 165 | #> smokeyrs 0.1418662 0.0943836 1.503 0.133023 166 | #> I(smokeyrs * smokeyrs) -0.0018076 0.0015458 -1.169 0.242437 167 | #> wt71 0.0393011 0.0836422 0.470 0.638514 168 | #> I(wt71 * wt71) -0.0009787 0.0005255 -1.862 0.062751 . 169 | #> qsmk1:age 0.0123138 0.0670159 0.184 0.854238 170 | #> qsmk1:smokeintensity 0.0448028 0.0360169 1.244 0.213712 171 | #> qsmk1:smokeyrs -0.0235529 0.0654333 -0.360 0.718931 172 | #> qsmk1:wt71 0.0291350 0.0276439 1.054 0.292075 173 | #> --- 174 | #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 175 | #> 176 | #> (Dispersion parameter for gaussian family taken to be 53.62833) 177 | #> 178 | #> Null deviance: 97176 on 1565 degrees of freedom 179 | #> Residual deviance: 82695 on 1542 degrees of freedom 180 | #> (3132 observations deleted due to missingness) 181 | #> AIC: 10706 182 | #> 183 | #> Number of Fisher Scoring iterations: 2 184 | #> 185 | #> Average treatment effect of qsmk: 186 | #> Estimate 187 | #> Observed effect 2.638300 188 | #> Counterfactual (treated) 5.243703 189 | #> Counterfactual (untreated) 1.751003 190 | #> Risk difference 3.492700 191 | #> Risk ratio 2.994686 192 | #> 193 | ``` 194 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## R CMD check results 2 | 3 | 0 errors | 0 warnings | 1 note 4 | 5 | * This is a new release. 
6 | -------------------------------------------------------------------------------- /man/doubly_robust.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/doubly_robust.R 3 | \name{doubly_robust} 4 | \alias{doubly_robust} 5 | \title{Doubly Robust Model} 6 | \usage{ 7 | doubly_robust( 8 | data, 9 | out.mod = NULL, 10 | p.mod = NULL, 11 | f = NA, 12 | family = gaussian(), 13 | simple = pkg.env$simple, 14 | scores = NA, 15 | p.f = NA, 16 | p.simple = pkg.env$simple, 17 | p.family = binomial(), 18 | p.scores = NA, 19 | n.boot = 50, 20 | ... 21 | ) 22 | } 23 | \arguments{ 24 | \item{data}{a data frame containing the variables in the model. 25 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 26 | 27 | \item{out.mod}{(optional) a regression model that predicts the outcome. NOTE: the model given 28 | must be compatible with the \code{\link[stats:predict]{predict}} generic function.} 29 | 30 | \item{p.mod}{(optional) a propensity model that predicts the probability of treatment. NOTE: the model given 31 | must be compatible with the \code{\link[stats:predict]{predict}} generic function.} 32 | 33 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 34 | 35 | \item{family}{the family to be used in the general linear model. 36 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 37 | 38 | \item{simple}{a boolean indicator to build default formula with interactions. 39 | If true, interactions will be excluded. If false, interactions will be included. By 40 | default, simple is set to false.} 41 | 42 | \item{scores}{(optional) use calculated outcome estimates.} 43 | 44 | \item{p.f}{(optional) an object of class "formula" that overrides the default formula for the denominator of the IP 45 | weighting function.} 46 | 47 | \item{p.simple}{a boolean indicator to build default formula with interactions for the propensity models. 48 | If true, interactions will be excluded. If false, interactions will be included. By 49 | default, simple is set to false. 50 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 51 | 52 | \item{p.family}{the family to be used in the underlying propensity model. 53 | By default, this is set to \code{\link[stats:gaussian]{binomial}}.} 54 | 55 | \item{p.scores}{(optional) use calculated propensity scores.} 56 | 57 | \item{n.boot}{an integer value that indicates number of bootstrap iterations to calculate standard error.} 58 | 59 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 60 | } 61 | \value{ 62 | \code{doubly_robust} returns an object of \code{\link[base:class]{class}} "doubly_robust". 63 | 64 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 65 | the underlying \code{glm} model. 
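For intuition, the two sets of predictions are typically combined in the textbook augmented-IPW (doubly robust) form of the risk difference. The sketch below is purely illustrative pseudo-R with hypothetical argument names; it is not taken from the package's internal implementation.

``` r
# Illustrative AIPW-style risk difference (not the package source).
# y: observed outcome, a: binary treatment coded 0/1,
# y1.hat, y0.hat: outcome-model predictions under treatment and no treatment,
# p: estimated propensity scores P(A = 1 | covariates).
aipw_ate <- function(y, a, y1.hat, y0.hat, p) {
  mu1 <- mean(a * (y - y1.hat) / p + y1.hat)             # augmented mean under treatment
  mu0 <- mean((1 - a) * (y - y0.hat) / (1 - p) + y0.hat) # augmented mean under no treatment
  mu1 - mu0
}
```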
66 | 67 | An object of class \code{"doubly_robust"} is a list containing the following: 68 | 69 | \item{out.call}{the matched call of the outcome model.} 70 | \item{p.call}{the matched call of the propensity model.} 71 | \item{out.model}{the underlying outcome model.} 72 | \item{p.model}{the underlying propensity model.} 73 | \item{y_hat}{the estimated outcome values.} 74 | \item{p.scores}{the estimated propensity scores.} 75 | \item{ATE}{the estimated average treatment effect (risk difference).} 76 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 77 | \item{data}{the data frame used to train the model.} 78 | } 79 | \description{ 80 | \code{`doubly_robust`} trains both an outcome model and a propensity model to generate predictions 81 | for the outcome and probability of treatment respectively. By default, the model uses 82 | \code{\link[=standardization]{standardization}} and \code{\link[=propensity_scores]{propensity_scores}} to form a 83 | doubly-robust model between standardization and IP weighting. Alternatively, any outcome and treatment 84 | models can be provided instead, but must be compatible with the \code{\link[stats:predict]{predict}} generic function in R. 85 | Since many propensity models may not predict probabilities without additional arguments into the 86 | predict function, the predictions themselves can be given for both the outcome and propensity scores. 87 | } 88 | \examples{ 89 | library(causaldata) 90 | data(nhefs) 91 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 92 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 93 | 94 | confounders <- c( 95 | "sex", "race", "age", "education", "smokeintensity", 96 | "smokeyrs", "exercise", "active", "wt71" 97 | ) 98 | 99 | init_params(wt82_71, qsmk, 100 | covariates = confounders, 101 | data = nhefs.nmv 102 | ) 103 | 104 | # model using all defaults 105 | model <- doubly_robust(data = nhefs.nmv) 106 | summary(model) 107 | 108 | # use alternative outcome model 109 | out.mod <- propensity_matching(data = nhefs.nmv) 110 | db.model <- doubly_robust( 111 | out.mod = out.mod, 112 | data = nhefs.nmv 113 | ) 114 | db.model 115 | 116 | # give calculated outcome predictions and give formula for propensity scores 117 | db.model <- doubly_robust( 118 | scores = predict(out.mod), 119 | p.f = qsmk ~ sex + race + age, 120 | data = nhefs.nmv 121 | ) 122 | db.model 123 | 124 | } 125 | -------------------------------------------------------------------------------- /man/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ander428/CausalModels/4a81259939f9be47a294354f4b17f0652799a54a/man/figures/README-pressure-1.png -------------------------------------------------------------------------------- /man/gestimation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/gestimation.R 3 | \name{gestimation} 4 | \alias{gestimation} 5 | \title{One Parameter G-Estimation of Structural Nested Mean Models} 6 | \usage{ 7 | gestimation( 8 | data, 9 | grid, 10 | ids = list(), 11 | f = NA, 12 | family = binomial(), 13 | simple = pkg.env$simple, 14 | p.f = NA, 15 | p.simple = pkg.env$simple, 16 | p.family = binomial(), 17 | p.scores = NA, 18 | SW = TRUE, 19 | n.boot = 100, 20 | type = "one.grid", 21 | ... 22 | ) 23 | } 24 | \arguments{ 25 | \item{data}{a data frame containing the variables in the model. 
26 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 27 | 28 | \item{grid}{a list of possible \eqn{\beta} values that will be used in the grid search.} 29 | 30 | \item{ids}{(optional) see documentation for \code{\link[geepack:geeglm]{geeglm}}. By default rownames of the data will be used.} 31 | 32 | \item{f}{(optional) an object of class "formula" that overrides the default parameter. NOTE: for g-estimation this should be 33 | a propensity formula.} 34 | 35 | \item{family}{the family to be used in the general linear model. 36 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 37 | 38 | \item{simple}{(optional) a boolean indicator to build default formula with interactions for the g-estimation model. 39 | If true, interactions will be excluded. If false, interactions will be included. By default, simple is set to false. 40 | NOTE: \eqn{\beta} will be appended to the end of the formula} 41 | 42 | \item{p.f}{(optional) an object of class "formula" that overrides the default formula for the denominator of the IP 43 | weighting function.} 44 | 45 | \item{p.simple}{(optional) a boolean indicator to build default formula with interactions for the propensity models. 46 | If true, interactions will be excluded. If false, interactions will be included. By 47 | default, simple is set to false. 48 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 49 | 50 | \item{p.family}{the family to be used in the underlying propensity model. 51 | By default, this is set to \code{\link[stats:binomial]{binomial}}.} 52 | 53 | \item{p.scores}{(optional) use calculated propensity scores for the weights. If using standardized weights, 54 | the numerator will still be modeled.} 55 | 56 | \item{SW}{a boolean indicator to indicate the use of standardized weights. By default, this is set to true.} 57 | 58 | \item{n.boot}{(optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 59 | If no value is given, the standard error from the underlying linear model will be used. NOTE: when type is 'one.grid' 60 | bootstrapping is not performed. By default, this is set to 100.} 61 | 62 | \item{type}{the type of g-estimation to perform. It must be one of \code{"one.grid"} or \code{"one.linear"} for a 63 | one parameter grid and linear mean model estimation respectively.} 64 | 65 | \item{...}{additional arguments that may be passed to the underlying model.} 66 | } 67 | \value{ 68 | \code{gestimation} returns an object of \code{\link[base:class]{class} "gestimation"}. 69 | 70 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 71 | the underlying \code{glm} or \code{geeglm} model. 72 | 73 | An object of class \code{"gestimation"} is a list containing the following: 74 | 75 | \item{call}{the matched call.} 76 | \item{formula}{the formula used in the model.} 77 | \item{model}{the underlying glm model. If the model performed a grid search, this will be renamed 'best.model'} 78 | \item{weights}{the estimated IP weights.} 79 | \item{type}{returns the value used for the 'type' parameter.} 80 | \item{ATE}{the estimated average treatment effect (risk difference).} 81 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 82 | } 83 | \description{ 84 | `gestimation` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 85 | weights. 
The weights can either be standardized weights or non-standardized weights. A grid search is done on \eqn{\alpha} 86 | to construct the best \eqn{\beta} coefficient in the structural nested mean model. Alternatively, a linear mean model can be used 87 | for a closed form estimator. 88 | } 89 | \examples{ 90 | library(causaldata) 91 | data(nhefs) 92 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 93 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 94 | 95 | confounders <- c( 96 | "sex", "race", "age", "education", "smokeintensity", 97 | "smokeyrs", "exercise", "active", "wt71" 98 | ) 99 | 100 | init_params(wt82_71, qsmk, 101 | covariates = confounders, 102 | data = nhefs.nmv 103 | ) 104 | 105 | gest.model <- gestimation(nhefs.nmv, type = "one.linear", n.boot = 150) 106 | gest.model$ATE.summary 107 | } 108 | -------------------------------------------------------------------------------- /man/init_params.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/initialize.R 3 | \name{init_params} 4 | \alias{init_params} 5 | \title{Initialize CausalModels Package} 6 | \usage{ 7 | init_params(outcome, treatment, covariates, data, simple = FALSE) 8 | } 9 | \arguments{ 10 | \item{outcome}{the outcome variable of interest (must be continuous).} 11 | 12 | \item{treatment}{the treatment with the causal effect of interest on the outcome.} 13 | 14 | \item{covariates}{a list/vector of covariate names to be use for confounding adjustment.} 15 | 16 | \item{data}{a data frame containing the variables in the model.} 17 | 18 | \item{simple}{a boolean indicator to build default formula with interactions. 19 | If true, interactions will be excluded. If false, interactions will be included. By 20 | default, simple is set to false.} 21 | } 22 | \value{ 23 | No return value. Called for parameter initialization. 24 | } 25 | \description{ 26 | This function is required to be run first before any other function can run. 27 | This will set within the package the global outcome, treatment, and covariate functions for each model to use. 28 | } 29 | -------------------------------------------------------------------------------- /man/ipweighting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ipweighting.R 3 | \name{ipweighting} 4 | \alias{ipweighting} 5 | \title{Parametric IP Weighting} 6 | \usage{ 7 | ipweighting( 8 | data, 9 | f = NA, 10 | family = gaussian(), 11 | p.f = NA, 12 | p.simple = pkg.env$simple, 13 | p.family = binomial(), 14 | p.scores = NA, 15 | SW = TRUE, 16 | n.boot = 0, 17 | ... 18 | ) 19 | } 20 | \arguments{ 21 | \item{data}{a data frame containing the variables in the model. 22 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 23 | 24 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 25 | 26 | \item{family}{the family to be used in the general linear model. 27 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 28 | 29 | \item{p.f}{(optional) an object of class "formula" that overrides the default formula for the denominator of the IP 30 | weighting function.} 31 | 32 | \item{p.simple}{a boolean indicator to build default formula with interactions for the propensity models. 33 | If true, interactions will be excluded. If false, interactions will be included. 
By 34 | default, simple is set to false. 35 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 36 | 37 | \item{p.family}{the family to be used in the underlying propensity model. 38 | By default, this is set to \code{\link[stats:binomial]{binomial}}.} 39 | 40 | \item{p.scores}{(optional) use calculated propensity scores for the weights. If using standardized weights, 41 | the numerator will still be modeled.} 42 | 43 | \item{SW}{a boolean indicator to indicate the use of standardized weights. By default, this is set to true.} 44 | 45 | \item{n.boot}{(optional) an integer value that indicates number of bootstrap iterations to calculate standard error. 46 | If no value is given, the standard error from the underlying linear model will be used.} 47 | 48 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 49 | } 50 | \value{ 51 | \code{ipweighting} returns an object of \code{\link[base:class]{class} "ipweighting"}. 52 | 53 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 54 | the underlying \code{glm} model. 55 | 56 | An object of class \code{"ipweighting"} is a list containing the following: 57 | 58 | \item{call}{the matched call.} 59 | \item{formula}{the formula used in the model.} 60 | \item{model}{the underlying glm model.} 61 | \item{weights}{the estimated IP weights.} 62 | \item{ATE}{the estimated average treatment effect (risk difference).} 63 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 64 | } 65 | \description{ 66 | `ipweighting` uses the \code{\link[=propensity_scores]{propensity_scores}} function to generate inverse probability 67 | weights. The weights can either be standardized weights or non-standardized weights. The weights are used to train a 68 | general linear model whose coefficient for treatment represents the average treatment effect on the additive scale. 69 | } 70 | \examples{ 71 | library(causaldata) 72 | data(nhefs) 73 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 74 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 75 | 76 | confounders <- c( 77 | "sex", "race", "age", "education", "smokeintensity", 78 | "smokeyrs", "exercise", "active", "wt71" 79 | ) 80 | 81 | init_params(wt82_71, qsmk, 82 | covariates = confounders, 83 | data = nhefs.nmv 84 | ) 85 | 86 | # model using all defaults 87 | model <- ipweighting(data = nhefs.nmv) 88 | summary(model) 89 | 90 | # Model using calculated propensity scores and manual outcome formula 91 | p.scores <- propensity_scores(nhefs.nmv)$p.scores 92 | model <- ipweighting(wt82_71 ~ qsmk, p.scores = p.scores, data = nhefs.nmv) 93 | summary(model) 94 | 95 | } 96 | -------------------------------------------------------------------------------- /man/iv_est.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/iv_est.R 3 | \name{iv_est} 4 | \alias{iv_est} 5 | \title{Standard Instrumental Variable Estimator} 6 | \usage{ 7 | iv_est(IV, data, n.boot = 50) 8 | } 9 | \arguments{ 10 | \item{IV}{the instrumental variable to be used in the conditional means. Must be a factor with no more than 2 levels. 11 | It is assumed the second level is the positive level, i.e., the binary equivalent of the second factor level should be 1 12 | and the first should be 0.} 13 | 14 | \item{data}{a data frame containing the variables in the model. 
15 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 16 | 17 | \item{n.boot}{an integer value that indicates number of bootstrap iterations to calculate standard error.} 18 | } 19 | \value{ 20 | \code{iv_est} returns a data frame containing the standard IV estimate, standard error, and Wald 95% CI. 21 | } 22 | \description{ 23 | `iv_est` calculates the standard IV estimand using the conditional means on a given instrumental variable. 24 | } 25 | \examples{ 26 | library(causaldata) 27 | data(nhefs) 28 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 29 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 30 | 31 | confounders <- c( 32 | "sex", "race", "age", "education", "smokeintensity", 33 | "smokeyrs", "exercise", "active", "wt71" 34 | ) 35 | nhefs.iv <- nhefs[which(!is.na(nhefs$wt82) & !is.na(nhefs$price82)), ] 36 | nhefs.iv$highprice <- as.factor(ifelse(nhefs.iv$price82 >= 1.5, 1, 0)) 37 | nhefs.iv$qsmk <- as.factor(nhefs.iv$qsmk) 38 | init_params(wt82_71, qsmk, 39 | covariates = confounders, 40 | data = nhefs.iv) 41 | 42 | iv_est("highprice", nhefs.iv) 43 | } 44 | -------------------------------------------------------------------------------- /man/outcome_regression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/outcome_regression.R 3 | \name{outcome_regression} 4 | \alias{outcome_regression} 5 | \title{Outcome Regression} 6 | \usage{ 7 | outcome_regression( 8 | data, 9 | f = NA, 10 | simple = pkg.env$simple, 11 | family = gaussian(), 12 | contrasts = list(), 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{a data frame containing the variables in the model. 18 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 19 | 20 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 21 | 22 | \item{simple}{a boolean indicator to build default formula with interactions. 23 | If true, interactions will be excluded. If false, interactions will be included. By 24 | default, simple is set to false.} 25 | 26 | \item{family}{the family to be used in the general linear model. 27 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}. 28 | NOTE: if this is changed, the assumptions about the model output may be incorrect and may not provide 29 | accurate treatment effects.} 30 | 31 | \item{contrasts}{a list of continuous covariates and values in the model to be included in the contrast matrix 32 | (e.g. \code{list(age = c(18, 25, 40), weight = c(90, 159))}).} 33 | 34 | \item{...}{additional arguments that may be passed to the underlying \code{\link[multcomp:glht]{glht}} model.} 35 | } 36 | \value{ 37 | \code{outcome_regression} returns an object of \code{\link[base:class]{class} "outcome_regression"} 38 | 39 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 40 | the underlying \code{glht} model. 41 | 42 | An object of class \code{"outcome_regression"} is a list containing the following: 43 | 44 | \item{call}{the matched call.} 45 | \item{formula}{the formula used in the model.} 46 | \item{model}{the underlying glht model.} 47 | \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 48 | \item{ATE.summary}{a more detailed summary of the ATE estimations from glht. } 49 | } 50 | \description{ 51 | `outcome_regression` builds a linear model using all covariates. 
The treatment effects are stratified 52 | within the levels of the covariates. The model will automatically provide all discrete covariates in a contrast matrix. 53 | To view estimated change in treatment effect from continuous variables, a list called \code{contrasts}, needs to be given 54 | with specific values to estimate. A vector of values can be given for any particular continuous variable. 55 | } 56 | \examples{ 57 | library(causaldata) 58 | library(multcomp) 59 | 60 | data(nhefs) 61 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 62 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 63 | 64 | confounders <- c( 65 | "sex", "race", "age", "education", "smokeintensity", 66 | "smokeyrs", "exercise", "active", "wt71" 67 | ) 68 | 69 | init_params(wt82_71, qsmk, 70 | covariates = confounders, 71 | data = nhefs.nmv 72 | ) 73 | 74 | out.mod <- outcome_regression(nhefs.nmv, contrasts = list( 75 | age = c(21, 55), 76 | smokeintensity = c(5, 20, 40) 77 | )) 78 | print(out.mod) 79 | summary(out.mod) 80 | head(data.frame(preds = predict(out.mod))) 81 | 82 | } 83 | -------------------------------------------------------------------------------- /man/propensity_matching.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/propensity_matching.R 3 | \name{propensity_matching} 4 | \alias{propensity_matching} 5 | \title{Propensity Matching} 6 | \usage{ 7 | propensity_matching( 8 | data, 9 | f = NA, 10 | simple = pkg.env$simple, 11 | p.scores = NA, 12 | p.simple = pkg.env$simple, 13 | type = "strata", 14 | grp.width = 0.1, 15 | quant = TRUE, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{a data frame containing the variables in the model. 21 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 22 | 23 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 24 | 25 | \item{simple}{a boolean indicator to build default formula with interactions. 26 | If true, interactions will be excluded. If false, interactions will be included. By 27 | default, simple is set to false.} 28 | 29 | \item{p.scores}{(optional) use calculated propensity scores for matching. Otherwise, propensity scores 30 | will be automatically modeled.} 31 | 32 | \item{p.simple}{a boolean indicator to build default formula with interactions for the propensity models. 33 | If true, interactions will be excluded. If false, interactions will be included. By 34 | default, simple is set to false.} 35 | 36 | \item{type}{a string representing the type of propensity model to be used. By default, the function will stratify. Standardization with 37 | propensity scores may also be used. The value given for \code{type} must be in \code{c("strata", "stdm")}.} 38 | 39 | \item{grp.width}{a decimal value to specify the range to stratify the propensity scores. If option \code{quant} is set to true, 40 | this will represent the spread of percentiles. If false, it will represent the spread of raw values of propensity 41 | scores. Must be a decimal between 0 and 1. By default, this is set to 0.1. This option is ignored for standardization.} 42 | 43 | \item{quant}{a boolean indicator to specify the type of stratification. If true (default), the model will stratify by 44 | percentiles. If false, the scores will be grouped by a range of their raw values. 
This option is ignored for standardization.} 45 | 46 | \item{...}{additional arguments that may be passed to the underlying \code{\link[=propensity_scores]{propensity_scores}} function.} 47 | } 48 | \value{ 49 | \code{propensity_matching} returns an object of \code{\link[base:class]{class} "propensity_matching"} 50 | 51 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with the underlying \code{glht} or 52 | \code{standardization} model. 53 | 54 | An object of class \code{"propensity_matching"} is a list containing the following: 55 | 56 | \item{call}{the matched call.} 57 | \item{formula}{the formula used in the model.} 58 | \item{model}{either the underlying \code{glht} or \code{standardization} model.} 59 | \item{p.scores}{the estimated propensity scores} 60 | \item{ATE}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 61 | \item{ATE.summary}{either a data frame containing the \code{glht} or \code{standardization} summary. } 62 | } 63 | \description{ 64 | `propensity_matching` uses either stratification or standardization to model an outcome 65 | conditional on the propensity scores. In stratification, the model will break the propensity scores 66 | into groups and output a \code{\link[multcomp:glht]{glht}} model based off a contrast matrix which 67 | estimates the change in average causal effect within groups of propensity scores. In standardization, 68 | the model will output a \code{\link[=standardization]{standardization}} model that conditions on the 69 | propensity strata rather than the covariates. The model can also predict the expected outcome. 70 | } 71 | \examples{ 72 | library(causaldata) 73 | library(multcomp) 74 | 75 | data(nhefs) 76 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 77 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 78 | 79 | confounders <- c( 80 | "sex", "race", "age", "education", "smokeintensity", 81 | "smokeyrs", "exercise", "active", "wt71" 82 | ) 83 | 84 | init_params(wt82_71, qsmk, 85 | covariates = confounders, 86 | data = nhefs.nmv 87 | ) 88 | 89 | pm.model <- propensity_matching(nhefs.nmv) 90 | pm.model$ATE.summary 91 | summary(pm.model) 92 | head(data.frame(preds = predict(pm.model))) 93 | 94 | } 95 | -------------------------------------------------------------------------------- /man/propensity_scores.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/propensity_scores.R 3 | \name{propensity_scores} 4 | \alias{propensity_scores} 5 | \title{Propensity Scores} 6 | \usage{ 7 | propensity_scores( 8 | data, 9 | f = NA, 10 | simple = pkg.env$simple, 11 | family = binomial(), 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{data}{a data frame containing the variables in the model. 17 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 18 | 19 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 20 | 21 | \item{simple}{a boolean indicator to build default formula with interactions. 22 | If true, interactions will be excluded. If false, interactions will be included. By 23 | default, simple is set to false.} 24 | 25 | \item{family}{the family to be used in the general linear model. 
26 | By default, this is set to \code{\link[stats:binomial]{binomial}} 27 | NOTE: if this is changed, the outcome of the model may not be the probabilities and the results will not be valid.} 28 | 29 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 30 | } 31 | \value{ 32 | \code{propensity_scores} returns an object of \code{\link[base:class]{class} "propensity_scores"} 33 | 34 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 35 | the underlying \code{glm} model. 36 | 37 | An object of class \code{"propensity_scores"} is a list containing the following: 38 | 39 | \item{call}{the matched call.} 40 | \item{formula}{the formula used in the model.} 41 | \item{model}{the underlying glm model.} 42 | \item{p.scores}{the estimated propensity scores.} 43 | } 44 | \description{ 45 | `propensity_scores` builds a logistic regression with the target as the treatment variable 46 | and the covariates as the independent variables. 47 | } 48 | \examples{ 49 | library(causaldata) 50 | data(nhefs) 51 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 52 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 53 | 54 | confounders <- c( 55 | "sex", "race", "age", "education", "smokeintensity", 56 | "smokeyrs", "exercise", "active", "wt71" 57 | ) 58 | 59 | init_params(wt82_71, qsmk, 60 | covariates = confounders, 61 | data = nhefs.nmv 62 | ) 63 | 64 | p.score <- propensity_scores(nhefs.nmv) 65 | p.score 66 | 67 | } 68 | -------------------------------------------------------------------------------- /man/standardization.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/standardization.R 3 | \name{standardization} 4 | \alias{standardization} 5 | \title{Parametric Standardization} 6 | \usage{ 7 | standardization( 8 | data, 9 | f = NA, 10 | family = gaussian(), 11 | simple = pkg.env$simple, 12 | n.boot = 50, 13 | ... 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{a data frame containing the variables in the model. 18 | This should be the same data used in \code{\link[=init_params]{init_params}}.} 19 | 20 | \item{f}{(optional) an object of class "formula" that overrides the default parameter} 21 | 22 | \item{family}{the family to be used in the general linear model. 23 | By default, this is set to \code{\link[stats:gaussian]{gaussian}}.} 24 | 25 | \item{simple}{a boolean indicator to build default formula with interactions. 26 | If true, interactions will be excluded. If false, interactions will be included. By 27 | default, simple is set to false. 28 | NOTE: if this is changed, the coefficient for treatment may not accurately represent the average causal effect.} 29 | 30 | \item{n.boot}{an integer value that indicates number of bootstrap iterations to calculate standard error.} 31 | 32 | \item{...}{additional arguments that may be passed to the underlying \code{\link[stats:glm]{glm}} model.} 33 | } 34 | \value{ 35 | \code{standardization} returns an object of \code{\link[base:class]{class} "standardization"}. 36 | 37 | The functions \code{print}, \code{summary}, and \code{predict} can be used to interact with 38 | the underlying \code{glm} model. 
39 | 40 | An object of class \code{"standardization"} is a list containing the following: 41 | 42 | \item{call}{the matched call.} 43 | \item{formula}{the formula used in the model.} 44 | \item{model}{the underlying glm model.} 45 | \item{ATE}{a data frame containing estimates of the treatment effect 46 | of the observed, counterfactuals, and risk metrics.} 47 | \item{ATE.summary}{a data frame containing the ATE, SE, and 95\% CI of the ATE. } 48 | } 49 | \description{ 50 | `standardization` uses a standard \code{\link[stats:glm]{glm}} linear model to perform parametric standardization 51 | by adjusting bias through including all confounders as covariates. The model will calculate during training both the risk difference 52 | and the risk ratio. Both can be accessed from the model as well as estimates of the counterfactuals of treatment. 53 | } 54 | \examples{ 55 | library(causaldata) 56 | 57 | data(nhefs) 58 | nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ] 59 | nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk) 60 | 61 | confounders <- c( 62 | "sex", "race", "age", "education", "smokeintensity", 63 | "smokeyrs", "exercise", "active", "wt71" 64 | ) 65 | 66 | init_params(wt82_71, qsmk, 67 | covariates = confounders, 68 | data = nhefs.nmv 69 | ) 70 | 71 | # model using all defaults 72 | model <- standardization(data = nhefs.nmv) 73 | print(model) 74 | summary(model) 75 | print(model$ATE.summary) 76 | print(model$ATE.summary$Estimate[[2]] - 77 | model$ATE.summary$Estimate[[3]]) # manually calculate risk difference 78 | 79 | } 80 | -------------------------------------------------------------------------------- /renv.lock: -------------------------------------------------------------------------------- 1 | { 2 | "R": { 3 | "Version": "4.1.1", 4 | "Repositories": [ 5 | { 6 | "Name": "CRAN", 7 | "URL": "https://cran.rstudio.com" 8 | } 9 | ] 10 | }, 11 | "Packages": { 12 | "renv": { 13 | "Package": "renv", 14 | "Version": "0.15.4", 15 | "Source": "Repository", 16 | "Repository": "CRAN", 17 | "Hash": "c1078316e1d4f70275fc1ea60c0bc431", 18 | "Requirements": [] 19 | } 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /renv/.gitignore: -------------------------------------------------------------------------------- 1 | library/ 2 | local/ 3 | cellar/ 4 | lock/ 5 | python/ 6 | staging/ 7 | -------------------------------------------------------------------------------- /renv/activate.R: -------------------------------------------------------------------------------- 1 | 2 | local({ 3 | 4 | # the requested version of renv 5 | version <- "0.15.4" 6 | 7 | # the project directory 8 | project <- getwd() 9 | 10 | # figure out whether the autoloader is enabled 11 | enabled <- local({ 12 | 13 | # first, check config option 14 | override <- getOption("renv.config.autoloader.enabled") 15 | if (!is.null(override)) 16 | return(override) 17 | 18 | # next, check environment variables 19 | # TODO: prefer using the configuration one in the future 20 | envvars <- c( 21 | "RENV_CONFIG_AUTOLOADER_ENABLED", 22 | "RENV_AUTOLOADER_ENABLED", 23 | "RENV_ACTIVATE_PROJECT" 24 | ) 25 | 26 | for (envvar in envvars) { 27 | envval <- Sys.getenv(envvar, unset = NA) 28 | if (!is.na(envval)) 29 | return(tolower(envval) %in% c("true", "t", "1")) 30 | } 31 | 32 | # enable by default 33 | TRUE 34 | 35 | }) 36 | 37 | if (!enabled) 38 | return(FALSE) 39 | 40 | # avoid recursion 41 | if (identical(getOption("renv.autoloader.running"), TRUE)) { 42 | warning("ignoring recursive attempt to run renv 
autoloader") 43 | return(invisible(TRUE)) 44 | } 45 | 46 | # signal that we're loading renv during R startup 47 | options(renv.autoloader.running = TRUE) 48 | on.exit(options(renv.autoloader.running = NULL), add = TRUE) 49 | 50 | # signal that we've consented to use renv 51 | options(renv.consent = TRUE) 52 | 53 | # load the 'utils' package eagerly -- this ensures that renv shims, which 54 | # mask 'utils' packages, will come first on the search path 55 | library(utils, lib.loc = .Library) 56 | 57 | # unload renv if it's already been laoded 58 | if ("renv" %in% loadedNamespaces()) 59 | unloadNamespace("renv") 60 | 61 | # load bootstrap tools 62 | `%||%` <- function(x, y) { 63 | if (is.environment(x) || length(x)) x else y 64 | } 65 | 66 | bootstrap <- function(version, library) { 67 | 68 | # attempt to download renv 69 | tarball <- tryCatch(renv_bootstrap_download(version), error = identity) 70 | if (inherits(tarball, "error")) 71 | stop("failed to download renv ", version) 72 | 73 | # now attempt to install 74 | status <- tryCatch(renv_bootstrap_install(version, tarball, library), error = identity) 75 | if (inherits(status, "error")) 76 | stop("failed to install renv ", version) 77 | 78 | } 79 | 80 | renv_bootstrap_tests_running <- function() { 81 | getOption("renv.tests.running", default = FALSE) 82 | } 83 | 84 | renv_bootstrap_repos <- function() { 85 | 86 | # check for repos override 87 | repos <- Sys.getenv("RENV_CONFIG_REPOS_OVERRIDE", unset = NA) 88 | if (!is.na(repos)) 89 | return(repos) 90 | 91 | # check for lockfile repositories 92 | repos <- tryCatch(renv_bootstrap_repos_lockfile(), error = identity) 93 | if (!inherits(repos, "error") && length(repos)) 94 | return(repos) 95 | 96 | # if we're testing, re-use the test repositories 97 | if (renv_bootstrap_tests_running()) 98 | return(getOption("renv.tests.repos")) 99 | 100 | # retrieve current repos 101 | repos <- getOption("repos") 102 | 103 | # ensure @CRAN@ entries are resolved 104 | repos[repos == "@CRAN@"] <- getOption( 105 | "renv.repos.cran", 106 | "https://cloud.r-project.org" 107 | ) 108 | 109 | # add in renv.bootstrap.repos if set 110 | default <- c(FALLBACK = "https://cloud.r-project.org") 111 | extra <- getOption("renv.bootstrap.repos", default = default) 112 | repos <- c(repos, extra) 113 | 114 | # remove duplicates that might've snuck in 115 | dupes <- duplicated(repos) | duplicated(names(repos)) 116 | repos[!dupes] 117 | 118 | } 119 | 120 | renv_bootstrap_repos_lockfile <- function() { 121 | 122 | lockpath <- Sys.getenv("RENV_PATHS_LOCKFILE", unset = "renv.lock") 123 | if (!file.exists(lockpath)) 124 | return(NULL) 125 | 126 | lockfile <- tryCatch(renv_json_read(lockpath), error = identity) 127 | if (inherits(lockfile, "error")) { 128 | warning(lockfile) 129 | return(NULL) 130 | } 131 | 132 | repos <- lockfile$R$Repositories 133 | if (length(repos) == 0) 134 | return(NULL) 135 | 136 | keys <- vapply(repos, `[[`, "Name", FUN.VALUE = character(1)) 137 | vals <- vapply(repos, `[[`, "URL", FUN.VALUE = character(1)) 138 | names(vals) <- keys 139 | 140 | return(vals) 141 | 142 | } 143 | 144 | renv_bootstrap_download <- function(version) { 145 | 146 | # if the renv version number has 4 components, assume it must 147 | # be retrieved via github 148 | nv <- numeric_version(version) 149 | components <- unclass(nv)[[1]] 150 | 151 | # if this appears to be a development version of 'renv', we'll 152 | # try to restore from github 153 | dev <- length(components) == 4L 154 | 155 | # begin collecting different methods for finding 
renv 156 | methods <- c( 157 | renv_bootstrap_download_tarball, 158 | if (dev) 159 | renv_bootstrap_download_github 160 | else c( 161 | renv_bootstrap_download_cran_latest, 162 | renv_bootstrap_download_cran_archive 163 | ) 164 | ) 165 | 166 | for (method in methods) { 167 | path <- tryCatch(method(version), error = identity) 168 | if (is.character(path) && file.exists(path)) 169 | return(path) 170 | } 171 | 172 | stop("failed to download renv ", version) 173 | 174 | } 175 | 176 | renv_bootstrap_download_impl <- function(url, destfile) { 177 | 178 | mode <- "wb" 179 | 180 | # https://bugs.r-project.org/bugzilla/show_bug.cgi?id=17715 181 | fixup <- 182 | Sys.info()[["sysname"]] == "Windows" && 183 | substring(url, 1L, 5L) == "file:" 184 | 185 | if (fixup) 186 | mode <- "w+b" 187 | 188 | utils::download.file( 189 | url = url, 190 | destfile = destfile, 191 | mode = mode, 192 | quiet = TRUE 193 | ) 194 | 195 | } 196 | 197 | renv_bootstrap_download_cran_latest <- function(version) { 198 | 199 | spec <- renv_bootstrap_download_cran_latest_find(version) 200 | 201 | message("* Downloading renv ", version, " ... ", appendLF = FALSE) 202 | 203 | type <- spec$type 204 | repos <- spec$repos 205 | 206 | info <- tryCatch( 207 | utils::download.packages( 208 | pkgs = "renv", 209 | destdir = tempdir(), 210 | repos = repos, 211 | type = type, 212 | quiet = TRUE 213 | ), 214 | condition = identity 215 | ) 216 | 217 | if (inherits(info, "condition")) { 218 | message("FAILED") 219 | return(FALSE) 220 | } 221 | 222 | # report success and return 223 | message("OK (downloaded ", type, ")") 224 | info[1, 2] 225 | 226 | } 227 | 228 | renv_bootstrap_download_cran_latest_find <- function(version) { 229 | 230 | # check whether binaries are supported on this system 231 | binary <- 232 | getOption("renv.bootstrap.binary", default = TRUE) && 233 | !identical(.Platform$pkgType, "source") && 234 | !identical(getOption("pkgType"), "source") && 235 | Sys.info()[["sysname"]] %in% c("Darwin", "Windows") 236 | 237 | types <- c(if (binary) "binary", "source") 238 | 239 | # iterate over types + repositories 240 | for (type in types) { 241 | for (repos in renv_bootstrap_repos()) { 242 | 243 | # retrieve package database 244 | db <- tryCatch( 245 | as.data.frame( 246 | utils::available.packages(type = type, repos = repos), 247 | stringsAsFactors = FALSE 248 | ), 249 | error = identity 250 | ) 251 | 252 | if (inherits(db, "error")) 253 | next 254 | 255 | # check for compatible entry 256 | entry <- db[db$Package %in% "renv" & db$Version %in% version, ] 257 | if (nrow(entry) == 0) 258 | next 259 | 260 | # found it; return spec to caller 261 | spec <- list(entry = entry, type = type, repos = repos) 262 | return(spec) 263 | 264 | } 265 | } 266 | 267 | # if we got here, we failed to find renv 268 | fmt <- "renv %s is not available from your declared package repositories" 269 | stop(sprintf(fmt, version)) 270 | 271 | } 272 | 273 | renv_bootstrap_download_cran_archive <- function(version) { 274 | 275 | name <- sprintf("renv_%s.tar.gz", version) 276 | repos <- renv_bootstrap_repos() 277 | urls <- file.path(repos, "src/contrib/Archive/renv", name) 278 | destfile <- file.path(tempdir(), name) 279 | 280 | message("* Downloading renv ", version, " ... 
", appendLF = FALSE) 281 | 282 | for (url in urls) { 283 | 284 | status <- tryCatch( 285 | renv_bootstrap_download_impl(url, destfile), 286 | condition = identity 287 | ) 288 | 289 | if (identical(status, 0L)) { 290 | message("OK") 291 | return(destfile) 292 | } 293 | 294 | } 295 | 296 | message("FAILED") 297 | return(FALSE) 298 | 299 | } 300 | 301 | renv_bootstrap_download_tarball <- function(version) { 302 | 303 | # if the user has provided the path to a tarball via 304 | # an environment variable, then use it 305 | tarball <- Sys.getenv("RENV_BOOTSTRAP_TARBALL", unset = NA) 306 | if (is.na(tarball)) 307 | return() 308 | 309 | # allow directories 310 | info <- file.info(tarball, extra_cols = FALSE) 311 | if (identical(info$isdir, TRUE)) { 312 | name <- sprintf("renv_%s.tar.gz", version) 313 | tarball <- file.path(tarball, name) 314 | } 315 | 316 | # bail if it doesn't exist 317 | if (!file.exists(tarball)) 318 | return() 319 | 320 | fmt <- "* Bootstrapping with tarball at path '%s'." 321 | msg <- sprintf(fmt, tarball) 322 | message(msg) 323 | 324 | tarball 325 | 326 | } 327 | 328 | renv_bootstrap_download_github <- function(version) { 329 | 330 | enabled <- Sys.getenv("RENV_BOOTSTRAP_FROM_GITHUB", unset = "TRUE") 331 | if (!identical(enabled, "TRUE")) 332 | return(FALSE) 333 | 334 | # prepare download options 335 | pat <- Sys.getenv("GITHUB_PAT") 336 | if (nzchar(Sys.which("curl")) && nzchar(pat)) { 337 | fmt <- "--location --fail --header \"Authorization: token %s\"" 338 | extra <- sprintf(fmt, pat) 339 | saved <- options("download.file.method", "download.file.extra") 340 | options(download.file.method = "curl", download.file.extra = extra) 341 | on.exit(do.call(base::options, saved), add = TRUE) 342 | } else if (nzchar(Sys.which("wget")) && nzchar(pat)) { 343 | fmt <- "--header=\"Authorization: token %s\"" 344 | extra <- sprintf(fmt, pat) 345 | saved <- options("download.file.method", "download.file.extra") 346 | options(download.file.method = "wget", download.file.extra = extra) 347 | on.exit(do.call(base::options, saved), add = TRUE) 348 | } 349 | 350 | message("* Downloading renv ", version, " from GitHub ... ", appendLF = FALSE) 351 | 352 | url <- file.path("https://api.github.com/repos/rstudio/renv/tarball", version) 353 | name <- sprintf("renv_%s.tar.gz", version) 354 | destfile <- file.path(tempdir(), name) 355 | 356 | status <- tryCatch( 357 | renv_bootstrap_download_impl(url, destfile), 358 | condition = identity 359 | ) 360 | 361 | if (!identical(status, 0L)) { 362 | message("FAILED") 363 | return(FALSE) 364 | } 365 | 366 | message("OK") 367 | return(destfile) 368 | 369 | } 370 | 371 | renv_bootstrap_install <- function(version, tarball, library) { 372 | 373 | # attempt to install it into project library 374 | message("* Installing renv ", version, " ... 
", appendLF = FALSE) 375 | dir.create(library, showWarnings = FALSE, recursive = TRUE) 376 | 377 | # invoke using system2 so we can capture and report output 378 | bin <- R.home("bin") 379 | exe <- if (Sys.info()[["sysname"]] == "Windows") "R.exe" else "R" 380 | r <- file.path(bin, exe) 381 | 382 | args <- c( 383 | "--vanilla", "CMD", "INSTALL", "--no-multiarch", 384 | "-l", shQuote(path.expand(library)), 385 | shQuote(path.expand(tarball)) 386 | ) 387 | 388 | output <- system2(r, args, stdout = TRUE, stderr = TRUE) 389 | message("Done!") 390 | 391 | # check for successful install 392 | status <- attr(output, "status") 393 | if (is.numeric(status) && !identical(status, 0L)) { 394 | header <- "Error installing renv:" 395 | lines <- paste(rep.int("=", nchar(header)), collapse = "") 396 | text <- c(header, lines, output) 397 | writeLines(text, con = stderr()) 398 | } 399 | 400 | status 401 | 402 | } 403 | 404 | renv_bootstrap_platform_prefix <- function() { 405 | 406 | # construct version prefix 407 | version <- paste(R.version$major, R.version$minor, sep = ".") 408 | prefix <- paste("R", numeric_version(version)[1, 1:2], sep = "-") 409 | 410 | # include SVN revision for development versions of R 411 | # (to avoid sharing platform-specific artefacts with released versions of R) 412 | devel <- 413 | identical(R.version[["status"]], "Under development (unstable)") || 414 | identical(R.version[["nickname"]], "Unsuffered Consequences") 415 | 416 | if (devel) 417 | prefix <- paste(prefix, R.version[["svn rev"]], sep = "-r") 418 | 419 | # build list of path components 420 | components <- c(prefix, R.version$platform) 421 | 422 | # include prefix if provided by user 423 | prefix <- renv_bootstrap_platform_prefix_impl() 424 | if (!is.na(prefix) && nzchar(prefix)) 425 | components <- c(prefix, components) 426 | 427 | # build prefix 428 | paste(components, collapse = "/") 429 | 430 | } 431 | 432 | renv_bootstrap_platform_prefix_impl <- function() { 433 | 434 | # if an explicit prefix has been supplied, use it 435 | prefix <- Sys.getenv("RENV_PATHS_PREFIX", unset = NA) 436 | if (!is.na(prefix)) 437 | return(prefix) 438 | 439 | # if the user has requested an automatic prefix, generate it 440 | auto <- Sys.getenv("RENV_PATHS_PREFIX_AUTO", unset = NA) 441 | if (auto %in% c("TRUE", "True", "true", "1")) 442 | return(renv_bootstrap_platform_prefix_auto()) 443 | 444 | # empty string on failure 445 | "" 446 | 447 | } 448 | 449 | renv_bootstrap_platform_prefix_auto <- function() { 450 | 451 | prefix <- tryCatch(renv_bootstrap_platform_os(), error = identity) 452 | if (inherits(prefix, "error") || prefix %in% "unknown") { 453 | 454 | msg <- paste( 455 | "failed to infer current operating system", 456 | "please file a bug report at https://github.com/rstudio/renv/issues", 457 | sep = "; " 458 | ) 459 | 460 | warning(msg) 461 | 462 | } 463 | 464 | prefix 465 | 466 | } 467 | 468 | renv_bootstrap_platform_os <- function() { 469 | 470 | sysinfo <- Sys.info() 471 | sysname <- sysinfo[["sysname"]] 472 | 473 | # handle Windows + macOS up front 474 | if (sysname == "Windows") 475 | return("windows") 476 | else if (sysname == "Darwin") 477 | return("macos") 478 | 479 | # check for os-release files 480 | for (file in c("/etc/os-release", "/usr/lib/os-release")) 481 | if (file.exists(file)) 482 | return(renv_bootstrap_platform_os_via_os_release(file, sysinfo)) 483 | 484 | # check for redhat-release files 485 | if (file.exists("/etc/redhat-release")) 486 | return(renv_bootstrap_platform_os_via_redhat_release()) 487 | 488 | 
"unknown" 489 | 490 | } 491 | 492 | renv_bootstrap_platform_os_via_os_release <- function(file, sysinfo) { 493 | 494 | # read /etc/os-release 495 | release <- utils::read.table( 496 | file = file, 497 | sep = "=", 498 | quote = c("\"", "'"), 499 | col.names = c("Key", "Value"), 500 | comment.char = "#", 501 | stringsAsFactors = FALSE 502 | ) 503 | 504 | vars <- as.list(release$Value) 505 | names(vars) <- release$Key 506 | 507 | # get os name 508 | os <- tolower(sysinfo[["sysname"]]) 509 | 510 | # read id 511 | id <- "unknown" 512 | for (field in c("ID", "ID_LIKE")) { 513 | if (field %in% names(vars) && nzchar(vars[[field]])) { 514 | id <- vars[[field]] 515 | break 516 | } 517 | } 518 | 519 | # read version 520 | version <- "unknown" 521 | for (field in c("UBUNTU_CODENAME", "VERSION_CODENAME", "VERSION_ID", "BUILD_ID")) { 522 | if (field %in% names(vars) && nzchar(vars[[field]])) { 523 | version <- vars[[field]] 524 | break 525 | } 526 | } 527 | 528 | # join together 529 | paste(c(os, id, version), collapse = "-") 530 | 531 | } 532 | 533 | renv_bootstrap_platform_os_via_redhat_release <- function() { 534 | 535 | # read /etc/redhat-release 536 | contents <- readLines("/etc/redhat-release", warn = FALSE) 537 | 538 | # infer id 539 | id <- if (grepl("centos", contents, ignore.case = TRUE)) 540 | "centos" 541 | else if (grepl("redhat", contents, ignore.case = TRUE)) 542 | "redhat" 543 | else 544 | "unknown" 545 | 546 | # try to find a version component (very hacky) 547 | version <- "unknown" 548 | 549 | parts <- strsplit(contents, "[[:space:]]")[[1L]] 550 | for (part in parts) { 551 | 552 | nv <- tryCatch(numeric_version(part), error = identity) 553 | if (inherits(nv, "error")) 554 | next 555 | 556 | version <- nv[1, 1] 557 | break 558 | 559 | } 560 | 561 | paste(c("linux", id, version), collapse = "-") 562 | 563 | } 564 | 565 | renv_bootstrap_library_root_name <- function(project) { 566 | 567 | # use project name as-is if requested 568 | asis <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT_ASIS", unset = "FALSE") 569 | if (asis) 570 | return(basename(project)) 571 | 572 | # otherwise, disambiguate based on project's path 573 | id <- substring(renv_bootstrap_hash_text(project), 1L, 8L) 574 | paste(basename(project), id, sep = "-") 575 | 576 | } 577 | 578 | renv_bootstrap_library_root <- function(project) { 579 | 580 | prefix <- renv_bootstrap_profile_prefix() 581 | 582 | path <- Sys.getenv("RENV_PATHS_LIBRARY", unset = NA) 583 | if (!is.na(path)) 584 | return(paste(c(path, prefix), collapse = "/")) 585 | 586 | path <- renv_bootstrap_library_root_impl(project) 587 | if (!is.null(path)) { 588 | name <- renv_bootstrap_library_root_name(project) 589 | return(paste(c(path, prefix, name), collapse = "/")) 590 | } 591 | 592 | renv_bootstrap_paths_renv("library", project = project) 593 | 594 | } 595 | 596 | renv_bootstrap_library_root_impl <- function(project) { 597 | 598 | root <- Sys.getenv("RENV_PATHS_LIBRARY_ROOT", unset = NA) 599 | if (!is.na(root)) 600 | return(root) 601 | 602 | type <- renv_bootstrap_project_type(project) 603 | if (identical(type, "package")) { 604 | userdir <- renv_bootstrap_user_dir() 605 | return(file.path(userdir, "library")) 606 | } 607 | 608 | } 609 | 610 | renv_bootstrap_validate_version <- function(version) { 611 | 612 | loadedversion <- utils::packageDescription("renv", fields = "Version") 613 | if (version == loadedversion) 614 | return(TRUE) 615 | 616 | # assume four-component versions are from GitHub; three-component 617 | # versions are from CRAN 618 | components <- 
strsplit(loadedversion, "[.-]")[[1]] 619 | remote <- if (length(components) == 4L) 620 | paste("rstudio/renv", loadedversion, sep = "@") 621 | else 622 | paste("renv", loadedversion, sep = "@") 623 | 624 | fmt <- paste( 625 | "renv %1$s was loaded from project library, but this project is configured to use renv %2$s.", 626 | "Use `renv::record(\"%3$s\")` to record renv %1$s in the lockfile.", 627 | "Use `renv::restore(packages = \"renv\")` to install renv %2$s into the project library.", 628 | sep = "\n" 629 | ) 630 | 631 | msg <- sprintf(fmt, loadedversion, version, remote) 632 | warning(msg, call. = FALSE) 633 | 634 | FALSE 635 | 636 | } 637 | 638 | renv_bootstrap_hash_text <- function(text) { 639 | 640 | hashfile <- tempfile("renv-hash-") 641 | on.exit(unlink(hashfile), add = TRUE) 642 | 643 | writeLines(text, con = hashfile) 644 | tools::md5sum(hashfile) 645 | 646 | } 647 | 648 | renv_bootstrap_load <- function(project, libpath, version) { 649 | 650 | # try to load renv from the project library 651 | if (!requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) 652 | return(FALSE) 653 | 654 | # warn if the version of renv loaded does not match 655 | renv_bootstrap_validate_version(version) 656 | 657 | # load the project 658 | renv::load(project) 659 | 660 | TRUE 661 | 662 | } 663 | 664 | renv_bootstrap_profile_load <- function(project) { 665 | 666 | # if RENV_PROFILE is already set, just use that 667 | profile <- Sys.getenv("RENV_PROFILE", unset = NA) 668 | if (!is.na(profile) && nzchar(profile)) 669 | return(profile) 670 | 671 | # check for a profile file (nothing to do if it doesn't exist) 672 | path <- renv_bootstrap_paths_renv("profile", profile = FALSE) 673 | if (!file.exists(path)) 674 | return(NULL) 675 | 676 | # read the profile, and set it if it exists 677 | contents <- readLines(path, warn = FALSE) 678 | if (length(contents) == 0L) 679 | return(NULL) 680 | 681 | # set RENV_PROFILE 682 | profile <- contents[[1L]] 683 | if (!profile %in% c("", "default")) 684 | Sys.setenv(RENV_PROFILE = profile) 685 | 686 | profile 687 | 688 | } 689 | 690 | renv_bootstrap_profile_prefix <- function() { 691 | profile <- renv_bootstrap_profile_get() 692 | if (!is.null(profile)) 693 | return(file.path("profiles", profile, "renv")) 694 | } 695 | 696 | renv_bootstrap_profile_get <- function() { 697 | profile <- Sys.getenv("RENV_PROFILE", unset = "") 698 | renv_bootstrap_profile_normalize(profile) 699 | } 700 | 701 | renv_bootstrap_profile_set <- function(profile) { 702 | profile <- renv_bootstrap_profile_normalize(profile) 703 | if (is.null(profile)) 704 | Sys.unsetenv("RENV_PROFILE") 705 | else 706 | Sys.setenv(RENV_PROFILE = profile) 707 | } 708 | 709 | renv_bootstrap_profile_normalize <- function(profile) { 710 | 711 | if (is.null(profile) || profile %in% c("", "default")) 712 | return(NULL) 713 | 714 | profile 715 | 716 | } 717 | 718 | renv_bootstrap_path_absolute <- function(path) { 719 | 720 | substr(path, 1L, 1L) %in% c("~", "/", "\\") || ( 721 | substr(path, 1L, 1L) %in% c(letters, LETTERS) && 722 | substr(path, 2L, 3L) %in% c(":/", ":\\") 723 | ) 724 | 725 | } 726 | 727 | renv_bootstrap_paths_renv <- function(..., profile = TRUE, project = NULL) { 728 | renv <- Sys.getenv("RENV_PATHS_RENV", unset = "renv") 729 | root <- if (renv_bootstrap_path_absolute(renv)) NULL else project 730 | prefix <- if (profile) renv_bootstrap_profile_prefix() 731 | components <- c(root, renv, prefix, ...) 
732 | paste(components, collapse = "/") 733 | } 734 | 735 | renv_bootstrap_project_type <- function(path) { 736 | 737 | descpath <- file.path(path, "DESCRIPTION") 738 | if (!file.exists(descpath)) 739 | return("unknown") 740 | 741 | desc <- tryCatch( 742 | read.dcf(descpath, all = TRUE), 743 | error = identity 744 | ) 745 | 746 | if (inherits(desc, "error")) 747 | return("unknown") 748 | 749 | type <- desc$Type 750 | if (!is.null(type)) 751 | return(tolower(type)) 752 | 753 | package <- desc$Package 754 | if (!is.null(package)) 755 | return("package") 756 | 757 | "unknown" 758 | 759 | } 760 | 761 | renv_bootstrap_user_dir <- function() { 762 | dir <- renv_bootstrap_user_dir_impl() 763 | path.expand(chartr("\\", "/", dir)) 764 | } 765 | 766 | renv_bootstrap_user_dir_impl <- function() { 767 | 768 | # use local override if set 769 | override <- getOption("renv.userdir.override") 770 | if (!is.null(override)) 771 | return(override) 772 | 773 | # use R_user_dir if available 774 | tools <- asNamespace("tools") 775 | if (is.function(tools$R_user_dir)) 776 | return(tools$R_user_dir("renv", "cache")) 777 | 778 | # try using our own backfill for older versions of R 779 | envvars <- c("R_USER_CACHE_DIR", "XDG_CACHE_HOME") 780 | for (envvar in envvars) { 781 | root <- Sys.getenv(envvar, unset = NA) 782 | if (!is.na(root)) 783 | return(file.path(root, "R/renv")) 784 | } 785 | 786 | # use platform-specific default fallbacks 787 | if (Sys.info()[["sysname"]] == "Windows") 788 | file.path(Sys.getenv("LOCALAPPDATA"), "R/cache/R/renv") 789 | else if (Sys.info()[["sysname"]] == "Darwin") 790 | "~/Library/Caches/org.R-project.R/R/renv" 791 | else 792 | "~/.cache/R/renv" 793 | 794 | } 795 | 796 | 797 | renv_json_read <- function(file = NULL, text = NULL) { 798 | 799 | text <- paste(text %||% read(file), collapse = "\n") 800 | 801 | # find strings in the JSON 802 | pattern <- '["](?:(?:\\\\.)|(?:[^"\\\\]))*?["]' 803 | locs <- gregexpr(pattern, text)[[1]] 804 | 805 | # if any are found, replace them with placeholders 806 | replaced <- text 807 | strings <- character() 808 | replacements <- character() 809 | 810 | if (!identical(c(locs), -1L)) { 811 | 812 | # get the string values 813 | starts <- locs 814 | ends <- locs + attr(locs, "match.length") - 1L 815 | strings <- substring(text, starts, ends) 816 | 817 | # only keep those requiring escaping 818 | strings <- grep("[[\\]{}:]", strings, perl = TRUE, value = TRUE) 819 | 820 | # compute replacements 821 | replacements <- sprintf('"\032%i\032"', seq_along(strings)) 822 | 823 | # replace the strings 824 | mapply(function(string, replacement) { 825 | replaced <<- sub(string, replacement, replaced, fixed = TRUE) 826 | }, strings, replacements) 827 | 828 | } 829 | 830 | # transform the JSON into something the R parser understands 831 | transformed <- replaced 832 | transformed <- gsub("[[{]", "list(", transformed) 833 | transformed <- gsub("[]}]", ")", transformed) 834 | transformed <- gsub(":", "=", transformed, fixed = TRUE) 835 | text <- paste(transformed, collapse = "\n") 836 | 837 | # parse it 838 | json <- parse(text = text, keep.source = FALSE, srcfile = NULL)[[1L]] 839 | 840 | # construct map between source strings, replaced strings 841 | map <- as.character(parse(text = strings)) 842 | names(map) <- as.character(parse(text = replacements)) 843 | 844 | # convert to list 845 | map <- as.list(map) 846 | 847 | # remap strings in object 848 | remapped <- renv_json_remap(json, map) 849 | 850 | # evaluate 851 | eval(remapped, envir = baseenv()) 852 | 853 | } 854 
| 855 | renv_json_remap <- function(json, map) { 856 | 857 | # fix names 858 | if (!is.null(names(json))) { 859 | lhs <- match(names(json), names(map), nomatch = 0L) 860 | rhs <- match(names(map), names(json), nomatch = 0L) 861 | names(json)[rhs] <- map[lhs] 862 | } 863 | 864 | # fix values 865 | if (is.character(json)) 866 | return(map[[json]] %||% json) 867 | 868 | # handle true, false, null 869 | if (is.name(json)) { 870 | text <- as.character(json) 871 | if (text == "true") 872 | return(TRUE) 873 | else if (text == "false") 874 | return(FALSE) 875 | else if (text == "null") 876 | return(NULL) 877 | } 878 | 879 | # recurse 880 | if (is.recursive(json)) { 881 | for (i in seq_along(json)) { 882 | json[i] <- list(renv_json_remap(json[[i]], map)) 883 | } 884 | } 885 | 886 | json 887 | 888 | } 889 | 890 | # load the renv profile, if any 891 | renv_bootstrap_profile_load(project) 892 | 893 | # construct path to library root 894 | root <- renv_bootstrap_library_root(project) 895 | 896 | # construct library prefix for platform 897 | prefix <- renv_bootstrap_platform_prefix() 898 | 899 | # construct full libpath 900 | libpath <- file.path(root, prefix) 901 | 902 | # attempt to load 903 | if (renv_bootstrap_load(project, libpath, version)) 904 | return(TRUE) 905 | 906 | # load failed; inform user we're about to bootstrap 907 | prefix <- paste("# Bootstrapping renv", version) 908 | postfix <- paste(rep.int("-", 77L - nchar(prefix)), collapse = "") 909 | header <- paste(prefix, postfix) 910 | message(header) 911 | 912 | # perform bootstrap 913 | bootstrap(version, libpath) 914 | 915 | # exit early if we're just testing bootstrap 916 | if (!is.na(Sys.getenv("RENV_BOOTSTRAP_INSTALL_ONLY", unset = NA))) 917 | return(TRUE) 918 | 919 | # try again to load 920 | if (requireNamespace("renv", lib.loc = libpath, quietly = TRUE)) { 921 | message("* Successfully installed and loaded renv ", version, ".") 922 | return(renv::load()) 923 | } 924 | 925 | # failed to download or load renv; warn the user 926 | msg <- c( 927 | "Failed to find an renv installation: the project will not be loaded.", 928 | "Use `renv::activate()` to re-initialize the project." 929 | ) 930 | 931 | warning(paste(msg, collapse = "\n"), call. = FALSE) 932 | 933 | }) 934 | -------------------------------------------------------------------------------- /renv/settings.dcf: -------------------------------------------------------------------------------- 1 | bioconductor.version: 2 | external.libraries: 3 | ignored.packages: 4 | package.dependency.fields: Imports, Depends, LinkingTo 5 | r.version: 6 | snapshot.type: implicit 7 | use.cache: TRUE 8 | vcs.ignore.cellar: TRUE 9 | vcs.ignore.library: TRUE 10 | vcs.ignore.local: TRUE 11 | --------------------------------------------------------------------------------
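The estimator help files above (propensity_scores.Rd, propensity_matching.Rd, standardization.Rd) all start from the same nhefs setup, so their examples can be chained. The sketch below is an illustrative assumption about typical combined usage, not a shipped example from the package: it fits the propensity model once, reuses its scores through the documented p.scores argument of propensity_matching (with both documented type values), and compares the resulting ATE summaries against plain parametric standardization. The object names (p.score, pm.strata, pm.stdm, std.mod) are hypothetical.

library(CausalModels)
library(causaldata)
library(multcomp)

data(nhefs)
nhefs.nmv <- nhefs[which(!is.na(nhefs$wt82)), ]
nhefs.nmv$qsmk <- as.factor(nhefs.nmv$qsmk)

confounders <- c(
  "sex", "race", "age", "education", "smokeintensity",
  "smokeyrs", "exercise", "active", "wt71"
)

# shared setup used by every estimator in the package
init_params(wt82_71, qsmk, covariates = confounders, data = nhefs.nmv)

# fit the propensity model once and reuse its estimated scores
p.score <- propensity_scores(nhefs.nmv)

# stratification on the propensity score (default type = "strata")
pm.strata <- propensity_matching(nhefs.nmv, p.scores = p.score$p.scores)

# standardization conditional on propensity strata (type = "stdm")
pm.stdm <- propensity_matching(nhefs.nmv, p.scores = p.score$p.scores, type = "stdm")

# parametric standardization adjusting directly for the covariates
std.mod <- standardization(data = nhefs.nmv)

# compare average treatment effect summaries across the three approaches
pm.strata$ATE.summary
pm.stdm$ATE.summary
std.mod$ATE.summary

Because all three target the same average treatment effect of qsmk on wt82_71, their point estimates should be broadly similar when the underlying models are reasonably specified; large disagreements usually indicate model misspecification rather than a property of the package.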