├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ └── pkgdown.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── R ├── conformal.R ├── conformalCf.R ├── conformalCf_CV.R ├── conformalCf_split.R ├── conformalInt.R ├── conformalInt_CV.R ├── conformalInt_split.R ├── conformalIte.R ├── conformalIte_cf.R ├── conformalIte_naive.R ├── conformalIte_nest.R ├── conformal_CV.R ├── conformal_learners.R ├── conformal_split.R ├── conformal_utils.R ├── import-packages.R └── utils.R ├── README.md ├── _pkgdown.yml ├── cfcausal.Rproj ├── docs ├── 404.html ├── LICENSE-text.html ├── LICENSE.html ├── articles │ ├── cfcausal_demo.html │ ├── cfcausal_demo_files │ │ └── header-attrs-2.2 │ │ │ └── header-attrs.js │ └── index.html ├── authors.html ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── conformal.html │ ├── conformalCf.html │ ├── conformalInt.html │ ├── conformalIte.html │ ├── index.html │ ├── predict.conformalCV.html │ ├── predict.conformalIntCV.html │ ├── predict.conformalIntSplit.html │ └── predict.conformalSplit.html ├── inst └── CITATION ├── man ├── conformal.Rd ├── conformalCf.Rd ├── conformalInt.Rd ├── conformalIte.Rd ├── predict.conformalCV.Rd ├── predict.conformalIntCV.Rd ├── predict.conformalIntSplit.Rd └── predict.conformalSplit.Rd └── vignettes ├── auto └── cfcausal.el ├── cfcausal.bib └── cfcausal_demo.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^LICENSE\.md$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^R_backup/* 5 | ^doc$ 6 | ^Meta$ 7 | ^_pkgdown\.yml$ 8 | ^docs$ 9 | ^pkgdown$ 10 | ^\.github$ 11 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: master 4 | 5 | name: pkgdown 6 | 7 | jobs: 8 | pkgdown: 9 | runs-on: macOS-latest 10 | env: 11 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 12 | steps: 13 | - uses: actions/checkout@v2 14 | 15 | - uses: r-lib/actions/setup-r@master 16 | 17 | - uses: r-lib/actions/setup-pandoc@master 18 | 19 | - name: Query dependencies 20 | run: | 21 | install.packages('remotes') 22 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 23 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 24 | shell: Rscript {0} 25 | 26 | - name: Cache R packages 27 | uses: actions/cache@v1 28 | with: 29 | path: ${{ env.R_LIBS_USER }} 30 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 31 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 32 | 33 | - name: Install dependencies 34 | run: | 35 | install.packages("remotes") 36 | remotes::install_deps(dependencies = TRUE) 37 | remotes::install_dev("pkgdown") 38 | shell: Rscript {0} 39 | 40 | - name: Install package 41 | run: R CMD INSTALL . 
42 | 43 | - name: Deploy package 44 | run: pkgdown::deploy_to_branch(new_process = FALSE) 45 | shell: Rscript {0} 46 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | R_backup/* 6 | doc 7 | Meta 8 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: cfcausal 2 | Type: Package 3 | Title: Conformal inference of counterfactuals and individual treatment effects 4 | Version: 0.2.1 5 | Author: Lihua Lei [aut, cre], Emmanuel Candes [ctb] 6 | Maintainer: Lihua Lei 7 | Description: Implementation of weighted conformal inference for counterfactuals and individual treatment effects, including both split conformal inference and cross-validation+ (CV+). 8 | License: MIT + file LICENSE 9 | Encoding: UTF-8 10 | LazyData: true 11 | Suggests: 12 | bartMachine, gbm, grf, knitr, randomForest, rmarkdown 13 | RoxygenNote: 7.2.1 14 | Imports: 15 | methods 16 | VignetteBuilder: knitr 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2019 2 | COPYRIGHT HOLDER: Lihua Lei 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2019 Lihua Lei 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(predict,conformalCV) 4 | S3method(predict,conformalIntCV) 5 | S3method(predict,conformalIntSplit) 6 | S3method(predict,conformalSplit) 7 | export(conformal) 8 | export(conformalCf) 9 | export(conformalInt) 10 | export(conformalIte) 11 | importFrom(methods,formalArgs) 12 | importFrom(stats,predict) 13 | importFrom(stats,quantile) 14 | -------------------------------------------------------------------------------- /R/conformalCf.R: -------------------------------------------------------------------------------- 1 | #' Conformal inference for counterfactuals 2 | #' 3 | #' \code{conformalCf} computes intervals for counterfactuals or outcomes with ignorable missing values in general. 4 | #' It supports both split conformal inference and CV+, 5 | #' including weighted Jackknife+ as a special case. For each type, it supports both conformalized 6 | #' quantile regression (CQR) and standard conformal inference based on conditional mean regression. 7 | #' 8 | #' @details The outcome \code{Y} must comprise both observed values and missing values encoded as NA. 9 | #' The missing values are used to estimate the propensity score \eqn{P(missing | X)}. 10 | #' 11 | #' \code{estimand} controls the type of coverage to be guaranteed: 12 | #' \itemize{ 13 | #' \item (Default) when \code{estimand = "unconditional"}, the interval has 14 | #' \eqn{P(Y \in \hat{C}(X))\ge 1 - \alpha}. 15 | #' \item When \code{estimand = "nonmissing"}, the interval has 16 | #' \eqn{P(Y \in \hat{C}(X) | nonmissing) \ge 1 - \alpha}. 17 | #' \item When \code{estimand = "missing"}, the interval has 18 | #' \eqn{P(Y \in \hat{C}(X) | missing) \ge 1 - \alpha}. 19 | #' } 20 | #' 21 | #' When \code{side = "above"}, 22 | #' intervals are of form [-Inf, a(x)] and when \code{side = "below"} the intervals are of form [a(x), Inf]. 23 | #' 24 | #' \code{outfun} can be a valid string, including 25 | #' \itemize{ 26 | #' \item "RF" for random forest that predicts the conditional mean, a wrapper built on \code{randomForest} package. 27 | #' Used when \code{type = "mean"}. 28 | #' \item "quantRF" for quantile random forest that predicts the conditional quantiles, a wrapper built on 29 | #' \code{grf} package. Used when \code{type = "CQR"}. 30 | #' \item "Boosting" for gradient boosting that predicts the conditional mean, a wrapper built on \code{gbm} 31 | #' package. Used when \code{type = "mean"}. 32 | #' \item "quantBoosting" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 33 | #' \code{gbm} package. Used when \code{type = "CQR"}. 34 | #' \item "BART" for gradient boosting that predicts the conditional mean, a wrapper built on \code{bartMachine} 35 | #' package. Used when \code{type = "mean"}. 36 | #' \item "quantBART" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 37 | #' \code{bartMachine} package. Used when \code{type = "CQR"}. 38 | #' } 39 | #' or a function object whose input must include, but not limited to 40 | #' \itemize{ 41 | #' \item \code{Y} for outcome in the training data. 42 | #' \item \code{X} for covariates in the training data. 43 | #' \item \code{Xtest} for covariates in the testing data. 
44 | #' } 45 | #' When \code{type = "CQR"}, \code{outfun} should also include an argument \code{quantiles} that is either 46 | #' a vector of length 2 or a scalar, depending on the argument \code{side}. The output of \code{outfun} 47 | #' must be a matrix with two columns giving the conditional quantile estimates when \code{quantiles} is 48 | #' a vector of length 2; otherwise, it must be a vector giving the conditional quantile estimate or 49 | #' conditional mean estimate. Other optional arguments can be passed into \code{outfun} through \code{outparams}. 50 | #' 51 | #' \code{psfun} can be a valid string, including 52 | #' \itemize{ 53 | #' \item "RF" for random forest that predicts the propensity score, a wrapper built on \code{randomForest} package. 54 | #' Used when \code{type = "mean"}. 55 | #' \item "Boosting" for gradient boosting that predicts the propensity score, a wrapper built on \code{gbm} 56 | #' package. Used when \code{type = "mean"}. 57 | #' } 58 | #' or a function object whose input must include, but not limited to 59 | #' \itemize{ 60 | #' \item \code{Y} for treatment assignment, a binary vector, in the training data. 61 | #' \item \code{X} for covariates in the training data. 62 | #' \item \code{Xtest} for covariates in the testing data. 63 | #' } 64 | #' The output of \code{psfun} must be a vector of predicted probabilities. Other optional arguments 65 | #' can be passed into \code{psfun} through \code{psparams}. 66 | #' 67 | #' @param X covariates. 68 | #' @param Y outcome vector with missing values encoded as NA. See Details. 69 | #' @param estimand a string that takes values in \{"unconditional", "nonmissing", "missing"\}. See Details. 70 | #' @param type a string that takes values in \{"CQR", "mean"\}. 71 | #' @param side a string that takes values in \{"two", "above", "below"\}. See Details. 72 | #' @param quantiles a scalar or a vector of length 2 depending on \code{side}. Used only when \code{type = "CQR"}. See Details. 73 | #' @param outfun a function that models the conditional mean or quantiles, or a valid string. 74 | #' The default is random forest when \code{type = "mean"} and quantile random forest when 75 | #' \code{type = "CQR"}. See Details. 76 | #' @param outparams a list of other parameters to be passed into \code{outfun}. 77 | #' @param psfun a function that models the missing mechanism (probability of missing given X), or a valid string. 78 | #' The default is "Boosting". See Details. 79 | #' @param psparams a list of other parameters to be passed into \code{psfun}. 80 | #' @param useCV FALSE for split conformal inference and TRUE for CV+. 81 | #' @param trainprop proportion of units for training \code{outfun}. The default is 75\%. Used only when \code{useCV = FALSE}. 82 | #' @param nfolds number of folds. The default is 10. Used only when \code{useCV = TRUE}.
83 | #' 84 | #' @return a \code{conformalSplit} object when \code{useCV = FALSE} or a \code{conformalCV} object 85 | #' 86 | #' @seealso 87 | #' \code{\link{conformal}}, \code{\link{conformalIte}} 88 | #' 89 | #' @examples 90 | #' \donttest{# Generate data from a linear model 91 | #' set.seed(1) 92 | #' n <- 1000 93 | #' d <- 5 94 | #' X <- matrix(rnorm(n * d), nrow = n) 95 | #' beta <- rep(1, 5) 96 | #' Y <- X %*% beta + rnorm(n) 97 | #' 98 | #' # Generate missing indicators 99 | #' missing_prob <- pnorm(X[, 1]) 100 | #' if_missing <- missing_prob < runif(n) 101 | #' Y[if_missing] <- NA 102 | #' 103 | #' # Generate testing data 104 | #' ntest <- 5 105 | #' Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 106 | #' 107 | #' # Run weighted split CQR 108 | #' obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 109 | #' outfun = "quantRF", useCV = FALSE) 110 | #' predict(obj, Xtest, alpha = 0.1) 111 | #' 112 | #' # Run weighted standard conformal inference 113 | #' obj <- conformalCf(X, Y, type = "mean", 114 | #' outfun = "RF", useCV = FALSE) 115 | #' predict(obj, Xtest, alpha = 0.1) 116 | #' 117 | #' # Run one-sided weighted split CQR 118 | #' obj1 <- conformalCf(X, Y, type = "CQR", side = "above", 119 | #' quantiles = 0.95, outfun = "quantRF", useCV = FALSE) 120 | #' predict(obj1, Xtest, alpha = 0.1) 121 | #' obj2 <- conformalCf(X, Y, type = "CQR", side = "below", 122 | #' quantiles = 0.05, outfun = "quantRF", useCV = FALSE) 123 | #' predict(obj2, Xtest, alpha = 0.1) 124 | #' 125 | #' # Run split CQR with a self-defined quantile random forest 126 | #' # Y, X, Xtest, quantiles should be included in the inputs 127 | #' quantRF <- function(Y, X, Xtest, quantiles, ...){ 128 | #' fit <- grf::quantile_forest(X, Y, quantiles = quantiles, ...) 129 | #' res <- predict(fit, Xtest, quantiles = quantiles) 130 | #' if (is.list(res) && !is.data.frame(res)){ 131 | #' # for the recent update of \code{grf} package that 132 | #' # changes the output format 133 | #' res <- res$predictions 134 | #' } 135 | #' if (length(quantiles) == 1){ 136 | #' res <- as.numeric(res) 137 | #' } else { 138 | #' res <- as.matrix(res) 139 | #' } 140 | #' return(res) 141 | #' } 142 | #' obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 143 | #' outfun = quantRF, useCV = FALSE) 144 | #' predict(obj, Xtest, alpha = 0.1) 145 | #' 146 | #' # Run standard split conformal inference with a self-defined linear regression 147 | #' # Y, X, Xtest should be included in the inputs 148 | #' linearReg <- function(Y, X, Xtest){ 149 | #' X <- as.data.frame(X) 150 | #' Xtest <- as.data.frame(Xtest) 151 | #' data <- data.frame(Y = Y, X) 152 | #' fit <- lm(Y ~ ., data = data) 153 | #' as.numeric(predict(fit, Xtest)) 154 | #' } 155 | #' obj <- conformalCf(X, Y, type = "mean", 156 | #' outfun = linearReg, useCV = FALSE) 157 | #' predict(obj, Xtest, alpha = 0.1) 158 | #' 159 | #' # Run split CQR with a built-in psfun 160 | #' # Y, X, Xtest, should be included in the inputs 161 | #' obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 162 | #' outfun = "quantRF", psfun = "RF", useCV = FALSE) 163 | #' predict(obj, Xtest, alpha = 0.1) 164 | #' 165 | #' # Run split CQR with a self-defined function to estimate propensity scores 166 | #' # Y, X, Xtest, should be included in the inputs 167 | #' logitReg <- function(Y, X, Xtest, ...){ 168 | #' X <- as.data.frame(X) 169 | #' Xtest <- as.data.frame(Xtest) 170 | #' data <- data.frame(Y = Y, X) 171 | #' fit <- glm(Y ~ ., data = data, family = "binomial", ...) 
172 | #' as.numeric(predict(fit, Xtest, type = "response")) 173 | #' } 174 | #' obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 175 | #' outfun = "quantRF", psfun = logitReg, useCV = FALSE) 176 | #' predict(obj, Xtest, alpha = 0.1) 177 | #' 178 | #' } 179 | #' @export 180 | conformalCf <- function(X, Y, 181 | estimand = c("unconditional", 182 | "nonmissing", 183 | "missing"), 184 | type = c("CQR", "mean"), 185 | side = c("two", "above", "below"), 186 | quantiles = NULL, 187 | outfun = NULL, 188 | outparams = list(), 189 | psfun = NULL, 190 | psparams = list(), 191 | useCV = FALSE, 192 | trainprop = 0.75, 193 | nfolds = 10){ 194 | type <- type[1] 195 | stopifnot(type %in% c("CQR", "mean")) 196 | side <- side[1] 197 | stopifnot(side %in% c("two", "above", "below")) 198 | estimand <- estimand[1] 199 | stopifnot(estimand %in% c("unconditional", 200 | "nonmissing", 201 | "missing")) 202 | 203 | if (is.null(outfun)){ 204 | outfun <- switch(type, 205 | CQR = quantRF, 206 | mean = RF) 207 | } else if (is.character(outfun)){ 208 | outfun <- str_outfun(outfun[1]) 209 | } else if (is.function(outfun)){ 210 | check_outfun(outfun, type) 211 | } else { 212 | stop("outfun must be NULL or a string or a function") 213 | } 214 | 215 | if (is.null(psfun)){ 216 | psfun <- Boosting 217 | } else if (is.character(psfun)){ 218 | psfun <- str_psfun(psfun[1]) 219 | } else if (is.function(psfun)){ 220 | check_psfun(psfun) 221 | } else { 222 | stop("psfun must be NULL or a string or a function") 223 | } 224 | 225 | if (!useCV){ 226 | conformalCf_split(X, Y, 227 | estimand, 228 | type, side, 229 | quantiles, 230 | outfun, outparams, 231 | psfun, psparams, 232 | trainprop) 233 | } else { 234 | conformalCf_CV(X, Y, 235 | estimand, 236 | type, side, 237 | quantiles, 238 | outfun, outparams, 239 | psfun, psparams, 240 | nfolds) 241 | } 242 | } 243 | -------------------------------------------------------------------------------- /R/conformalCf_CV.R: -------------------------------------------------------------------------------- 1 | ## CV+ for counterfactuals. 
See ?conformalCf 2 | conformalCf_CV <- function(X, Y, 3 | estimand, 4 | type, side, 5 | quantiles, 6 | outfun, outparams, 7 | psfun, psparams, 8 | nfolds){ 9 | T <- as.numeric(!is.na(Y)) 10 | inds1 <- which(T == 1) 11 | inds0 <- which(T == 0) 12 | n1 <- length(inds1) 13 | n0 <- length(inds0) 14 | if (n1 < nfolds){ 15 | stop("Insufficient non-missing data") 16 | } 17 | idlist1 <- gen_cv_ids(n1, nfolds, offset = 0) 18 | idlist0 <- gen_cv_ids(n0, nfolds, offset = 0) 19 | idlist <- lapply(1:nfolds, function(k){ 20 | c(inds1[idlist1[[k]]], inds0[idlist0[[k]]]) 21 | }) 22 | 23 | psparams0 <- psparams 24 | if (estimand == "unconditional"){ 25 | wtfun <- lapply(1:nfolds, function(k){ 26 | testid <- idlist[[k]] 27 | Xtrain <- X[-testid, ,drop=FALSE] 28 | Ttrain <- T[-testid] 29 | psparams <- c(list(Y = Ttrain, X = Xtrain), psparams0) 30 | function(X){ 31 | ps <- do.call(psfun, c(psparams, list(Xtest = X))) 32 | 1 / ps 33 | } 34 | }) 35 | psparams <- c(list(Y = T, X = X), psparams0) 36 | wtfun_test <- function(X){ 37 | ps <- do.call(psfun, c(psparams, list(Xtest = X))) 38 | 1 / ps 39 | } 40 | } else if (estimand == "nonmissing"){ 41 | wtfun_test <- function(X){ 42 | rep(1, nrow(X)) 43 | } 44 | wtfun <- lapply(1:nfolds, function(k){ 45 | wtfun_test 46 | }) 47 | } else if (estimand == "missing"){ 48 | wtfun <- lapply(1:nfolds, function(k){ 49 | testid <- idlist[[k]] 50 | Xtrain <- X[-testid, ,drop=FALSE] 51 | Ttrain <- T[-testid] 52 | psparams <- c(list(Y = Ttrain, X = Xtrain), psparams0) 53 | function(X){ 54 | ps <- do.call(psfun, c(psparams, list(Xtest = X))) 55 | (1 - ps) / ps 56 | } 57 | }) 58 | psparams <- c(list(Y = T, X = X), psparams0) 59 | wtfun_test <- function(X){ 60 | ps <- do.call(psfun, c(psparams, list(Xtest = X))) 61 | (1 - ps) / ps 62 | } 63 | } 64 | 65 | X <- X[inds1, ,drop=FALSE] 66 | Y <- Y[inds1] 67 | res <- conformalCV(X, Y, 68 | type, side, 69 | quantiles, 70 | outfun, outparams, 71 | wtfun, 72 | nfolds, idlist1) 73 | res$wtfun <- wtfun_test 74 | return(res) 75 | } 76 | -------------------------------------------------------------------------------- /R/conformalCf_split.R: -------------------------------------------------------------------------------- 1 | ## Split conformal inference for counterfactuals. 
See ?conformalCf 2 | conformalCf_split <- function(X, Y, 3 | estimand, 4 | type, side, 5 | quantiles, 6 | outfun, outparams, 7 | psfun, psparams, 8 | trainprop){ 9 | T <- as.numeric(!is.na(Y)) 10 | inds1 <- which(T == 1) 11 | inds0 <- which(T == 0) 12 | n1 <- length(inds1) 13 | n0 <- length(inds0) 14 | trainid1 <- sample(n1, floor(n1 * trainprop)) 15 | trainid0 <- sample(n0, floor(n0 * trainprop)) 16 | trainid <- c(inds1[trainid1], inds0[trainid0]) 17 | Xtrain <- X[trainid, ,drop=FALSE] 18 | Ttrain <- T[trainid] 19 | 20 | psparams0 <- psparams 21 | if (estimand == "unconditional"){ 22 | psparams <- c(list(Y = Ttrain, X = Xtrain), psparams0) 23 | wtfun <- function(X){ 24 | ps <- do.call(psfun, c(list(Xtest = X), psparams)) 25 | 1 / ps 26 | } 27 | } else if (estimand == "nonmissing"){ 28 | wtfun <- function(X){ 29 | rep(1, nrow(X)) 30 | } 31 | } else if (estimand == "missing"){ 32 | psparams <- c(list(Y = Ttrain, X = Xtrain), psparams0) 33 | wtfun <- function(X){ 34 | ps <- do.call(psfun, c(list(Xtest = X), psparams)) 35 | (1 - ps) / ps 36 | } 37 | } 38 | 39 | X <- X[inds1, ,drop=FALSE] 40 | Y <- Y[inds1] 41 | res <- conformalSplit(X, Y, 42 | type, side, 43 | quantiles, 44 | outfun, outparams, 45 | wtfun, 46 | trainprop, trainid1) 47 | return(res) 48 | } 49 | -------------------------------------------------------------------------------- /R/conformalInt.R: -------------------------------------------------------------------------------- 1 | #' Conformal inference for interval outcomes 2 | #' 3 | #' \code{conformalInt} is a framework for weighted and unweighted conformal inference for interval 4 | #' outcomes. It supports both weighted split conformal inference and weighted CV+, 5 | #' including weighted Jackknife+ as a special case. For each type, it supports both conformalized 6 | #' quantile regression (CQR) and standard conformal inference based on conditional mean regression. 7 | #' 8 | #' @details The conformal interval for a testing point x is in the form of 9 | #' \eqn{[\hat{m}^{L}(x) - \eta, \hat{m}^{R}(x) + \eta]} where \eqn{\hat{m}^{L}(x)} is fit by \code{lofun} 10 | #' and \eqn{\hat{m}^{R}(x)} is fit by \code{upfun}. 11 | #' 12 | #' \code{lofun}/\code{upfun} can be a valid string, including 13 | #' \itemize{ 14 | #' \item "RF" for random forest that predicts the conditional mean, a wrapper built on \code{randomForest} package. 15 | #' Used when \code{type = "mean"}; 16 | #' \item "quantRF" for quantile random forest that predicts the conditional quantiles, a wrapper built on 17 | #' \code{grf} package. Used when \code{type = "CQR"}; 18 | #' \item "Boosting" for gradient boosting that predicts the conditional mean, a wrapper built on \code{gbm} 19 | #' package. Used when \code{type = "mean"}; 20 | #' \item "quantBoosting" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 21 | #' \code{gbm} package. Used when \code{type = "CQR"}; 22 | #' \item "BART" for gradient boosting that predicts the conditional mean, a wrapper built on \code{bartMachine} 23 | #' package. Used when \code{type = "mean"}; 24 | #' \item "quantBART" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 25 | #' \code{bartMachine} package. 
Used when \code{type = "CQR"}; 26 | #' } 27 | #' 28 | #' or a function object whose input must include, but not limited to 29 | #' \itemize{ 30 | #' \item \code{Y} for outcome in the training data; 31 | #' \item \code{X} for covariates in the training data; 32 | #' \item \code{Xtest} for covariates in the testing data. 33 | #' } 34 | #' When \code{type = "CQR"}, \code{lofun} and \code{upfun} should also include an argument \code{quantiles} that is a scalar. The output of \code{lofun} and \code{upfun} must be a vector giving the conditional quantile estimate or conditional mean estimate. Other optional arguments can be 35 | #' passed into \code{lofun} and \code{upfun} through \code{loparams} and \code{upparams}. 36 | #' 37 | #' @param X covariates. 38 | #' @param Y interval outcomes. A matrix with two columns. 39 | #' @param type a string that takes values in \{"CQR", "mean"\}. 40 | #' @param lofun a function to fit the lower bound, or a valid string. See Details. 41 | #' @param loquantile the quantile to be fit by \code{lofun}. Used only when \code{type = "CQR"}. 42 | #' @param loparams a list of other parameters to be passed into \code{lofun}. 43 | #' @param upfun a function to fit the upper bound, or a valid string. See Details. 44 | #' @param upquantile the quantile to be fit by \code{upfun}. Used only when \code{type = "CQR"}. 45 | #' @param upparams a list of other parameters to be passed into \code{upfun}. 46 | #' @param wtfun NULL for unweighted conformal inference, or a function for weighted conformal inference 47 | #' when \code{useCV = FALSE}, or a list of functions for weighted conformal inference when \code{useCV = TRUE}. 48 | #' See Details. 49 | #' @param useCV FALSE for split conformal inference and TRUE for CV+. 50 | #' @param trainprop proportion of units for training \code{lofun} and \code{upfun}. The default is 75\%. Used only when \code{useCV = FALSE}. 51 | #' @param trainid indices of training units. The default is NULL, generating random indices. Used only when \code{useCV = FALSE}. 52 | #' @param nfolds number of folds. The default is 10. Used only when \code{useCV = TRUE}. 53 | #' @param idlist a list of indices of length \code{nfolds}. The default is NULL, generating random indices. Used only when \code{useCV = TRUE}. 54 | #' 55 | #' @return a \code{conformalIntSplit} object when \code{useCV = FALSE} with the following attributes: 56 | #' \itemize{ 57 | #' \item{Yscore:}{ a vector of non-conformity score on the calibration fold} 58 | #' \item{wt:}{ a vector of weights on the calibration fold} 59 | #' \item{Ymodel:}{ a function with required argument \code{X} that produces the estimates of the conditional 60 | #' mean or quantiles of \code{X}} 61 | #' \item{wtfun, type, loquantile, upquantile, trainprop, trainid:}{ the same as inputs} 62 | #' } 63 | #' 64 | #' or a \code{conformalIntCV} object when \code{useCV = TRUE} with the following attributes: 65 | #' \itemize{ 66 | #' \item{info: }{ a list of length \code{nfolds} with each element being a list with attributes 67 | #' \code{Yscore}, \code{wt} and \code{Ymodel} described above for each fold} 68 | #' \item{wtfun, type, loquantile, upquantile, nfolds, idlist:}{ the same as inputs} 69 | #' } 70 | #' 71 | #' @seealso 72 | #' \code{\link{predict.conformalIntSplit}}, \code{\link{predict.conformalIntCV}}.
73 | #' 74 | #' @examples 75 | #' \donttest{# Generate data from a linear model 76 | #' set.seed(1) 77 | #' n <- 1000 78 | #' d <- 5 79 | #' X <- matrix(rnorm(n * d), nrow = n) 80 | #' beta <- rep(1, 5) 81 | #' Ylo <- X %*% beta + rnorm(n) 82 | #' Yup <- Ylo + pmax(1, 2 * rnorm(n)) 83 | #' Y <- cbind(Ylo, Yup) 84 | #' 85 | #' # Generate testing data 86 | #' ntest <- 5 87 | #' Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 88 | #' 89 | #' # Run unweighted split CQR with the built-in quantile random forest learner 90 | #' # grf package needs to be installed 91 | #' obj <- conformalInt(X, Y, type = "CQR", 92 | #' lofun = "quantRF", upfun = "quantRF", 93 | #' wtfun = NULL, useCV = FALSE) 94 | #' predict(obj, Xtest, alpha = 0.1) 95 | #' 96 | #' # Run unweighted standard split conformal inference with the built-in random forest learner 97 | #' # randomForest package needs to be installed 98 | #' obj <- conformalInt(X, Y, type = "mean", 99 | #' lofun = "RF", upfun = "RF", 100 | #' wtfun = NULL, useCV = FALSE) 101 | #' predict(obj, Xtest, alpha = 0.1) 102 | #' 103 | #' # Run unweighted CQR-CV+ with the built-in quantile random forest learner 104 | #' # grf package needs to be installed 105 | #' obj <- conformalInt(X, Y, type = "CQR", 106 | #' lofun = "quantRF", upfun = "quantRF", 107 | #' wtfun = NULL, useCV = TRUE) 108 | #' predict(obj, Xtest, alpha = 0.1) 109 | #' 110 | #' # Run unweighted standard CV+ with the built-in random forest learner 111 | #' # randomForest package needs to be installed 112 | #' obj <- conformalInt(X, Y, type = "mean", 113 | #' lofun = "RF", upfun = "RF", 114 | #' wtfun = NULL, useCV = TRUE) 115 | #' predict(obj, Xtest, alpha = 0.1) 116 | #' 117 | #' # Run weighted split CQR with w(x) = pnorm(x1) 118 | #' wtfun <- function(X){pnorm(X[, 1])} 119 | #' obj <- conformalInt(X, Y, type = "CQR", 120 | #' lofun = "quantRF", upfun = "quantRF", 121 | #' wtfun = wtfun, useCV = FALSE) 122 | #' predict(obj, Xtest, alpha = 0.1) 123 | #' 124 | #' # Run unweighted split CQR with a self-defined quantile random forest 125 | #' # Y, X, Xtest, quantiles should be included in the inputs 126 | #' quantRF <- function(Y, X, Xtest, quantiles, ...){ 127 | #' fit <- grf::quantile_forest(X, Y, quantiles = quantiles, ...) 
128 | #' res <- predict(fit, Xtest, quantiles = quantiles) 129 | #' if (is.list(res) && !is.data.frame(res)){ 130 | #' # for the recent update of \code{grf} package that 131 | #' # changes the output format 132 | #' res <- res$predictions 133 | #' } 134 | #' if (length(quantiles) == 1){ 135 | #' res <- as.numeric(res) 136 | #' } else { 137 | #' res <- as.matrix(res) 138 | #' } 139 | #' return(res) 140 | #' } 141 | #' obj <- conformalInt(X, Y, type = "CQR", 142 | #' lofun = quantRF, upfun = quantRF, 143 | #' wtfun = NULL, useCV = FALSE) 144 | #' predict(obj, Xtest, alpha = 0.1) 145 | #' 146 | #' # Run unweighted standard split conformal inference with a self-defined linear regression 147 | #' # Y, X, Xtest should be included in the inputs 148 | #' linearReg <- function(Y, X, Xtest){ 149 | #' X <- as.data.frame(X) 150 | #' Xtest <- as.data.frame(Xtest) 151 | #' data <- data.frame(Y = Y, X) 152 | #' fit <- lm(Y ~ ., data = data) 153 | #' as.numeric(predict(fit, Xtest)) 154 | #' } 155 | #' obj <- conformalInt(X, Y, type = "mean", 156 | #' lofun = linearReg, upfun = linearReg, 157 | #' wtfun = NULL, useCV = FALSE) 158 | #' predict(obj, Xtest, alpha = 0.1) 159 | #' 160 | #' # Run weighted split-CQR with user-defined weights 161 | #' wtfun <- function(X){ 162 | #' pnorm(X[, 1]) 163 | #' } 164 | #' obj <- conformalInt(X, Y, type = "CQR", 165 | #' lofun = "quantRF", upfun = "quantRF", 166 | #' wtfun = wtfun, useCV = FALSE) 167 | #' predict(obj, Xtest, alpha = 0.1) 168 | #' 169 | #' # Run weighted CQR-CV+ with user-defined weights 170 | #' # Use a list of identical functions 171 | #' set.seed(1) 172 | #' wtfun_list <- lapply(1:10, function(i){wtfun}) 173 | #' obj1 <- conformalInt(X, Y, type = "CQR", 174 | #' lofun = "quantRF", upfun = "quantRF", 175 | #' wtfun = wtfun_list, useCV = TRUE) 176 | #' predict(obj1, Xtest, alpha = 0.1) 177 | #' 178 | #' # Use a single function. 
Equivalent to the above approach 179 | #' set.seed(1) 180 | #' obj2 <- conformalInt(X, Y, type = "CQR", 181 | #' lofun = "quantRF", upfun = "quantRF", 182 | #' wtfun = wtfun, useCV = TRUE) 183 | #' predict(obj2, Xtest, alpha = 0.1) 184 | #' } 185 | #' @export 186 | conformalInt <- function(X, Y, 187 | type = c("CQR", "mean"), 188 | lofun = NULL, 189 | loquantile = 0.5, 190 | loparams = list(), 191 | upfun = NULL, 192 | upquantile = 0.5, 193 | upparams = list(), 194 | wtfun = NULL, 195 | useCV = FALSE, 196 | trainprop = 0.75, trainid = NULL, 197 | nfolds = 10, idlist = NULL){ 198 | if (!is.matrix(Y) || ncol(Y) != 2){ 199 | stop("Y must be a matrix with 2 columns") 200 | } 201 | type <- type[1] 202 | stopifnot(type %in% c("CQR", "mean")) 203 | 204 | if (is.null(lofun)){ 205 | lofun <- switch(type, 206 | CQR = quantRF, 207 | mean = RF) 208 | } else if (is.character(lofun)){ 209 | lofun <- str_outfun(lofun[1]) 210 | } else if (is.function(lofun)){ 211 | check_outfun(lofun, type) 212 | } else { 213 | stop("lofun must be NULL or a string or a function") 214 | } 215 | 216 | if (is.null(upfun)){ 217 | upfun <- switch(type, 218 | CQR = quantRF, 219 | mean = RF) 220 | } else if (is.character(upfun)){ 221 | upfun <- str_outfun(upfun[1]) 222 | } else if (is.function(upfun)){ 223 | check_outfun(upfun, type) 224 | } else { 225 | stop("upfun must be NULL or a string or a function") 226 | } 227 | 228 | if (!useCV){ 229 | conformalIntSplit(X, Y, 230 | type, 231 | lofun, loquantile, loparams, 232 | upfun, upquantile, upparams, 233 | wtfun, 234 | trainprop, trainid) 235 | } else { 236 | conformalIntCV(X, Y, 237 | type, 238 | lofun, loquantile, loparams, 239 | upfun, upquantile, upparams, 240 | wtfun, 241 | nfolds, idlist) 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /R/conformalInt_CV.R: -------------------------------------------------------------------------------- 1 | ## Generic weighted interval CV+.
See ?conformalInt 2 | conformalIntCV <- function(X, Y, 3 | type = c("CQR", "mean"), 4 | lofun = NULL, 5 | loquantile = 0.5, 6 | loparams = list(), 7 | upfun = NULL, 8 | upquantile = 0.5, 9 | upparams = list(), 10 | wtfun = NULL, 11 | nfolds = 10, 12 | idlist = NULL){ 13 | n <- nrow(Y) 14 | 15 | if (is.null(idlist)){ 16 | idlist <- gen_cv_ids(n, nfolds) 17 | } 18 | if (!is.list(idlist) || length(idlist) != nfolds){ 19 | stop("idlist needs to a list of length 'nfolds'") 20 | } 21 | 22 | wtfun0 <- NULL 23 | if (is.null(wtfun)){ 24 | wtfun <- lapply(1:nfolds, function(k){ 25 | function(X){ 26 | rep(1, nrow(X)) 27 | } 28 | }) 29 | } else if (is.function(wtfun)){ 30 | wtfun0 <- wtfun 31 | wtfun <- rep(list(wtfun), nfolds) 32 | } else if (!is.list(wtfun) || length(wtfun) != nfolds){ 33 | stop("wtfun must be a function or a list (of functions) of length nfolds") 34 | } 35 | if (is.null(wtfun0)){ 36 | wtfun0 <- wtfun 37 | } 38 | 39 | if (type == "CQR"){ 40 | if (is.null(loquantile) || is.null(upquantile)){ 41 | stop("loquantile and upquantile should be provided if CQR is used.") 42 | } 43 | loparams <- c(list(quantiles = loquantile), loparams) 44 | upparams <- c(list(quantiles = upquantile), upparams) 45 | } 46 | 47 | loparams0 <- loparams 48 | upparams0 <- upparams 49 | info <- list() 50 | for (k in 1:nfolds){ 51 | testid <- idlist[[k]] 52 | Xtrain <- X[-testid, ,drop=FALSE] 53 | Ytrain <- Y[-testid, ] 54 | Xval <- X[testid, ,drop=FALSE] 55 | Yval <- Y[testid, ] 56 | 57 | loparams <- c(list(Y = Ytrain[, 1], X = Xtrain), loparams0) 58 | upparams <- c(list(Y = Ytrain[, 2], X = Xtrain), upparams0) 59 | Ylo_model <- function(X){ 60 | do.call(lofun, c(loparams, list(Xtest = X))) 61 | } 62 | Yup_model <- function(X){ 63 | do.call(upfun, c(upparams, list(Xtest = X))) 64 | } 65 | Ymodel <- list(Ylo_model, Yup_model) 66 | Yhat <- cbind(Ylo_model(Xval), Yup_model(Xval)) 67 | Yscore <- conformalScore(Yval, Yhat, type, "two") 68 | wt <- wtfun[[k]](Xval) 69 | 70 | obj <- list(Yscore = Yscore, 71 | wt = wt, 72 | Ymodel = Ymodel) 73 | info[[k]] <- obj 74 | } 75 | 76 | res <- list(info = info, 77 | wtfun = wtfun0, 78 | type = type, 79 | loquantile = loquantile, 80 | upquantile = upquantile, 81 | nfolds = nfolds, 82 | idlist = idlist) 83 | class(res) <- "conformalIntCV" 84 | return(res) 85 | } 86 | 87 | #' Predict Method for conformalIntCV objects 88 | #' 89 | #' Obtains predictive intervals on a testing dataset based on a \code{conformalIntCV} object 90 | #' from \code{\link{conformalInt}} with \code{useCV = TRUE}. 91 | #' 92 | #' Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 93 | #' weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 94 | #' In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 95 | #' and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 96 | #' is being used. 97 | #' 98 | #' @param object an object of class \code{conformalIntCV}; see \code{\link{conformalInt}}. 99 | #' @param Xtest testing covariates. 100 | #' @param alpha confidence level. 101 | #' @param wthigh upper truncation level of weights; see Details. 102 | #' @param wtlow lower truncation level of weights; see Details. 103 | #' @param useInf if FALSE then replace infinity by the maximum conformity score. 104 | #' @param ... other arguments 105 | #' 106 | #' @return predictive intervals. 
A data.frame with \code{nrow(Xtest)} rows and two columns: 107 | #' "lower" for the lower bound and "upper" for the upper bound. 108 | #' 109 | #' @seealso 110 | #' \code{\link{predict.conformalIntSplit}}, \code{\link{conformalInt}}. 111 | #' 112 | #' @export 113 | predict.conformalIntCV <- function(object, Xtest, 114 | alpha = 0.1, 115 | wthigh = 20, wtlow = 0.05, 116 | useInf = FALSE, 117 | ...){ 118 | type <- object$type 119 | nfolds <- object$nfolds 120 | info <- object$info 121 | 122 | 123 | for (k in 1:nfolds){ 124 | info[[k]]$Yhat_test <- cbind( 125 | info[[k]]$Ymodel[[1]](Xtest), 126 | info[[k]]$Ymodel[[2]](Xtest)) 127 | } 128 | 129 | wt <- do.call(c, lapply(info, function(x){x$wt})) 130 | if (is.function(object$wtfun)){ 131 | wt_test <- object$wtfun(Xtest) 132 | } else { 133 | wt_test <- sapply(object$wtfun, function(wtfun){ 134 | wtfun(Xtest) 135 | }) 136 | wt_test <- rowMeans(wt_test) 137 | } 138 | avg_wt <- mean(c(wt, wt_test)) 139 | wt <- censoring(wt / avg_wt, wthigh, wtlow) 140 | wt_test <- censoring(wt_test / avg_wt, wthigh, wtlow) 141 | 142 | totw <- sum(wt) 143 | wt <- wt / totw 144 | qt <- (1 + wt_test / totw) * (1 - alpha) 145 | ## qt <- pmin(qt, 1) 146 | 147 | CI <- sapply(1:length(qt), function(i){ 148 | Ylo <- lapply(info, function(x){ 149 | x$Yhat_test[i, 1] - x$Yscore 150 | }) 151 | Ylo <- do.call(c, Ylo) 152 | Ylo <- -weightedConformalCutoff(-Ylo, wt, qt[i], useInf) 153 | Yup <- lapply(info, function(x){ 154 | x$Yhat_test[i, 2] + x$Yscore 155 | }) 156 | Yup <- do.call(c, Yup) 157 | Yup <- weightedConformalCutoff(Yup, wt, qt[i], useInf) 158 | c(Ylo, Yup) 159 | }) 160 | 161 | res <- data.frame(lower = as.numeric(CI[1, ]), 162 | upper = as.numeric(CI[2, ])) 163 | return(res) 164 | } 165 | -------------------------------------------------------------------------------- /R/conformalInt_split.R: -------------------------------------------------------------------------------- 1 | ## Generic weighted interval conformal inference. 
See ?conformalInt 2 | conformalIntSplit <- function(X, Y, 3 | type = c("CQR", "mean"), 4 | lofun = NULL, 5 | loquantile = 0.5, 6 | loparams = list(), 7 | upfun = NULL, 8 | upquantile = 0.5, 9 | upparams = list(), 10 | wtfun = NULL, 11 | trainprop = 0.75, 12 | trainid = NULL){ 13 | n <- nrow(Y) 14 | 15 | if (is.null(wtfun)){ 16 | wtfun <- function(Xtest){ 17 | rep(1, nrow(Xtest)) 18 | } 19 | } 20 | 21 | if (type == "CQR"){ 22 | if (is.null(loquantile) || is.null(upquantile)){ 23 | stop("loquantile and upquantile should be provided if CQR is used.") 24 | } 25 | } 26 | 27 | if (is.null(trainid)){ 28 | trainid <- sample(n, floor(n * trainprop)) 29 | } 30 | Xtrain <- X[trainid, ,drop=FALSE] 31 | Ytrain <- Y[trainid, ] 32 | Xval <- X[-trainid, ,drop=FALSE] 33 | Yval <- Y[-trainid, ] 34 | 35 | if (type == "CQR"){ 36 | loparams <- c(list(Y = Ytrain[, 1], X = Xtrain, quantiles = loquantile), loparams) 37 | upparams <- c(list(Y = Ytrain[, 2], X = Xtrain, quantiles = upquantile), upparams) 38 | } else if (type == "mean"){ 39 | loparams <- c(list(Y = Ytrain[, 1], X = Xtrain), loparams) 40 | upparams <- c(list(Y = Ytrain[, 2], X = Xtrain), upparams) 41 | } 42 | 43 | Ylo_model <- function(X){ 44 | do.call(lofun, c(loparams, list(Xtest = X))) 45 | } 46 | Yup_model <- function(X){ 47 | do.call(upfun, c(upparams, list(Xtest = X))) 48 | } 49 | Ymodel <- list(Ylo_model, Yup_model) 50 | Yhat <- cbind(Ylo_model(Xval), Yup_model(Xval)) 51 | Yscore <- conformalScore(Yval, Yhat, type, "two") 52 | wt <- wtfun(Xval) 53 | 54 | obj <- list(Yscore = Yscore, wt = wt, 55 | Ymodel = Ymodel, wtfun = wtfun, 56 | type = type, 57 | loquantile = loquantile, 58 | upquantile = upquantile, 59 | trainprop = trainprop, 60 | trainid = trainid) 61 | class(obj) <- "conformalIntSplit" 62 | return(obj) 63 | } 64 | 65 | #' Predict Method for conformalIntSplit objects 66 | #' 67 | #' Obtains predictive intervals on a testing dataset based on a \code{conformalIntSplit} object 68 | #' from \code{\link{conformalInt}} with \code{useCV = FALSE}. 69 | #' 70 | #' Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 71 | #' weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 72 | #' In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 73 | #' and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 74 | #' is being used. 75 | #' 76 | #' @param object an object of class \code{conformalIntSplit}; see \code{\link{conformalInt}}. 77 | #' @param Xtest testing covariates. 78 | #' @param alpha confidence level. 79 | #' @param wthigh upper truncation level of weights; see Details. 80 | #' @param wtlow lower truncation level of weights; see Details. 81 | #' @param useInf if FALSE then replace infinity by the maximum conformity score. 82 | #' @param ... other arguments 83 | #' 84 | #' @return predictive intervals. A data.frame with \code{nrow(Xtest)} rows and two columns: 85 | #' "lower" for the lower bound and "upper" for the upper bound. 86 | #' 87 | #' @seealso 88 | #' \code{\link{predict.conformalIntCV}}, \code{\link{conformalInt}}. 
89 | #' 90 | #' @export 91 | predict.conformalIntSplit <- function(object, Xtest, 92 | alpha = 0.1, 93 | wthigh = 20, wtlow = 0.05, 94 | useInf = FALSE, 95 | ...){ 96 | type <- object$type 97 | Yhat_test <- cbind(object$Ymodel[[1]](Xtest), 98 | object$Ymodel[[2]](Xtest)) 99 | wt_test <- object$wtfun(Xtest) 100 | 101 | avg_wt <- mean(c(object$wt, wt_test)) 102 | wt <- censoring(object$wt / avg_wt, wthigh, wtlow) 103 | wt_test <- censoring(wt_test / avg_wt, wthigh, wtlow) 104 | 105 | totw <- sum(wt) 106 | wt <- wt / totw 107 | qt <- (1 + wt_test / totw) * (1 - alpha) 108 | Yslack <- weightedConformalCutoff(object$Yscore, wt, qt, useInf) 109 | 110 | Ylo <- Yhat_test[, 1] - Yslack 111 | Yup <- Yhat_test[, 2] + Yslack 112 | 113 | res <- data.frame(lower = Ylo, upper = Yup) 114 | return(res) 115 | } 116 | -------------------------------------------------------------------------------- /R/conformalIte_cf.R: -------------------------------------------------------------------------------- 1 | ## Conformal inference for individual treatment effects for subjects with only one 2 | ## missing potential outcome. See ?conformalIte 3 | conformalIteCf <- function(X, Y, T, 4 | type, side, 5 | quantiles, 6 | outfun, outparams, 7 | psfun, psparams, 8 | useCV, 9 | trainprop, 10 | nfolds){ 11 | obj <- conformalIteNaive(X, Y, T, 12 | type, side, 13 | quantiles, 14 | outfun, outparams, 15 | psfun, psparams, 16 | useCV, 17 | trainprop, 18 | nfolds) 19 | 20 | CIfun <- function(X, Y, T, 21 | alpha, wthigh, wtlow, useInf){ 22 | res <- predict(obj, X, 2 * alpha, wthigh, wtlow, useInf) 23 | CI <- matrix(NA, nrow(X), 2) 24 | CI[T == 1, 1] <- Y[T == 1] - res$Y0[T == 1, 2] 25 | CI[T == 1, 2] <- Y[T == 1] - res$Y0[T == 1, 1] 26 | CI[T == 0, 1] <- res$Y1[T == 0, 1] - Y[T == 0] 27 | CI[T == 0, 2] <- res$Y1[T == 0, 2] - Y[T == 0] 28 | CI <- as.data.frame(CI) 29 | names(CI) <- c("lower", "upper") 30 | return(CI) 31 | } 32 | 33 | res <- list(CIfun = CIfun) 34 | class(res) <- "conformalIteCf" 35 | return(res) 36 | } 37 | 38 | predict.conformalIteCf <- function(object, 39 | Xtest, Ytest, Ttest, 40 | alpha = 0.1, 41 | wthigh = 20, wtlow = 0.05, 42 | useInf = FALSE){ 43 | object$CIfun(Xtest, Ytest, Ttest, alpha, wthigh, wtlow, useInf) 44 | } 45 | -------------------------------------------------------------------------------- /R/conformalIte_naive.R: -------------------------------------------------------------------------------- 1 | ## Naive methods of Conformal inference for individual treatment effects for subjects with both 2 | ## missing potential outcome. 
See ?conformalIte 3 | conformalIteNaive <- function(X, Y, T, 4 | type, side, 5 | quantiles, 6 | outfun, outparams, 7 | psfun, psparams, 8 | useCV, 9 | trainprop, 10 | nfolds){ 11 | n <- length(Y) 12 | 13 | Y1 <- Y0 <- Y 14 | Y1[T == 0] <- NA 15 | Y0[T == 1] <- NA 16 | inds <- which(T == 1) 17 | Xtrain <- X 18 | 19 | estimand1 <- "missing" 20 | side1 <- switch(side, 21 | two = "two", 22 | above = "above", 23 | below = "below") 24 | if (side == "two"){ 25 | quantiles1 <- quantiles 26 | } else { 27 | quantiles1 <- quantiles[2] 28 | } 29 | obj1 <- conformalCf(Xtrain, Y1, 30 | estimand1, 31 | type, side1, 32 | quantiles1, 33 | outfun, outparams, 34 | psfun, psparams, 35 | useCV, 36 | trainprop, 37 | nfolds) 38 | Y1_CIfun <- function(X, alpha, wthigh, wtlow, useInf){ 39 | predict(obj1, X, alpha = alpha / 2, 40 | wthigh = wthigh, wtlow = wtlow, 41 | useInf = useInf) 42 | } 43 | 44 | estimand0 <- "missing" 45 | side0 <- switch(side, 46 | two = "two", 47 | above = "below", 48 | below = "above") 49 | if (side == "two"){ 50 | quantiles0 <- quantiles 51 | } else { 52 | quantiles0 <- quantiles[1] 53 | } 54 | obj0 <- conformalCf(Xtrain, Y0, 55 | estimand0, 56 | type, side0, 57 | quantiles0, 58 | outfun, outparams, 59 | psfun, psparams, 60 | useCV, 61 | trainprop, 62 | nfolds) 63 | Y0_CIfun <- function(X, alpha, wthigh, wtlow, useInf){ 64 | predict(obj0, X, alpha = alpha / 2, 65 | wthigh = wthigh, wtlow = wtlow, 66 | useInf = useInf) 67 | } 68 | 69 | Ite_CIfun <- function(X, alpha, wthigh, wtlow, useInf){ 70 | Y1_CI <- Y1_CIfun(X, alpha, wthigh, wtlow, useInf) 71 | Y0_CI <- Y0_CIfun(X, alpha, wthigh, wtlow, useInf) 72 | CI <- data.frame(lower = Y1_CI[, 1] - Y0_CI[, 2], 73 | upper = Y1_CI[, 2] - Y0_CI[, 1]) 74 | } 75 | 76 | res <- list(Ite_CIfun = Ite_CIfun, 77 | Y1_CIfun = Y1_CIfun, 78 | Y0_CIfun = Y0_CIfun) 79 | class(res) <- "conformalIteNaive" 80 | return(res) 81 | } 82 | 83 | predict.conformalIteNaive <- function(object, Xtest, 84 | alpha = 0.1, 85 | wthigh = 20, wtlow = 0.05, 86 | useInf = FALSE){ 87 | Ite_CI <- object$Ite_CIfun(Xtest, alpha, wthigh, wtlow, useInf) 88 | Y1_CI <- object$Y1_CIfun(Xtest, alpha, wthigh, wtlow, useInf) 89 | Y0_CI <- object$Y0_CIfun(Xtest, alpha, wthigh, wtlow, useInf) 90 | list(Ite = Ite_CI, Y1 = Y1_CI, Y0 = Y0_CI) 91 | } 92 | -------------------------------------------------------------------------------- /R/conformalIte_nest.R: -------------------------------------------------------------------------------- 1 | ## Nested methods of Conformal inference for individual treatment effects for subjects with both 2 | ## missing potential outcome. 
See ?conformalIte 3 | conformalIteNest <- function(X, Y, T, 4 | alpha, 5 | type, side, 6 | quantiles, 7 | outfun, outparams, 8 | psfun, psparams, 9 | exact, 10 | cfprop, 11 | citype, 12 | lofun, loquantile, loparams, 13 | upfun, upquantile, upparams, 14 | useCV, 15 | trainprop, 16 | nfolds, 17 | wthigh, wtlow, 18 | useInf){ 19 | ## Reset effective alpha based on exact 20 | if (exact){ 21 | alpha <- alpha / 2 22 | } 23 | 24 | ## Data splitting 25 | n <- length(Y) 26 | tr_id <- which(T == 1) 27 | ct_id <- which(T == 0) 28 | ntr <- length(tr_id) 29 | nct <- length(ct_id) 30 | ntr_cf <- ceiling(ntr * cfprop) 31 | nct_cf <- ceiling(nct * cfprop) 32 | tr_cfid <- sample(ntr, ntr_cf) 33 | ct_cfid <- sample(nct, nct_cf) 34 | cfid <- c(tr_id[tr_cfid], ct_id[ct_cfid]) 35 | 36 | Xtrain <- X[-cfid, ,drop=FALSE] 37 | Ytrain <- Y[-cfid] 38 | Ttrain <- T[-cfid] 39 | Xcf <- X[cfid, ,drop=FALSE] 40 | Ycf <- Y[cfid] 41 | Tcf <- T[cfid] 42 | 43 | ## Get the counterfactual intervals and transform 44 | ## them into ITE intervals on the second fold 45 | obj <- conformalIteCf(Xtrain, Ytrain, Ttrain, 46 | type, side, 47 | quantiles, 48 | outfun, outparams, 49 | psfun, psparams, 50 | useCV, 51 | trainprop, 52 | nfolds) 53 | CI_cf <- predict(obj, Xcf, Ycf, Tcf, 54 | alpha, wthigh, wtlow, useInf) 55 | 56 | ## Get ITE intervals 57 | if (exact && (side == "two")){ 58 | ## Exact two-sided intervals 59 | CIfun <- function(X){ 60 | res <- conformalInt(Xcf, as.matrix(CI_cf), 61 | citype, 62 | lofun, loquantile, loparams, 63 | upfun, upquantile, upparams, 64 | NULL, 65 | useCV, 66 | trainprop, NULL, 67 | nfolds, NULL) 68 | predict(res, X, alpha, wthigh, wtlow, useInf) 69 | } 70 | } else if (exact && (side == "above")){ 71 | ## Exact right-sided intervals 72 | CIfun <- function(X){ 73 | res <- conformal(Xcf, as.numeric(CI_cf[, 2]), 74 | citype, "above", upquantile, 75 | upfun, upparams, 76 | NULL, 77 | useCV, 78 | trainprop, NULL, 79 | nfolds, NULL) 80 | upper <- predict(res, X, alpha, wthigh, wtlow, useInf)[, 2] 81 | data.frame(lower = -Inf, upper = upper) 82 | } 83 | } else if (exact && (side == "below")){ 84 | ## Exact left-sided intervals 85 | CIfun <- function(X){ 86 | res <- conformal(Xcf, as.numeric(CI_cf[, 1]), 87 | citype, "below", loquantile, 88 | lofun, loparams, 89 | NULL, 90 | useCV, 91 | trainprop, NULL, 92 | nfolds, NULL) 93 | lower <- predict(res, X, alpha, wthigh, wtlow, useInf)[, 1] 94 | data.frame(lower = lower, upper = Inf) 95 | } 96 | } else if (!exact && (side == "two")){ 97 | ## Inexact two-sided intervals 98 | if (citype == "CQR"){ 99 | loparams <- c(list(Y = CI_cf[, 1], X = Xcf, quantiles = loquantile), loparams) 100 | upparams <- c(list(Y = CI_cf[, 2], X = Xcf, quantiles = upquantile), upparams) 101 | } else if (citype == "mean"){ 102 | loparams <- c(list(Y = CI_cf[, 1], X = Xcf), loparams) 103 | upparams <- c(list(Y = CI_cf[, 2], X = Xcf), upparams) 104 | } 105 | CIfun <- function(X){ 106 | CI_lo <- do.call(lofun, c(loparams, list(Xtest = X))) 107 | CI_up <- do.call(upfun, c(upparams, list(Xtest = X))) 108 | CI <- data.frame(lower = CI_lo, 109 | upper = CI_up) 110 | return(CI) 111 | } 112 | } else if (!exact && (side == "above")){ 113 | ## Inexact right-sided intervals 114 | if (citype == "CQR"){ 115 | upparams <- c(list(Y = CI_cf[, 2], X = Xcf, quantiles = upquantile), upparams) 116 | } else if (citype == "mean"){ 117 | upparams <- c(list(Y = CI_cf[, 2], X = Xcf), upparams) 118 | } 119 | CIfun <- function(X){ 120 | CI_up <- do.call(upfun, c(upparams, list(Xtest = X))) 121 | CI <- data.frame(lower 
= -Inf, 122 | upper = CI_up) 123 | return(CI) 124 | } 125 | } else if (!exact && (side == "below")){ 126 | ## Inexact left-sided intervals 127 | if (citype == "CQR"){ 128 | loparams <- c(list(Y = CI_cf[, 1], X = Xcf, quantiles = loquantile), loparams) 129 | } else if (citype == "mean"){ 130 | loparams <- c(list(Y = CI_cf[, 1], X = Xcf), loparams) 131 | } 132 | CIfun <- function(X){ 133 | CI_lo <- do.call(lofun, c(loparams, list(Xtest = X))) 134 | CI <- data.frame(lower = CI_lo, 135 | upper = Inf) 136 | return(CI) 137 | } 138 | } 139 | 140 | res <- list(CIfun = CIfun, 141 | CI_cf = CI_cf, cfid = cfid) 142 | class(res) <- "conformalIteNest" 143 | return(res) 144 | } 145 | 146 | predict.conformalIteNest <- function(object, X){ 147 | object$CIfun(X) 148 | } 149 | -------------------------------------------------------------------------------- /R/conformal_CV.R: -------------------------------------------------------------------------------- 1 | ## Generic weighted CV+. See ?conformal 2 | conformalCV <- function(X, Y, 3 | type, side, 4 | quantiles, 5 | outfun, outparams, 6 | wtfun, 7 | nfolds, idlist){ 8 | wtfun0 <- NULL 9 | if (is.null(wtfun)){ 10 | wtfun <- lapply(1:nfolds, function(k){ 11 | function(X){ 12 | rep(1, nrow(X)) 13 | } 14 | }) 15 | } else if (is.function(wtfun)){ 16 | wtfun0 <- wtfun 17 | wtfun <- rep(list(wtfun), nfolds) 18 | } else if (!is.list(wtfun) || length(wtfun) != nfolds){ 19 | stop("wtfun must be a function or a list (of functions) of length nfolds") 20 | } 21 | sapply(wtfun, check_wtfun) 22 | if (is.null(wtfun0)){ 23 | wtfun0 <- wtfun 24 | } 25 | 26 | if (type == "CQR"){ 27 | if (is.null(quantiles)){ 28 | stop("Quantiles should be provided if CQR is used.") 29 | } else if (length(quantiles) > 2){ 30 | warning("At most two quantiles can be provided. Use the first two by default") 31 | quantiles <- quantiles[1:2] 32 | } 33 | if (side %in% c("above", "below") && length(quantiles) > 1){ 34 | warning("At most one quantile can be provided when side = \"above\" or \"below\". Use the first one by default") 35 | quantiles <- quantiles[1] 36 | } 37 | outparams <- c(outparams, list(quantiles = quantiles)) 38 | } 39 | 40 | n <- length(Y) 41 | if (is.null(idlist)){ 42 | idlist <- gen_cv_ids(n, nfolds) 43 | } 44 | if (!is.list(idlist) || length(idlist) != nfolds){ 45 | stop("idlist needs to a list of length 'nfolds'") 46 | } 47 | 48 | outparams0 <- outparams 49 | info <- list() 50 | for (k in 1:nfolds){ 51 | testid <- idlist[[k]] 52 | Xtrain <- X[-testid, ,drop=FALSE] 53 | Ytrain <- Y[-testid] 54 | Xval <- X[testid, ,drop=FALSE] 55 | Yval <- Y[testid] 56 | 57 | outparams <- c(list(Y = Ytrain, X = Xtrain), outparams0) 58 | Ymodel <- function(X){ 59 | do.call(outfun, c(outparams, list(Xtest = X))) 60 | } 61 | Yhat <- Ymodel(Xval) 62 | Yscore <- conformalScore(Yval, Yhat, type, side) 63 | wt <- wtfun[[k]](Xval) 64 | 65 | obj <- list(Yscore = Yscore, 66 | wt = wt, 67 | Ymodel = Ymodel) 68 | info[[k]] <- obj 69 | } 70 | 71 | res <- list(info = info, 72 | wtfun = wtfun0, 73 | type = type, 74 | side = side, 75 | quantiles = quantiles, 76 | nfolds = nfolds, 77 | idlist = idlist) 78 | class(res) <- "conformalCV" 79 | return(res) 80 | } 81 | 82 | #' Predict Method for conformalCV objects 83 | #' 84 | #' Obtains predictive intervals on a testing dataset based on a \code{conformalCV} object 85 | #' from \code{\link{conformal}} with \code{useCV = TRUE}. 
86 | #' 87 | #' Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 88 | #' weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 89 | #' In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 90 | #' and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 91 | #' is being used. 92 | #' 93 | #' @param object an object of class \code{conformalCV}; see \code{\link{conformal}}. 94 | #' @param Xtest testing covariates. 95 | #' @param alpha confidence level. 96 | #' @param wthigh upper truncation level of weights; see Details. 97 | #' @param wtlow lower truncation level of weights; see Details. 98 | #' @param useInf if FALSE then replace infinity by the maximum conformity score. 99 | #' @param ... other arguments 100 | #' 101 | #' @return predictive intervals. A data.frame with \code{nrow(Xtest)} rows and two columns: 102 | #' "lower" for the lower bound and "upper" for the upper bound. 103 | #' 104 | #' @seealso 105 | #' \code{\link{predict.conformalSplit}}, \code{\link{conformal}}. 106 | #' 107 | #' @export 108 | predict.conformalCV <- function(object, Xtest, 109 | alpha = 0.1, 110 | wthigh = 20, wtlow = 0.05, 111 | useInf = FALSE, 112 | ...){ 113 | type <- object$type 114 | side <- object$side 115 | nfolds <- object$nfolds 116 | info <- object$info 117 | 118 | for (k in 1:nfolds){ 119 | info[[k]]$Yhat_test <- info[[k]]$Ymodel(Xtest) 120 | } 121 | 122 | wt <- do.call(c, lapply(info, function(x){x$wt})) 123 | if (is.function(object$wtfun)){ 124 | wt_test <- object$wtfun(Xtest) 125 | } else { 126 | wt_test <- sapply(object$wtfun, function(wtfun){ 127 | wtfun(Xtest) 128 | }) 129 | wt_test <- rowMeans(wt_test) 130 | } 131 | avg_wt <- mean(c(wt, wt_test)) 132 | wt <- censoring(wt / avg_wt, wthigh, wtlow) 133 | wt_test <- censoring(wt_test / avg_wt, wthigh, wtlow) 134 | 135 | totw <- sum(wt) 136 | wt <- wt / totw 137 | qt <- (1 + wt_test / totw) * (1 - alpha) 138 | ## qt <- pmin(qt, 1) 139 | 140 | if (type == "CQR" && side == "two"){ 141 | CI <- sapply(1:length(qt), function(i){ 142 | Ylo <- lapply(info, function(x){ 143 | x$Yhat_test[i, 1] - x$Yscore 144 | }) 145 | Ylo <- do.call(c, Ylo) 146 | Ylo <- -weightedConformalCutoff(-Ylo, wt, qt[i], useInf) 147 | Yup <- lapply(info, function(x){ 148 | x$Yhat_test[i, 2] + x$Yscore 149 | }) 150 | Yup <- do.call(c, Yup) 151 | Yup <- weightedConformalCutoff(Yup, wt, qt[i], useInf) 152 | c(Ylo, Yup) 153 | }) 154 | } else if (type == "mean" && side == "two"){ 155 | CI <- sapply(1:length(qt), function(i){ 156 | Ylo <- lapply(info, function(x){ 157 | x$Yhat_test[i] - x$Yscore 158 | }) 159 | Ylo <- do.call(c, Ylo) 160 | Ylo <- -weightedConformalCutoff(-Ylo, wt, qt[i], useInf) 161 | Yup <- lapply(info, function(x){ 162 | x$Yhat_test[i] + x$Yscore 163 | }) 164 | Yup <- do.call(c, Yup) 165 | Yup <- weightedConformalCutoff(Yup, wt, qt[i], useInf) 166 | c(Ylo, Yup) 167 | }) 168 | } else if (side == "above"){ 169 | CI <- sapply(1:length(qt), function(i){ 170 | Ylo <- -Inf 171 | Yup <- lapply(info, function(x){ 172 | x$Yhat_test[i] + x$Yscore 173 | }) 174 | Yup <- do.call(c, Yup) 175 | Yup <- weightedConformalCutoff(Yup, wt, qt[i], useInf) 176 | c(Ylo, Yup) 177 | }) 178 | } else if (side == "below"){ 179 | CI <- sapply(1:length(qt), function(i){ 180 | Ylo <- lapply(info, function(x){ 181 | x$Yhat_test[i] - x$Yscore 182 | }) 183 | Ylo <- do.call(c, Ylo) 184 | Ylo <- -weightedConformalCutoff(-Ylo, wt, qt[i], useInf) 185 | Yup <- Inf 186 | 
c(Ylo, Yup) 187 | }) 188 | } 189 | 190 | res <- data.frame(lower = as.numeric(CI[1, ]), 191 | upper = as.numeric(CI[2, ])) 192 | return(res) 193 | } 194 | -------------------------------------------------------------------------------- /R/conformal_learners.R: -------------------------------------------------------------------------------- 1 | ## quantile random forest. grf package needed 2 | quantRF <- function(Y, X, Xtest, quantiles, ...){ 3 | fit <- grf::quantile_forest(X, Y, quantiles = quantiles, ...) 4 | res <- predict(fit, Xtest, quantiles = quantiles) 5 | if (is.list(res) && !is.data.frame(res)){ 6 | res <- res$predictions 7 | } 8 | if (length(quantiles) == 1){ 9 | res <- as.numeric(res) 10 | } else { 11 | res <- as.matrix(res) 12 | } 13 | return(res) 14 | } 15 | 16 | ## random forest. randomForest package needed 17 | RF <- function(Y, X, Xtest, ...){ 18 | dist <- guessClass(Y) 19 | if (dist == "gaussian"){ 20 | fit <- randomForest::randomForest(x = X, y = Y, ...) 21 | res <- predict(fit, newdata = Xtest) 22 | res <- as.numeric(res) 23 | } else if (dist == "bernoulli"){ 24 | if (!is.factor(Y)){ 25 | Y <- as.factor(Y) 26 | } 27 | fit <- randomForest::randomForest(x = X, y = Y, ...) 28 | res <- predict(fit, newdata = Xtest, type = "prob") 29 | res <- as.numeric(res[, 2]) 30 | } else if (dist == "multinomial"){ 31 | if (!is.factor(Y)){ 32 | Y <- as.factor(Y) 33 | } 34 | fit <- randomForest::randomForest(x = X, y = Y, ...) 35 | res <- predict(fit, newdata = Xtest, type = "prob") 36 | res <- as.matrix(res) 37 | } 38 | return(res) 39 | } 40 | 41 | ## quantile gradient boosting. gbm package needed 42 | quantBoosting <- function(Y, X, Xtest, quantiles, n.trees = 100, ...){ 43 | if (class(X)[1] != "data.frame"){ 44 | X <- as.data.frame(X) 45 | Xtest <- as.data.frame(Xtest) 46 | names(Xtest) <- names(X) 47 | } 48 | data <- data.frame(Y = Y, X) 49 | fit <- gbm::gbm(Y ~ ., distribution = list(name = "quantile", alpha = quantiles[1]), data = data, n.trees = n.trees, ...) 50 | res <- predict(fit, Xtest, type = "response", n.trees = n.trees) 51 | if (length(quantiles) == 2){ 52 | fit2 <- gbm::gbm(Y ~ ., distribution = list(name = "quantile", alpha = quantiles[2]), data = data, n.trees = n.trees, ...) 53 | res2 <- predict(fit2, Xtest, type = "response", n.trees = n.trees) 54 | res <- cbind(res, res2) 55 | } 56 | return(res) 57 | } 58 | 59 | ## gradient boosting. gbm package needed 60 | Boosting <- function(Y, X, Xtest, n.trees = 100, ...){ 61 | if (class(X)[1] != "data.frame"){ 62 | X <- as.data.frame(X) 63 | Xtest <- as.data.frame(Xtest) 64 | names(Xtest) <- names(X) 65 | } 66 | distribution <- guessClass(Y) 67 | if (distribution == "bernoulli" && is.factor(Y)){ 68 | Y <- as.numeric(Y) - 1 69 | } 70 | data <- data.frame(Y = Y, X) 71 | fit <- gbm::gbm(Y ~ ., distribution = distribution, data = data, n.trees = n.trees, ...) 72 | res <- predict(fit, Xtest, type = "response", n.trees = n.trees) 73 | if (distribution == "multinomial"){ 74 | res <- matrix(res, nrow = nrow(Xtest)) 75 | } 76 | return(res) 77 | } 78 | 79 | ## posterior quantiles of BART. 
bartMachine package needed
80 | quantBART <- function(Y, X, Xtest, quantiles,
81 | ndpost = 100, ...){
82 | if (class(X)[1] != "data.frame"){
83 | X <- as.data.frame(X)
84 | Xtest <- as.data.frame(Xtest)
85 | names(Xtest) <- names(X)
86 | }
87 | y <- Y
88 | fit <- bartMachine::bartMachine(X, y, verbose = FALSE)
89 | if (length(quantiles) == 2){
90 | if (sum(quantiles) != 1){
91 | warning("Two quantiles should sum up to 1.")
92 | }
93 | ci_conf <- quantiles[2] - quantiles[1] ## coverage level implied by the requested quantiles
94 | res <- bartMachine::calc_prediction_intervals(
95 | fit, new_data = Xtest,
96 | pi_conf = ci_conf)$interval
97 | res <- as.matrix(res)
98 | } else if (length(quantiles) == 1){
99 | if (quantiles[1] > 0.5){
100 | ci_conf <- 2 * quantiles[1]
101 | res <- bartMachine::calc_prediction_intervals(
102 | fit, new_data = Xtest,
103 | pi_conf = ci_conf)$interval[, 2]
104 | res <- as.numeric(res)
105 | } else{
106 | ci_conf <- 2 * (1 - quantiles[1])
107 | res <- bartMachine::calc_prediction_intervals(
108 | fit, new_data = Xtest,
109 | pi_conf = ci_conf)$interval[, 1]
110 | res <- as.numeric(res)
111 | }
112 | }
113 | return(res)
114 | }
115 | 
116 | ## BART. bartMachine package needed
117 | BART <- function(Y, X, Xtest,
118 | ndpost = 100, ...){
119 | if (class(X)[1] != "data.frame"){
120 | X <- as.data.frame(X)
121 | Xtest <- as.data.frame(Xtest)
122 | names(Xtest) <- names(X)
123 | }
124 | y <- Y
125 | fit <- bartMachine::bartMachine(X, y, verbose = FALSE)
126 | res <- predict(fit, Xtest)
127 | return(res)
128 | }
129 | -------------------------------------------------------------------------------- /R/conformal_split.R: -------------------------------------------------------------------------------- 1 | ## Generic weighted split conformal inference. See ?conformal
2 | conformalSplit <- function(X, Y,
3 | type, side,
4 | quantiles,
5 | outfun, outparams,
6 | wtfun,
7 | trainprop, trainid){
8 | if (is.null(wtfun)){
9 | wtfun <- function(X){
10 | rep(1, nrow(X))
11 | }
12 | } else if (is.function(wtfun)){
13 | check_wtfun(wtfun)
14 | }
15 | 
16 | if (type == "CQR"){
17 | if (is.null(quantiles)){
18 | stop("Quantiles should be provided if CQR is used.")
19 | } else if (length(quantiles) > 2){
20 | warning("At most two quantiles can be provided. Use the first two by default")
21 | quantiles <- quantiles[1:2]
22 | }
23 | if (side %in% c("above", "below") && length(quantiles) > 1){
24 | warning("At most one quantile can be provided when side = \"above\" or \"below\".
Use the first one by default") 25 | quantiles <- quantiles[1] 26 | } 27 | outparams <- c(outparams, list(quantiles = quantiles)) 28 | } 29 | 30 | n <- length(Y) 31 | if (is.null(trainid)){ 32 | trainid <- sample(n, floor(n * trainprop)) 33 | } 34 | Xtrain <- X[trainid, ,drop=FALSE] 35 | Ytrain <- Y[trainid] 36 | Xval <- X[-trainid, ,drop=FALSE] 37 | Yval <- Y[-trainid] 38 | 39 | outparams <- c(list(Y = Ytrain, X = Xtrain), outparams) 40 | 41 | Ymodel <- function(X){ 42 | do.call(outfun, c(outparams, list(Xtest = X))) 43 | } 44 | Yhat <- Ymodel(Xval) 45 | Yscore <- conformalScore(Yval, Yhat, type, side) 46 | wt <- wtfun(Xval) 47 | 48 | obj <- list(Yscore = Yscore, wt = wt, 49 | Ymodel = Ymodel, wtfun = wtfun, 50 | type = type, 51 | side = side, 52 | quantiles = quantiles, 53 | trainprop = trainprop, 54 | trainid = trainid) 55 | class(obj) <- "conformalSplit" 56 | return(obj) 57 | } 58 | 59 | #' Predict Method for conformalSplit objects 60 | #' 61 | #' Obtains predictive intervals on a testing dataset based on a \code{conformalSplit} object 62 | #' from \code{\link{conformal}} with \code{useCV = FALSE}. 63 | #' 64 | #' Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 65 | #' weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 66 | #' In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 67 | #' and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 68 | #' is being used. 69 | #' 70 | #' @param object an object of class \code{conformalSplit}; see \code{\link{conformal}}. 71 | #' @param Xtest testing covariates. 72 | #' @param alpha confidence level. 73 | #' @param wthigh upper truncation level of weights; see Details. 74 | #' @param wtlow lower truncation level of weights; see Details. 75 | #' @param useInf if FALSE then replace infinity by the maximum conformity score. 76 | #' @param ... other arguments 77 | #' 78 | #' @return predictive intervals. A data.frame with \code{nrow(Xtest)} rows and two columns: 79 | #' "lower" for the lower bound and "upper" for the upper bound. 80 | #' 81 | #' @seealso 82 | #' \code{\link{predict.conformalCV}}, \code{\link{conformal}}. 
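#'
#' @examples
#' ## A minimal sketch (not a complete reference): build a weighted split CQR
#' ## object with conformal() and form predictive intervals on new covariates.
#' ## It assumes the grf package is installed; see ?conformal for more examples.
#' \donttest{
#' set.seed(1)
#' X <- matrix(rnorm(1000 * 5), nrow = 1000)
#' Y <- rowSums(X) + rnorm(1000)
#' Xtest <- matrix(rnorm(5 * 5), nrow = 5)
#' obj <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95),
#'                  outfun = "quantRF", wtfun = function(X){pnorm(X[, 1])},
#'                  useCV = FALSE)
#' ## Default weight truncation (wthigh = 20, wtlow = 0.05)
#' predict(obj, Xtest, alpha = 0.1)
#' ## No weight truncation
#' predict(obj, Xtest, alpha = 0.1, wthigh = Inf, wtlow = 0)
#' }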
83 | #' 84 | #' @export 85 | predict.conformalSplit <- function(object, Xtest, 86 | alpha = 0.1, 87 | wthigh = 20, wtlow = 0.05, 88 | useInf = FALSE, 89 | ...){ 90 | type <- object$type 91 | side <- object$side 92 | Yhat_test <- object$Ymodel(Xtest) 93 | wt_test <- object$wtfun(Xtest) 94 | 95 | avg_wt <- mean(c(object$wt, wt_test)) 96 | wt <- censoring(object$wt / avg_wt, wthigh, wtlow) 97 | wt_test <- censoring(wt_test / avg_wt, wthigh, wtlow) 98 | 99 | totw <- sum(wt) 100 | wt <- wt / totw 101 | qt <- (1 + wt_test / totw) * (1 - alpha) 102 | Yslack <- weightedConformalCutoff(object$Yscore, wt, qt, useInf) 103 | 104 | if (type == "CQR" && side == "two"){ 105 | Ylo <- Yhat_test[, 1] - Yslack 106 | Yup <- Yhat_test[, 2] + Yslack 107 | } else if (type == "mean" && side == "two"){ 108 | Ylo <- Yhat_test - Yslack 109 | Yup <- Yhat_test + Yslack 110 | } else if (side == "above"){ 111 | Ylo <- -Inf 112 | Yup <- Yhat_test + Yslack 113 | } else if (side == "below"){ 114 | Ylo <- Yhat_test - Yslack 115 | Yup <- Inf 116 | } 117 | 118 | res <- data.frame(lower = Ylo, upper = Yup) 119 | return(res) 120 | } 121 | -------------------------------------------------------------------------------- /R/conformal_utils.R: -------------------------------------------------------------------------------- 1 | ## weightedConformalCutoff <- function(score, weight, qt){ 2 | ## quantile(score, qt) 3 | ## } 4 | 5 | find_inds <- function(a, b){ 6 | n <- length(a) 7 | b <- b - 1e-12 8 | ## n + 1 - rank(-c(a, b), ties.method = "first")[-(1:n)] + rank(-b, ties.method = "first") 9 | rank(c(a, b), ties.method = "first")[-(1:n)] - rank(b, ties.method = "first") + 1 10 | } 11 | 12 | ## weightedConformalCutoff0 <- function(score, weight, qt){ 13 | ## ord <- order(score) 14 | ## weight <- weight[ord] 15 | ## score <- score[ord] 16 | ## cw <- cumsum(weight) 17 | ## cutoff <- sapply(qt, function(q){ 18 | ## ind <- min(which(cw >= q)) 19 | ## score[ind] 20 | ## }) 21 | ## return(cutoff) 22 | ## } 23 | 24 | weightedConformalCutoff <- function(score, weight, qt, useInf){ 25 | ord <- order(score) 26 | weight <- weight[ord] 27 | score <- score[ord] 28 | cw <- cumsum(weight) 29 | inds <- find_inds(cw, pmin(qt, 1)) 30 | cutoff <- score[inds] 31 | if (useInf){ 32 | cutoff[qt >= 1] <- Inf 33 | } 34 | return(cutoff) 35 | } 36 | 37 | conformalScore <- function(Y, Yhat, type, side){ 38 | if (is.vector(Y) || (is.matrix(Y) && ncol(Y) == 1)){ 39 | if (type == "CQR" && side == "two"){ 40 | score <- pmax(Yhat[, 1] - Y, Y - Yhat[, 2]) 41 | } else if (type == "mean" && side == "two"){ 42 | score <- abs(Yhat - Y) 43 | } else if (side == "above"){ 44 | score <- Y - Yhat 45 | } else if (side == "below"){ 46 | score <- Yhat - Y 47 | } 48 | } else if (is.matrix(Y) && ncol(Y) == 2){ 49 | score <- pmax(Yhat[, 1] - Y[, 1], Y[, 2] - Yhat[, 2]) 50 | } 51 | return(score) 52 | } 53 | 54 | conformalScoreClass <- function(Y, phat, type, wt){ 55 | ncl <- nlevels(Y) 56 | if (type == "weighted"){ 57 | phat <- row_quo(phat, wt) 58 | } 59 | Yid <- as.numeric(Y) 60 | score <- phat[cbind(1:length(Y), Yid)] 61 | return(score) 62 | } 63 | 64 | -------------------------------------------------------------------------------- /R/import-packages.R: -------------------------------------------------------------------------------- 1 | #' @importFrom stats predict quantile 2 | #' @importFrom methods formalArgs 3 | NULL 4 | -------------------------------------------------------------------------------- /R/utils.R: 
-------------------------------------------------------------------------------- 1 | ## Truncate a sequence from both sides 2 | censoring <- function(x, high = 20, low = 0.05){ 3 | pmin(pmax(x, low), high) 4 | } 5 | 6 | ## Guess the type of a variable 7 | guessClass <- function(x){ 8 | if (length(unique(x)) == 2 || is.logical(x) || (is.factor(x) && nlevels(x) == 2)){ 9 | dist <- "bernoulli" 10 | } else if (is.factor(x) && nlevels(x) > 2){ 11 | dist <- "multinomial" 12 | } else if (is.numeric(x)){ 13 | dist <- "gaussian" 14 | } 15 | return(dist) 16 | } 17 | 18 | ## Frequency of each level in a factor 19 | freq <- function(x){ 20 | if (!is.factor(x)){ 21 | x <- as.factor(x) 22 | } 23 | as.numeric(table(x)) / length(x) 24 | } 25 | 26 | ## Divide each row of A by a vector b 27 | row_quo <- function(A, b){ 28 | t(t(A) / b) 29 | } 30 | 31 | ## Generate a list of indices for cross-validation 32 | gen_cv_ids <- function(n, nfolds, offset = 0){ 33 | ids <- sample(n, n) 34 | quo <- floor(n / nfolds) 35 | if (quo == 0){ 36 | idlist <- lapply(1:nfolds, function(i){ 37 | if (i <= n){ 38 | i 39 | } else { 40 | numeric(0) 41 | } 42 | }) 43 | } else { 44 | resid <- n - quo * nfolds 45 | idlist <- lapply(1:nfolds, function(i){ 46 | tmp <- (i - 1) * quo + 1:quo 47 | if (i <= resid){ 48 | tmp <- c(tmp, quo * nfolds + i) 49 | } 50 | return(ids[tmp] + offset) 51 | }) 52 | } 53 | return(idlist) 54 | } 55 | 56 | ## Convert a valid outfun string to the function 57 | str_outfun <- function(method){ 58 | if (method == "RF"){ 59 | if (!requireNamespace("randomForest")){ 60 | stop("randomForest package needs to be installed") 61 | } 62 | return(RF) 63 | } else if (method == "quantRF"){ 64 | if (!requireNamespace("grf")){ 65 | stop("grf package needs to be installed") 66 | } 67 | return(quantRF) 68 | } else if (method == "Boosting"){ 69 | if (!requireNamespace("gbm")){ 70 | stop("gbm package needs to be installed") 71 | } 72 | return(Boosting) 73 | } else if (method == "quantBoosting"){ 74 | if (!requireNamespace("gbm")){ 75 | stop("gbm package needs to be installed") 76 | } 77 | return(quantBoosting) 78 | } else if (method == "BART"){ 79 | if (!requireNamespace("bartMachine")){ 80 | stop("bartMachine package needs to be installed") 81 | } 82 | return(BART) 83 | } else if (method == "quantBART"){ 84 | if (!requireNamespace("bartMachine")){ 85 | stop("bartMachine package needs to be installed") 86 | } 87 | return(quantBART) 88 | } else { 89 | stop(paste0(method, " is not supported. Please input a valid string or a function that meets the minimal requirements described in the man page")) 90 | } 91 | } 92 | 93 | ## Convert a valid psfun string to the function 94 | str_psfun <- function(method){ 95 | if (method == "RF"){ 96 | if (!requireNamespace("randomForest")){ 97 | stop("randomForest package needs to be installed") 98 | } 99 | return(RF) 100 | } else if (method == "Boosting"){ 101 | if (!requireNamespace("gbm")){ 102 | stop("gbm package needs to be installed") 103 | } 104 | return(Boosting) 105 | } else { 106 | stop(paste0(method, " is not supported. 
Please input a valid string or a function that meets the minimal requirements described in the man page")) 107 | } 108 | } 109 | 110 | ## Check if the required inputs are included in outfun 111 | check_outfun <- function(fun, type){ 112 | args <- methods::formalArgs(fun) 113 | if (type == "CQR"){ 114 | res <- all(c("Y", "X", "Xtest", "quantiles") %in% args) 115 | if (!res){ 116 | stop("outfun should include 'Y', 'X', 'Xtest' and 'quantiles' as inputs when type = 'CQR'") 117 | } 118 | } else { 119 | res <- all(c("Y", "X", "Xtest") %in% args) 120 | if (!res){ 121 | stop("outfun should include 'Y', 'X' and 'Xtest' as inputs when type = 'mean'") 122 | } 123 | } 124 | } 125 | 126 | ## Check if the required inputs are included in wtfun 127 | check_wtfun <- function(fun){ 128 | args <- formalArgs(fun) 129 | if (!("X" %in% args)){ 130 | stop("wtfun should include 'X' as inputs") 131 | } 132 | } 133 | 134 | ## Check if the required inputs are included in psfun 135 | check_psfun <- function(fun){ 136 | args <- formalArgs(fun) 137 | if (!all(c("Y", "X", "Xtest") %in% args)){ 138 | stop("psfun should include 'Y', 'X' and 'Xtest' as inputs") 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cfcausal 2 | An R package for conformal inference of counterfactuals and individual treatment effects 3 | 4 | ## Overview 5 | This R package implements weighted conformal inference-based procedures for counterfactuals and individual treatment effects proposed in our paper: [Conformal Inference of Counterfactuals and Individual Treatment Effects](https://arxiv.org/abs/2006.06138). It includes both the split conformal inference and cross-validation+. For each type of conformal inference, both conformalized quantile regression (CQR) and standard conformal inference are supported. It provides a pool of convenient learners and allows flexible user-defined learners for conditional mean and quantiles. 6 | 7 | - `conformalCf()` produces intervals for counterfactuals or outcomes with missing values in general. 8 | - `conformalIte()` produces intervals for individual treatment effects with a binary treatment under the potential outcome framework. 9 | - `conformal()` provides a generic framework of weighted conformal inference for continuous outcomes. 10 | - `conformalInt()` provides a generic framework of weighted conformal inference for interval outcomes. 11 | 12 | ## Installation 13 | 14 | ``` 15 | if (!require("devtools")){ 16 | install.packages("devtools") 17 | } 18 | devtools::install_github("lihualei71/cfcausal") 19 | ``` 20 | To access the vignette, run the following code to build it. 21 | ``` 22 | devtools::install_github("lihualei71/cfcausal", build_vignettes = TRUE) 23 | ``` 24 | 25 | We suggest installing [grf](https://cran.r-project.org/web/packages/grf/grf.pdf), [randomForest](https://cran.r-project.org/web/packages/randomForest/randomForest.pdf), [gbm](https://cran.r-project.org/web/packages/gbm/gbm.pdf) and [bartMachine](https://cran.r-project.org/web/packages/bartMachine/bartMachine.pdf) to take advantage of the built-in learners. 26 | 27 | ## Usage Examples 28 | We illustrate the usage of cfcausal package using simple synthetic datasets. For details please read the vignette (`vignette("cfcausal_demo", package = "cfcausal")`) and the manual. 
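The generic interface `conformal()` is sketched below; the snippet is adapted from the examples in `?conformal` (weighted split CQR with a user-defined weight function) and assumes that [grf](https://cran.r-project.org/web/packages/grf/grf.pdf) is installed.

```
#### Weighted split CQR with the generic interface
library("cfcausal")

# Generate data from a linear model
set.seed(1)
n <- 1000
d <- 5
X <- matrix(rnorm(n * d), nrow = n)
beta <- rep(1, 5)
Y <- X %*% beta + rnorm(n)

# Generate testing data
ntest <- 5
Xtest <- matrix(rnorm(ntest * d), nrow = ntest)

# Run weighted split CQR with weight function w(x) = pnorm(x1)
wtfun <- function(X){ pnorm(X[, 1]) }
obj <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95),
                 outfun = "quantRF", wtfun = wtfun, useCV = FALSE)
predict(obj, Xtest, alpha = 0.1)
```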
29 | 30 | ``` 31 | #### Conformal inference of counterfactuals 32 | library("cfcausal") 33 | 34 | # Generate data 35 | set.seed(1) 36 | n <- 1000 37 | d <- 5 38 | X <- matrix(rnorm(n * d), nrow = n) 39 | beta <- rep(1, 5) 40 | Y <- X %*% beta + rnorm(n) 41 | 42 | # Generate missing indicators 43 | missing_prob <- pnorm(X[, 1]) 44 | if_missing <- missing_prob < runif(n) 45 | Y[if_missing] <- NA 46 | 47 | # Generate testing data 48 | ntest <- 5 49 | Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 50 | 51 | # Run weighted split CQR 52 | obj <- conformalCf(X, Y, type = "CQR", 53 | quantiles = c(0.05, 0.95), 54 | outfun = "quantRF", useCV = FALSE) 55 | predict(obj, Xtest, alpha = 0.1) 56 | 57 | # Run weighted CQR-CV+ 58 | obj <- conformalCf(X, Y, type = "CQR", 59 | quantiles = c(0.05, 0.95), 60 | outfun = "quantRF", useCV = TRUE) 61 | predict(obj, Xtest, alpha = 0.1) 62 | ``` 63 | 64 | ``` 65 | #### Conformal inference of individual treatment effects 66 | library("cfcausal") 67 | 68 | # Generate potential outcomes from two linear models 69 | set.seed(1) 70 | n <- 1000 71 | d <- 5 72 | X <- matrix(rnorm(n * d), nrow = n) 73 | beta <- rep(1, 5) 74 | Y1 <- X %*% beta + rnorm(n) 75 | Y0 <- rnorm(n) 76 | 77 | # Generate treatment indicators 78 | ps <- pnorm(X[, 1]) 79 | T <- as.numeric(ps < runif(n)) 80 | Y <- ifelse(T == 1, Y1, Y0) 81 | 82 | # Generate testing data 83 | ntest <- 5 84 | Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 85 | 86 | # Inexact nested method 87 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, 88 | algo = "nest", exact = FALSE, type = "CQR", 89 | quantiles = c(0.05, 0.95), 90 | outfun = "quantRF", useCV = FALSE) 91 | CIfun(Xtest) 92 | 93 | # Exact nested method 94 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, 95 | algo = "nest", exact = TRUE, type = "CQR", 96 | quantiles = c(0.05, 0.95), 97 | outfun = "quantRF", useCV = FALSE) 98 | CIfun(Xtest) 99 | 100 | # Naive method 101 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, 102 | algo = "naive", type = "CQR", 103 | quantiles = c(0.05, 0.95), 104 | outfun = "quantRF", useCV = FALSE) 105 | CIfun(Xtest) 106 | 107 | # Counterfactual method, Y and T needs to be observed 108 | pstest <- pnorm(Xtest[, 1]) 109 | Ttest <- as.numeric(pstest < runif(ntest)) 110 | Y1test <- Xtest %*% beta + rnorm(ntest) 111 | Y0test <- rnorm(ntest) 112 | Ytest <- ifelse(Ttest == 1, Y1test, Y0test) 113 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, 114 | algo = "counterfactual", type = "CQR", 115 | quantiles = c(0.05, 0.95), 116 | outfun = "quantRF", useCV = FALSE) 117 | CIfun(Xtest, Ytest, Ttest) 118 | ``` 119 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs 2 | -------------------------------------------------------------------------------- /cfcausal.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: XeLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | -------------------------------------------------------------------------------- /docs/404.html: 
-------------------------------------------------------------------------------- /inst/CITATION: --------------------------------------------------------------------------------
1 | citHeader("To cite cfcausal in publications, please use:")
2 | 
3 | citEntry(entry = "article",
4 | author = "Lihua Lei and Emmanuel Cand\\`es",
5 | title = "Conformal Inference of Counterfactuals and Individual Treatment Effects",
6 | journal = "Arxiv",
7 | year = "2020",
8 | url = "https://arxiv.org/abs/2006.06138",
9 | textVersion = "Lei, L., & Cand\\`es, E. (2020). Conformal Inference of Counterfactuals and Individual Treatment Effects. arXiv preprint arXiv:."
10 | )
11 | -------------------------------------------------------------------------------- /man/conformal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/conformal.R
3 | \name{conformal}
4 | \alias{conformal}
5 | \title{Conformal inference for continuous outcomes}
6 | \usage{
7 | conformal(
8 | X,
9 | Y,
10 | type = c("CQR", "mean"),
11 | side = c("two", "above", "below"),
12 | quantiles = NULL,
13 | outfun = NULL,
14 | outparams = list(),
15 | wtfun = NULL,
16 | useCV = FALSE,
17 | trainprop = 0.75,
18 | trainid = NULL,
19 | nfolds = 10,
20 | idlist = NULL
21 | )
22 | }
23 | \arguments{
24 | \item{X}{covariates.}
25 | 
26 | \item{Y}{outcome vector.}
27 | 
28 | \item{type}{a string that takes values in \{"CQR", "mean"\}.}
29 | 
30 | \item{side}{a string that takes values in \{"two", "above", "below"\}. See Details.}
31 | 
32 | \item{quantiles}{a scalar or a vector of length 2 depending on \code{side}. Used only when \code{type = "CQR"}. See Details.}
33 | 
34 | \item{outfun}{a function that models the conditional mean/quantiles, or a valid string.
35 | The default is random forest when \code{type = "mean"} and quantile random forest when
36 | \code{type = "CQR"}. See Details.}
37 | 
38 | \item{outparams}{a list of other parameters to be passed into \code{outfun}.}
39 | 
40 | \item{wtfun}{NULL for unweighted conformal inference, or a function for weighted conformal inference
41 | when \code{useCV = FALSE}, or a list of functions for weighted conformal inference when \code{useCV = TRUE}.
42 | See Details.}
43 | 
44 | \item{useCV}{FALSE for split conformal inference and TRUE for CV+.}
45 | 
46 | \item{trainprop}{proportion of units for training \code{outfun}. The default is 75\%. Used only when \code{useCV = FALSE}.}
47 | 
48 | \item{trainid}{indices of training units. The default is NULL, generating random indices. Used only when \code{useCV = FALSE}.}
49 | 
50 | \item{nfolds}{number of folds. The default is 10. Used only when \code{useCV = TRUE}.}
51 | 
52 | \item{idlist}{a list of indices of length \code{nfolds}. The default is NULL, generating random indices.
Used only when \code{useCV = TRUE}.} 53 | } 54 | \value{ 55 | a \code{conformalSplit} object when \code{useCV = FALSE} with the following attributes: 56 | \itemize{ 57 | \item{Yscore:}{ a vector of non-conformity score on the calibration fold} 58 | \item{wt:}{ a vector of weights on the calibration fold} 59 | \item{Ymodel:}{ a function with required argument \code{X} that produces the estimates the conditional 60 | mean or quantiles of \code{X}} 61 | \item{wtfun, type, side, quantiles, trainprop, trainid:}{ the same as inputs} 62 | } 63 | 64 | or a \code{conformalCV} object when \code{useCV = TRUE} with the following attributes: 65 | \itemize{ 66 | \item{info: }{ a list of length \code{nfolds} with each element being a list with attributes 67 | \code{Yscore}, \code{wt} and \code{Ymodel} described above for each fold} 68 | \item{wtfun, type, side, quantiles, nfolds, idlist:}{ the same as inputs} 69 | } 70 | } 71 | \description{ 72 | \code{conformal} is a framework for weighted and unweighted conformal inference for continuous 73 | outcomes. It supports both weighted split conformal inference and weighted CV+, 74 | including weighted Jackknife+ as a special case. For each type, it supports both conformalized 75 | quantile regression (CQR) and standard conformal inference based on conditional mean estimation. 76 | } 77 | \details{ 78 | When \code{side = "two"}, CQR (two-sided) produces intervals in the form of 79 | \deqn{[q_{\alpha_{lo}}(x) - \eta, q_{\alpha_{hi}}(x) + \eta]} 80 | where \eqn{q_{\alpha_{lo}}(x)} and \eqn{q_{\alpha_{hi}}(x)} are estimates of conditional 81 | quantiles of Y given X and the standard conformal inference produces (two-sided) intervals in the form of 82 | \deqn{[m(x) - \eta, m(x) + \eta]} 83 | where \eqn{m(x)} is an estimate of conditional mean/median of Y given X. When \code{side = "above"}, 84 | intervals are of form [-Inf, a(x)] and when \code{side = "below"} the intervals are of form [a(x), Inf]. 85 | 86 | \code{quantiles} should be given when \code{type = "CQR"}. When \code{side = "two"}, \code{quantiles} 87 | should be a vector of length 2, giving \eqn{\alpha_{lo}} and \eqn{\alpha_{hi}}. When \code{side = "above"} 88 | or \code{side = "below"}, only one quantile should be given. 89 | 90 | \code{outfun} can be a valid string, including 91 | \itemize{ 92 | \item "RF" for random forest that predicts the conditional mean, a wrapper built on \code{randomForest} package. 93 | Used when \code{type = "mean"}. 94 | \item "quantRF" for quantile random forest that predicts the conditional quantiles, a wrapper built on 95 | \code{grf} package. Used when \code{type = "CQR"}. 96 | \item "Boosting" for gradient boosting that predicts the conditional mean, a wrapper built on \code{gbm} 97 | package. Used when \code{type = "mean"}. 98 | \item "quantBoosting" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 99 | \code{gbm} package. Used when \code{type = "CQR"}. 100 | \item "BART" for gradient boosting that predicts the conditional mean, a wrapper built on \code{bartMachine} 101 | package. Used when \code{type = "mean"}. 102 | \item "quantBART" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 103 | \code{bartMachine} package. Used when \code{type = "CQR"}. 104 | } 105 | 106 | or a function object whose input must include, but not limited to 107 | \itemize{ 108 | \item \code{Y} for outcome in the training data. 109 | \item \code{X} for covariates in the training data. 
110 | \item \code{Xtest} for covariates in the testing data. 111 | } 112 | When \code{type = "CQR"}, \code{outfun} should also include an argument \code{quantiles} that is either 113 | a vector of length 2 or a scalar, depending on the argument \code{side}. The output of \code{outfun} must be a matrix with two columns giving the conditional quantile estimates when \code{quantiles} is a vector of length 2; otherwise, it must be a vector giving the conditional quantile estimate or conditional mean estimate. Other optional arguments can be 114 | passed into \code{outfun} through \code{outparams}. 115 | 116 | \code{wtfun} is NULL for unweighted conformal inference. For weighted split conformal inference, it is a 117 | function with a required input \code{X} that produces a vector of non-negative reals of length \code{nrow(X)}. 118 | For weighted CV+, it can be a function as in the case \code{useCV = FALSE} so that the same function will 119 | apply to each fold, or a list of functions of length \code{nfolds} so that \code{wtfun[[k]]} is applied to fold \code{k}. 120 | } 121 | \examples{ 122 | \donttest{# Generate data from a linear model 123 | set.seed(1) 124 | n <- 1000 125 | d <- 5 126 | X <- matrix(rnorm(n * d), nrow = n) 127 | beta <- rep(1, 5) 128 | Y <- X \%*\% beta + rnorm(n) 129 | 130 | # Generate testing data 131 | ntest <- 5 132 | Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 133 | 134 | # Run unweighted split CQR with the built-in quantile random forest learner 135 | # grf package needs to be installed 136 | obj <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 137 | outfun = "quantRF", wtfun = NULL, useCV = FALSE) 138 | predict(obj, Xtest, alpha = 0.1) 139 | 140 | # Run unweighted standard split conformal inference with the built-in random forest learner 141 | # randomForest package needs to be installed 142 | obj <- conformal(X, Y, type = "mean", 143 | outfun = "RF", wtfun = NULL, useCV = FALSE) 144 | predict(obj, Xtest, alpha = 0.1) 145 | 146 | # Run unweighted CQR-CV+ with the built-in quantile random forest learner 147 | # grf package needs to be installed 148 | obj <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 149 | outfun = "quantRF", wtfun = NULL, useCV = TRUE) 150 | predict(obj, Xtest, alpha = 0.1) 151 | 152 | # Run unweighted standard CV+ with the built-in random forest learner 153 | # randomForest package needs to be installed 154 | obj <- conformal(X, Y, type = "mean", 155 | outfun = "RF", wtfun = NULL, useCV = TRUE) 156 | predict(obj, Xtest, alpha = 0.1) 157 | 158 | # Run weighted split CQR with w(x) = pnorm(x1) 159 | wtfun <- function(X){pnorm(X[, 1])} 160 | obj <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 161 | outfun = "quantRF", wtfun = wtfun, useCV = FALSE) 162 | predict(obj, Xtest, alpha = 0.1) 163 | 164 | # Run unweighted split CQR with a self-defined quantile random forest 165 | # Y, X, Xtest, quantiles should be included in the inputs 166 | quantRF <- function(Y, X, Xtest, quantiles, ...){ 167 | fit <- grf::quantile_forest(X, Y, quantiles = quantiles, ...) 
168 | res <- predict(fit, Xtest, quantiles = quantiles) 169 | if (is.list(res) && !is.data.frame(res)){ 170 | # for the recent update of \code{grf} package that 171 | # changes the output format 172 | res <- res$predictions 173 | } 174 | if (length(quantiles) == 1){ 175 | res <- as.numeric(res) 176 | } else { 177 | res <- as.matrix(res) 178 | } 179 | return(res) 180 | } 181 | obj <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 182 | outfun = quantRF, wtfun = NULL, useCV = FALSE) 183 | predict(obj, Xtest, alpha = 0.1) 184 | 185 | # Run unweighted standard split conformal inference with a self-defined linear regression 186 | # Y, X, Xtest should be included in the inputs 187 | linearReg <- function(Y, X, Xtest){ 188 | X <- as.data.frame(X) 189 | Xtest <- as.data.frame(Xtest) 190 | data <- data.frame(Y = Y, X) 191 | fit <- lm(Y ~ ., data = data) 192 | as.numeric(predict(fit, Xtest)) 193 | } 194 | obj <- conformal(X, Y, type = "mean", 195 | outfun = linearReg, wtfun = NULL, useCV = FALSE) 196 | predict(obj, Xtest, alpha = 0.1) 197 | 198 | # Run weighted split-CQR with user-defined weights 199 | wtfun <- function(X){ 200 | pnorm(X[, 1]) 201 | } 202 | obj <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 203 | outfun = "quantRF", wtfun = wtfun, useCV = FALSE) 204 | predict(obj, Xtest, alpha = 0.1) 205 | 206 | # Run weighted CQR-CV+ with user-defined weights 207 | # Use a list of identical functions 208 | set.seed(1) 209 | wtfun_list <- lapply(1:10, function(i){wtfun}) 210 | obj1 <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 211 | outfun = "quantRF", wtfun = wtfun_list, useCV = TRUE) 212 | predict(obj1, Xtest, alpha = 0.1) 213 | 214 | # Use a single function. Equivalent to the above approach 215 | set.seed(1) 216 | obj2 <- conformal(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 217 | outfun = "quantRF", wtfun = wtfun, useCV = TRUE) 218 | predict(obj2, Xtest, alpha = 0.1) 219 | } 220 | 221 | } 222 | \seealso{ 223 | \code{\link{predict.conformalSplit}}, \code{\link{predict.conformalCV}}. 224 | } 225 | -------------------------------------------------------------------------------- /man/conformalCf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/conformalCf.R 3 | \name{conformalCf} 4 | \alias{conformalCf} 5 | \title{Conformal inference for counterfactuals} 6 | \usage{ 7 | conformalCf( 8 | X, 9 | Y, 10 | estimand = c("unconditional", "nonmissing", "missing"), 11 | type = c("CQR", "mean"), 12 | side = c("two", "above", "below"), 13 | quantiles = NULL, 14 | outfun = NULL, 15 | outparams = list(), 16 | psfun = NULL, 17 | psparams = list(), 18 | useCV = FALSE, 19 | trainprop = 0.75, 20 | nfolds = 10 21 | ) 22 | } 23 | \arguments{ 24 | \item{X}{covariates.} 25 | 26 | \item{Y}{outcome vector with missing values encoded as NA. See Details.} 27 | 28 | \item{estimand}{a string that takes values in \{"unconditional", "nonmissing", "missing"\}. See Details.} 29 | 30 | \item{type}{a string that takes values in \{"CQR", "mean"\}.} 31 | 32 | \item{side}{a string that takes values in \{"two", "above", "below"\}. See Details.} 33 | 34 | \item{quantiles}{a scalar or a vector of length 2 depending on \code{side}. Used only when \code{type = "CQR"}. See Details.} 35 | 36 | \item{outfun}{a function that models the conditional mean or quantiles, or a valid string. 
37 | The default is random forest when \code{type = "mean"} and quantile random forest when 38 | \code{type = "CQR"}. See Details.} 39 | 40 | \item{outparams}{a list of other parameters to be passed into \code{outfun}.} 41 | 42 | \item{psfun}{a function that models the missing mechanism (probability of missing given X), or a valid string. 43 | The default is "Boosting". See Details.} 44 | 45 | \item{psparams}{a list of other parameters to be passed into \code{psfun}.} 46 | 47 | \item{useCV}{FALSE for split conformal inference and TRUE for CV+.} 48 | 49 | \item{trainprop}{proportion of units for training \code{outfun}. The default is 75\%. Used only when \code{useCV = FALSE}.} 50 | 51 | \item{nfolds}{number of folds. The default is 10. Used only when \code{useCV = TRUE}.} 52 | } 53 | \value{ 54 | a \code{conformalSplit} object when \code{useCV = FALSE} or a \code{conformalCV} object 55 | } 56 | \description{ 57 | \code{conformalCf} computes intervals for counterfactuals or outcomes with ignorable missing values in general. 58 | It supports both split conformal inference and CV+, 59 | including weighted Jackknife+ as a special case. For each type, it supports both conformalized 60 | quantile regression (CQR) and standard conformal inference based on conditional mean regression. 61 | } 62 | \details{ 63 | The outcome \code{Y} must comprise both observed values and missing values encoded as NA. 64 | The missing values are used to estimate the propensity score \eqn{P(missing | X)}. 65 | 66 | \code{estimand} controls the type of coverage to be guaranteed: 67 | \itemize{ 68 | \item (Default) when \code{estimand = "unconditional"}, the interval has 69 | \eqn{P(Y \in \hat{C}(X))\ge 1 - \alpha}. 70 | \item When \code{estimand = "nonmissing"}, the interval has 71 | \eqn{P(Y \in \hat{C}(X) | nonmissing) \ge 1 - \alpha}. 72 | \item When \code{estimand = "missing"}, the interval has 73 | \eqn{P(Y \in \hat{C}(X) | missing) \ge 1 - \alpha}. 74 | } 75 | 76 | When \code{side = "above"}, 77 | intervals are of form [-Inf, a(x)] and when \code{side = "below"} the intervals are of form [a(x), Inf]. 78 | 79 | \code{outfun} can be a valid string, including 80 | \itemize{ 81 | \item "RF" for random forest that predicts the conditional mean, a wrapper built on \code{randomForest} package. 82 | Used when \code{type = "mean"}. 83 | \item "quantRF" for quantile random forest that predicts the conditional quantiles, a wrapper built on 84 | \code{grf} package. Used when \code{type = "CQR"}. 85 | \item "Boosting" for gradient boosting that predicts the conditional mean, a wrapper built on \code{gbm} 86 | package. Used when \code{type = "mean"}. 87 | \item "quantBoosting" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 88 | \code{gbm} package. Used when \code{type = "CQR"}. 89 | \item "BART" for gradient boosting that predicts the conditional mean, a wrapper built on \code{bartMachine} 90 | package. Used when \code{type = "mean"}. 91 | \item "quantBART" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 92 | \code{bartMachine} package. Used when \code{type = "CQR"}. 93 | } 94 | or a function object whose input must include, but not limited to 95 | \itemize{ 96 | \item \code{Y} for outcome in the training data. 97 | \item \code{X} for covariates in the training data. 98 | \item \code{Xtest} for covariates in the testing data.
99 | } 100 | When \code{type = "CQR"}, \code{outfun} should also include an argument \code{quantiles} that is either 101 | a vector of length 2 or a scalar, depending on the argument \code{side}. The output of \code{outfun} 102 | must be a matrix with two columns giving the conditional quantile estimates when \code{quantiles} is 103 | a vector of length 2; otherwise, it must be a vector giving the conditional quantile estimate or 104 | conditional mean estimate. Other optional arguments can be passed into \code{outfun} through \code{outparams}. 105 | 106 | \code{psfun} can be a valid string, including 107 | \itemize{ 108 | \item "RF" for random forest that predicts the propensity score, a wrapper built on \code{randomForest} package. 109 | Used when \code{type = "mean"}. 110 | \item "Boosting" for gradient boosting that predicts the propensity score, a wrapper built on \code{gbm} 111 | package. Used when \code{type = "mean"}. 112 | } 113 | or a function object whose input must include, but not limited to 114 | \itemize{ 115 | \item \code{Y} for treatment assignment, a binary vector, in the training data. 116 | \item \code{X} for covariates in the training data. 117 | \item \code{Xtest} for covariates in the testing data. 118 | } 119 | The output of \code{psfun} must be a vector of predicted probabilities. Other optional arguments 120 | can be passed into \code{psfun} through \code{psparams}. 121 | } 122 | \examples{ 123 | \donttest{# Generate data from a linear model 124 | set.seed(1) 125 | n <- 1000 126 | d <- 5 127 | X <- matrix(rnorm(n * d), nrow = n) 128 | beta <- rep(1, 5) 129 | Y <- X \%*\% beta + rnorm(n) 130 | 131 | # Generate missing indicators 132 | missing_prob <- pnorm(X[, 1]) 133 | if_missing <- missing_prob < runif(n) 134 | Y[if_missing] <- NA 135 | 136 | # Generate testing data 137 | ntest <- 5 138 | Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 139 | 140 | # Run weighted split CQR 141 | obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 142 | outfun = "quantRF", useCV = FALSE) 143 | predict(obj, Xtest, alpha = 0.1) 144 | 145 | # Run weighted standard conformal inference 146 | obj <- conformalCf(X, Y, type = "mean", 147 | outfun = "RF", useCV = FALSE) 148 | predict(obj, Xtest, alpha = 0.1) 149 | 150 | # Run one-sided weighted split CQR 151 | obj1 <- conformalCf(X, Y, type = "CQR", side = "above", 152 | quantiles = 0.95, outfun = "quantRF", useCV = FALSE) 153 | predict(obj1, Xtest, alpha = 0.1) 154 | obj2 <- conformalCf(X, Y, type = "CQR", side = "below", 155 | quantiles = 0.05, outfun = "quantRF", useCV = FALSE) 156 | predict(obj2, Xtest, alpha = 0.1) 157 | 158 | # Run split CQR with a self-defined quantile random forest 159 | # Y, X, Xtest, quantiles should be included in the inputs 160 | quantRF <- function(Y, X, Xtest, quantiles, ...){ 161 | fit <- grf::quantile_forest(X, Y, quantiles = quantiles, ...) 
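# With quantiles = c(0.05, 0.95), the fitted forest yields two quantile estimates per test point, which the code below coerces to the two-column matrix expected by conformalCf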
162 | res <- predict(fit, Xtest, quantiles = quantiles) 163 | if (is.list(res) && !is.data.frame(res)){ 164 | # for the recent update of \code{grf} package that 165 | # changes the output format 166 | res <- res$predictions 167 | } 168 | if (length(quantiles) == 1){ 169 | res <- as.numeric(res) 170 | } else { 171 | res <- as.matrix(res) 172 | } 173 | return(res) 174 | } 175 | obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 176 | outfun = quantRF, useCV = FALSE) 177 | predict(obj, Xtest, alpha = 0.1) 178 | 179 | # Run standard split conformal inference with a self-defined linear regression 180 | # Y, X, Xtest should be included in the inputs 181 | linearReg <- function(Y, X, Xtest){ 182 | X <- as.data.frame(X) 183 | Xtest <- as.data.frame(Xtest) 184 | data <- data.frame(Y = Y, X) 185 | fit <- lm(Y ~ ., data = data) 186 | as.numeric(predict(fit, Xtest)) 187 | } 188 | obj <- conformalCf(X, Y, type = "mean", 189 | outfun = linearReg, useCV = FALSE) 190 | predict(obj, Xtest, alpha = 0.1) 191 | 192 | # Run split CQR with a built-in psfun 193 | # Y, X, Xtest, should be included in the inputs 194 | obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 195 | outfun = "quantRF", psfun = "RF", useCV = FALSE) 196 | predict(obj, Xtest, alpha = 0.1) 197 | 198 | # Run split CQR with a self-defined function to estimate propensity scores 199 | # Y, X, Xtest, should be included in the inputs 200 | logitReg <- function(Y, X, Xtest, ...){ 201 | X <- as.data.frame(X) 202 | Xtest <- as.data.frame(Xtest) 203 | data <- data.frame(Y = Y, X) 204 | fit <- glm(Y ~ ., data = data, family = "binomial", ...) 205 | as.numeric(predict(fit, Xtest, type = "response")) 206 | } 207 | obj <- conformalCf(X, Y, type = "CQR", quantiles = c(0.05, 0.95), 208 | outfun = "quantRF", psfun = logitReg, useCV = FALSE) 209 | predict(obj, Xtest, alpha = 0.1) 210 | 211 | } 212 | } 213 | \seealso{ 214 | \code{\link{conformal}}, \code{\link{conformalIte}} 215 | } 216 | -------------------------------------------------------------------------------- /man/conformalInt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/conformalInt.R 3 | \name{conformalInt} 4 | \alias{conformalInt} 5 | \title{Conformal inference for interval outcomes} 6 | \usage{ 7 | conformalInt( 8 | X, 9 | Y, 10 | type = c("CQR", "mean"), 11 | lofun = NULL, 12 | loquantile = 0.5, 13 | loparams = list(), 14 | upfun = NULL, 15 | upquantile = 0.5, 16 | upparams = list(), 17 | wtfun = NULL, 18 | useCV = FALSE, 19 | trainprop = 0.75, 20 | trainid = NULL, 21 | nfolds = 10, 22 | idlist = NULL 23 | ) 24 | } 25 | \arguments{ 26 | \item{X}{covariates.} 27 | 28 | \item{Y}{interval outcomes. A matrix with two columns.} 29 | 30 | \item{type}{a string that takes values in \{"CQR", "mean"\}.} 31 | 32 | \item{lofun}{a function to fit the lower bound, or a valid string. See Details.} 33 | 34 | \item{loquantile}{the quantile to be fit by \code{lofun}. Used only when \code{type = "CQR"}.} 35 | 36 | \item{loparams}{a list of other parameters to be passed into \code{lofun}.} 37 | 38 | \item{upfun}{a function to fit the upper bound, or a valid string; see Details.} 39 | 40 | \item{upquantile}{the quantile to be fit by \code{upfun}. 
Used only when \code{type = "CQR"}.} 41 | 42 | \item{upparams}{a list of other parameters to be passed into \code{upfun}.} 43 | 44 | \item{wtfun}{NULL for unweighted conformal inference, or a function for weighted conformal inference 45 | when \code{useCV = FALSE}, or a list of functions for weighted conformal inference when \code{useCV = TRUE}. 46 | See Details.} 47 | 48 | \item{useCV}{FALSE for split conformal inference and TRUE for CV+.} 49 | 50 | \item{trainprop}{proportion of units for training \code{outfun}. The default is 75\%. Used only when \code{useCV = FALSE}.} 51 | 52 | \item{trainid}{indices of training units. The default is NULL, generating random indices. Used only when \code{useCV = FALSE}.} 53 | 54 | \item{nfolds}{number of folds. The default is 10. Used only when \code{useCV = TRUE}.} 55 | 56 | \item{idlist}{a list of indices of length \code{nfolds}. The default is NULL, generating random indices. Used only when \code{useCV = TRUE}.} 57 | } 58 | \value{ 59 | a \code{conformalIntSplit} object when \code{useCV = FALSE} with the following attributes: 60 | \itemize{ 61 | \item{Yscore:}{ a vector of non-conformity scores on the calibration fold} 62 | \item{wt:}{ a vector of weights on the calibration fold} 63 | \item{Ymodel:}{ a function with required argument \code{X} that produces estimates of the conditional 64 | mean or quantiles at \code{X}} 65 | \item{wtfun, type, loquantile, upquantile, trainprop, trainid:}{ the same as inputs} 66 | } 67 | 68 | or a \code{conformalIntCV} object when \code{useCV = TRUE} with the following attributes: 69 | \itemize{ 70 | \item{info: }{ a list of length \code{nfolds} with each element being a list with attributes 71 | \code{Yscore}, \code{wt} and \code{Ymodel} described above for each fold} 72 | \item{wtfun, type, loquantile, upquantile, nfolds, idlist:}{ the same as inputs} 73 | } 74 | } 75 | \description{ 76 | \code{conformalInt} is a framework for weighted and unweighted conformal inference for interval 77 | outcomes. It supports both weighted split conformal inference and weighted CV+, 78 | including weighted Jackknife+ as a special case. For each type, it supports both conformalized 79 | quantile regression (CQR) and standard conformal inference based on conditional mean regression. 80 | } 81 | \details{ 82 | The conformal interval for a testing point x is in the form of 83 | \eqn{[\hat{m}^{L}(x) - \eta, \hat{m}^{R}(x) + \eta]} where \eqn{\hat{m}^{L}(x)} is fit by \code{lofun} 84 | and \eqn{\hat{m}^{R}(x)} is fit by \code{upfun}. 85 | 86 | \code{lofun}/\code{upfun} can be a valid string, including 87 | \itemize{ 88 | \item "RF" for random forest that predicts the conditional mean, a wrapper built on \code{randomForest} package. 89 | Used when \code{type = "mean"}; 90 | \item "quantRF" for quantile random forest that predicts the conditional quantiles, a wrapper built on 91 | \code{grf} package. Used when \code{type = "CQR"}; 92 | \item "Boosting" for gradient boosting that predicts the conditional mean, a wrapper built on \code{gbm} 93 | package. Used when \code{type = "mean"}; 94 | \item "quantBoosting" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 95 | \code{gbm} package. Used when \code{type = "CQR"}; 96 | \item "BART" for gradient boosting that predicts the conditional mean, a wrapper built on \code{bartMachine} 97 | package.
Used when \code{type = "mean"}; 98 | \item "quantBART" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 99 | \code{bartMachine} package. Used when \code{type = "CQR"}; 100 | } 101 | 102 | or a function object whose input must include, but not limited to 103 | \itemize{ 104 | \item \code{Y} for outcome in the training data; 105 | \item \code{X} for covariates in the training data; 106 | \item \code{Xtest} for covariates in the testing data. 107 | } 108 | When \code{type = "CQR"}, \code{lofun} and \code{upfun} should also include an argument \code{quantiles} that is a scalar. The output of \code{lofun} and \code{upfun} must be a vector giving the conditional quantile estimate or conditional mean estimate. Other optional arguments can be 109 | passed into \code{lofun} and \code{upfun} through \code{loparams} and \code{upparams}. 110 | } 111 | \examples{ 112 | \donttest{# Generate data from a linear model 113 | set.seed(1) 114 | n <- 1000 115 | d <- 5 116 | X <- matrix(rnorm(n * d), nrow = n) 117 | beta <- rep(1, 5) 118 | Ylo <- X \%*\% beta + rnorm(n) 119 | Yup <- Ylo + pmax(1, 2 * rnorm(n)) 120 | Y <- cbind(Ylo, Yup) 121 | 122 | # Generate testing data 123 | ntest <- 5 124 | Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 125 | 126 | # Run unweighted split CQR with the built-in quantile random forest learner 127 | # grf package needs to be installed 128 | obj <- conformalInt(X, Y, type = "CQR", 129 | lofun = "quantRF", upfun = "quantRF", 130 | wtfun = NULL, useCV = FALSE) 131 | predict(obj, Xtest, alpha = 0.1) 132 | 133 | # Run unweighted standard split conformal inference with the built-in random forest learner 134 | # randomForest package needs to be installed 135 | obj <- conformalInt(X, Y, type = "mean", 136 | lofun = "RF", upfun = "RF", 137 | wtfun = NULL, useCV = FALSE) 138 | predict(obj, Xtest, alpha = 0.1) 139 | 140 | # Run unweighted CQR-CV+ with the built-in quantile random forest learner 141 | # grf package needs to be installed 142 | obj <- conformalInt(X, Y, type = "CQR", 143 | lofun = "quantRF", upfun = "quantRF", 144 | wtfun = NULL, useCV = TRUE) 145 | predict(obj, Xtest, alpha = 0.1) 146 | 147 | # Run unweighted standard CV+ with the built-in random forest learner 148 | # randomForest package needs to be installed 149 | obj <- conformalInt(X, Y, type = "mean", 150 | lofun = "RF", upfun = "RF", 151 | wtfun = NULL, useCV = TRUE) 152 | predict(obj, Xtest, alpha = 0.1) 153 | 154 | # Run weighted split CQR with w(x) = pnorm(x1) 155 | wtfun <- function(X){pnorm(X[, 1])} 156 | obj <- conformalInt(X, Y, type = "CQR", 157 | lofun = "quantRF", upfun = "quantRF", 158 | wtfun = wtfun, useCV = FALSE) 159 | predict(obj, Xtest, alpha = 0.1) 160 | 161 | # Run unweighted split CQR with a self-defined quantile random forest 162 | # Y, X, Xtest, quantiles should be included in the inputs 163 | quantRF <- function(Y, X, Xtest, quantiles, ...){ 164 | fit <- grf::quantile_forest(X, Y, quantiles = quantiles, ...) 
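# When this learner is supplied as lofun or upfun, quantiles is the scalar loquantile or upquantile, so the length-1 branch below returns the plain vector expected by conformalInt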
165 | res <- predict(fit, Xtest, quantiles = quantiles) 166 | if (is.list(res) && !is.data.frame(res)){ 167 | # for the recent update of \code{grf} package that 168 | # changes the output format 169 | res <- res$predictions 170 | } 171 | if (length(quantiles) == 1){ 172 | res <- as.numeric(res) 173 | } else { 174 | res <- as.matrix(res) 175 | } 176 | return(res) 177 | } 178 | obj <- conformalInt(X, Y, type = "CQR", 179 | lofun = quantRF, upfun = quantRF, 180 | wtfun = NULL, useCV = FALSE) 181 | predict(obj, Xtest, alpha = 0.1) 182 | 183 | # Run unweighted standard split conformal inference with a self-defined linear regression 184 | # Y, X, Xtest should be included in the inputs 185 | linearReg <- function(Y, X, Xtest){ 186 | X <- as.data.frame(X) 187 | Xtest <- as.data.frame(Xtest) 188 | data <- data.frame(Y = Y, X) 189 | fit <- lm(Y ~ ., data = data) 190 | as.numeric(predict(fit, Xtest)) 191 | } 192 | obj <- conformalInt(X, Y, type = "mean", 193 | lofun = linearReg, upfun = linearReg, 194 | wtfun = NULL, useCV = FALSE) 195 | predict(obj, Xtest, alpha = 0.1) 196 | 197 | # Run weighted split-CQR with user-defined weights 198 | wtfun <- function(X){ 199 | pnorm(X[, 1]) 200 | } 201 | obj <- conformalInt(X, Y, type = "CQR", 202 | lofun = "quantRF", upfun = "quantRF", 203 | wtfun = wtfun, useCV = FALSE) 204 | predict(obj, Xtest, alpha = 0.1) 205 | 206 | # Run weighted CQR-CV+ with user-defined weights 207 | # Use a list of identical functions 208 | set.seed(1) 209 | wtfun_list <- lapply(1:10, function(i){wtfun}) 210 | obj1 <- conformalInt(X, Y, type = "CQR", 211 | lofun = "quantRF", upfun = "quantRF", 212 | wtfun = wtfun_list, useCV = TRUE) 213 | predict(obj1, Xtest, alpha = 0.1) 214 | 215 | # Use a single function. Equivalent to the above approach 216 | set.seed(1) 217 | obj2 <- conformalInt(X, Y, type = "CQR", 218 | lofun = "quantRF", upfun = "quantRF", 219 | wtfun = wtfun, useCV = TRUE) 220 | predict(obj2, Xtest, alpha = 0.1) 221 | } 222 | } 223 | \seealso{ 224 | \code{\link{predict.conformalIntSplit}}, \code{\link{predict.conformalIntCV}}. 225 | } 226 | -------------------------------------------------------------------------------- /man/conformalIte.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/conformalIte.R 3 | \name{conformalIte} 4 | \alias{conformalIte} 5 | \title{Conformal inference for individual treatment effects} 6 | \usage{ 7 | conformalIte( 8 | X, 9 | Y, 10 | T, 11 | alpha = 0.1, 12 | algo = c("nest", "naive", "counterfactual"), 13 | exact = FALSE, 14 | type = c("CQR", "mean"), 15 | side = c("two", "above", "below"), 16 | quantiles = NULL, 17 | outfun = NULL, 18 | outparams = list(), 19 | psfun = NULL, 20 | psparams = list(), 21 | cfprop = 0.5, 22 | citype = c("CQR", "mean"), 23 | lofun = NULL, 24 | loquantile = 0.4, 25 | loparams = list(), 26 | upfun = NULL, 27 | upquantile = 0.6, 28 | upparams = list(), 29 | useCV = FALSE, 30 | trainprop = 0.75, 31 | nfolds = 10, 32 | wthigh = 20, 33 | wtlow = 0.05, 34 | useInf = FALSE 35 | ) 36 | } 37 | \arguments{ 38 | \item{X}{covariates.} 39 | 40 | \item{Y}{observed outcome vector.} 41 | 42 | \item{T}{treatment indicator, a binary vector.} 43 | 44 | \item{alpha}{confidence level.} 45 | 46 | \item{algo}{a string that takes values in \{"nest", "naive", "counterfactual"\}. See Details.} 47 | 48 | \item{exact}{a logical indicating whether the exact calibration is used for nested approach. 
Used only when \code{algo = "nest"}. See Details.} 49 | 50 | \item{type}{a string that takes values in \{"CQR", "mean"\}.} 51 | 52 | \item{side}{a string that takes values in \{"two", "above", "below"\}. See Details.} 53 | 54 | \item{quantiles}{a vector of length 2 giving the quantiles to be fit by \code{outfun}. Used only when \code{type = "CQR"}. See Details.} 55 | 56 | \item{outfun}{a function that models the conditional mean or quantiles, or a valid string. 57 | The default is random forest when \code{type = "mean"} and quantile random forest when 58 | \code{type = "CQR"}. See Details.} 59 | 60 | \item{outparams}{a list of other parameters to be passed into \code{outfun}.} 61 | 62 | \item{psfun}{a function that models the missing mechanism (probability of missing given X), or a valid string. 63 | The default is "Boosting". See Details.} 64 | 65 | \item{psparams}{a list of other parameters to be passed into \code{psfun}.} 66 | 67 | \item{cfprop}{the proportion of units to be used to compute ITE intervals in nested approach. Used only when 68 | \code{algo = "nest"}.} 69 | 70 | \item{citype}{the type of interval conformal inference used in the nested approach with exact calibration. 71 | Used only when \code{algo = "nest"} and \code{exact = TRUE}.} 72 | 73 | \item{lofun}{the function to fit the lower bound, or a valid string. Used only when 74 | \code{algo = "nest"}. See Details.} 75 | 76 | \item{loquantile}{the quantile to fit for \code{lofun}. Used only when 77 | \code{algo = "nest"} and \code{citype = "CQR"}. See Details.} 78 | 79 | \item{loparams}{a list of other parameters to be passed into \code{lofun}.} 80 | 81 | \item{upfun}{the function to fit the upper bound, or a valid string. Used only when 82 | \code{algo = "nest"}. See Details.} 83 | 84 | \item{upquantile}{the quantile to fit for \code{upfun}. Used only when 85 | \code{algo = "nest"} and \code{citype = "CQR"}. See Details.} 86 | 87 | \item{upparams}{a list of other parameters to be passed into \code{upfun}.} 88 | 89 | \item{useCV}{FALSE for split conformal inference and TRUE for CV+.} 90 | 91 | \item{trainprop}{proportion of units for training \code{outfun}. The default is 75\%. Used only when \code{useCV = FALSE}.} 92 | 93 | \item{nfolds}{number of folds. The default is 10. Used only when \code{useCV = TRUE}.} 94 | 95 | \item{wthigh}{upper truncation level of weights. See \code{\link{predict.conformalSplit}} or \code{\link{predict.conformalCV}}.} 96 | 97 | \item{wtlow}{lower truncation level of weights. See \code{\link{predict.conformalSplit}} or \code{\link{predict.conformalCV}}.} 98 | 99 | \item{useInf}{if FALSE then replace infinity by the maximum conformity score.} 100 | } 101 | \value{ 102 | a function that outputs the interval estimates on a given dataset. When \code{algo = "nest"} or \code{"naive"}, it takes 103 | a single input \code{X}; when \code{algo = "counterfactual"}, it takes three inputs \code{X}, \code{Y} and \code{T}. 104 | } 105 | \seealso{ 106 | \code{\link{conformal}}, \code{\link{conformalInt}}, \code{\link{conformalCf}} 107 | } 108 | \description{ 109 | \code{conformalIte} supports four algorithms: the nested approach with exact and inexact 110 | calibration for cases with both potential outcomes missing, the naive approach for cases with both potential outcomes missing and the counterfactual 111 | inference for cases with only one potential outcome missing. For each algorithm, it supports both 112 | split conformal inference and CV+, including weighted Jackknife+ as a special case.
For each type, it 113 | supports both conformalized quantile regression (CQR) and standard conformal inference based on conditional mean regression. 114 | } 115 | \details{ 116 | The algorithm to be used is controlled by \code{algo} and \code{exact}: 117 | \itemize{ 118 | \item (Default) when \code{algo = "nest"} and \code{exact = FALSE}, the inexact nested approach is used. It 119 | first splits the data into two folds, with the second fold including \code{cfprop} fraction of units. Then it applies 120 | \code{conformalCf} on the first fold to compute counterfactual intervals on the second fold, which further yields 121 | interval estimates of ITE \eqn{\hat{C}(X_i)}. Finally it fits \eqn{\hat{C}^{L}(X_i)} and \eqn{\hat{C}^{R}(X_i)} on \eqn{X_i}'s. 122 | \item When \code{algo = "nest"} and \code{exact = TRUE}, the exact nested approach is used. It has the same steps as the inexact nested approach to produce 123 | ITE intervals \eqn{\hat{C}(X_i)}'s on the second fold but then applies \code{\link{conformalInt}} to calibrate them. 124 | \item When \code{algo = "naive"}, the naive approach is used. It applies \code{\link{conformalCf}} on the data and 125 | produce counterfactual intervals for both Y(1) and Y(0). The ITE intervals are computed by contrasting two counterfactual intervals. 126 | \item When \code{algo = "counterfactual"}, it handles the case where the treatment assignments and the observed outcome are 127 | both available for each testing point. As with the naive approach, it applies \code{\link{conformalCf}} on the data and 128 | produce counterfactual intervals for both Y(1) and Y(0). The ITE intervals are then computed by contrasting the observed outcome 129 | and the interval for the missing potential outcome. 130 | } 131 | 132 | When \code{side = "above"}, 133 | intervals are of form [-Inf, a(x)] and when \code{side = "below"} the intervals are of form [a(x), Inf]. 134 | 135 | When \code{type = "CQR"}, \code{quantiles} must be a vector of 2, regardless of \code{side}. When \code{side = "two"}, \code{quantiles} will be used in \code{outfun} for both Y(1) and Y(0); when \code{side = "above"} or \code{"below"}, \code{quantiles[1]} will be used for Y(0) and \code{quantiles[2]} will be used for Y(1). 136 | 137 | \code{outfun} is applied to both Y(1) and Y(0). \code{outfun} can be a valid string, including 138 | \itemize{ 139 | \item "RF" for random forest that predicts the conditional mean, a wrapper built on \code{randomForest} package. 140 | Used when \code{type = "mean"}. 141 | \item "quantRF" for quantile random forest that predicts the conditional quantiles, a wrapper built on 142 | \code{grf} package. Used when \code{type = "CQR"}. 143 | \item "Boosting" for gradient boosting that predicts the conditional mean, a wrapper built on \code{gbm} 144 | package. Used when \code{type = "mean"}. 145 | \item "quantBoosting" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 146 | \code{gbm} package. Used when \code{type = "CQR"}. 147 | \item "BART" for gradient boosting that predicts the conditional mean, a wrapper built on \code{bartMachine} 148 | package. Used when \code{type = "mean"}. 149 | \item "quantBART" for quantile gradient boosting that predicts the conditional quantiles, a wrapper built on 150 | \code{bartMachine} package. Used when \code{type = "CQR"}. 151 | } 152 | or a function object whose input must include, but not limited to 153 | \itemize{ 154 | \item \code{Y} for outcome in the training data. 
155 | \item \code{X} for covariates in the training data. 156 | \item \code{Xtest} for covariates in the testing data. 157 | } 158 | When \code{type = "CQR"}, \code{outfun} should also include an argument \code{quantiles} that is either 159 | a vector of length 2 or a scalar, depending on the argument \code{side}. The output of \code{outfun} 160 | must be a matrix with two columns giving the conditional quantile estimates when \code{quantiles} is 161 | a vector of length 2; otherwise, it must be a vector giving the conditional quantile estimate or 162 | conditional mean estimate. Other optional arguments can be passed into \code{outfun} through \code{outparams}. 163 | 164 | \code{lofun} and \code{upfun} have the same forms as \code{outfun} except that the input \code{quantiles} 165 | must be a scalar when \code{citype = "CQR"}, instead of a vector of length 2, because only one conditional quantile 166 | is fitted. The argument \code{loquantile} is used for \code{lofun} and the argument \code{upquantile} is used 167 | for \code{upfun}. Moreover, the output must be a vector giving the conditional quantile estimate or conditional mean 168 | estimate. Other optional arguments can be passed into \code{lofun} through \code{loparams} and \code{upfun} 169 | through \code{upparams}. 170 | 171 | \code{psfun} can be a valid string, including 172 | \itemize{ 173 | \item "RF" for random forest that predicts the propensity score, a wrapper built on \code{randomForest} package. 174 | Used when \code{type = "mean"}. 175 | \item "Boosting" for gradient boosting that predicts the propensity score, a wrapper built on \code{gbm} 176 | package. Used when \code{type = "mean"}. 177 | } 178 | or a function object whose input must include, but not limited to 179 | \itemize{ 180 | \item \code{Y} for treatment assignment, a binary vector, in the training data. 181 | \item \code{X} for covariates in the training data. 182 | \item \code{Xtest} for covariates in the testing data. 183 | } 184 | The output of \code{psfun} must be a vector of predicted probabilities. Other optional arguments 185 | can be passed into \code{psfun} through \code{psparams}.
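For illustration only, a minimal sketch of the exact nested approach with a user-written quantile learner supplied as both \code{lofun} and \code{upfun} could look as follows; the helper name \code{quantLo} is arbitrary, the \code{grf} package is assumed to be installed, and \code{X}, \code{Y}, \code{T}, \code{Xtest} are generated as in the Examples section below.
\preformatted{
quantLo <- function(Y, X, Xtest, quantiles, ...){
    # quantiles is a scalar here (loquantile or upquantile)
    fit <- grf::quantile_forest(X, Y, quantiles = quantiles, ...)
    res <- predict(fit, Xtest, quantiles = quantiles)
    # newer grf releases return a list with a "predictions" element
    if (is.list(res) && !is.data.frame(res)) res <- res$predictions
    as.numeric(res)
}
CIfun <- conformalIte(X, Y, T, alpha = 0.1, algo = "nest", exact = TRUE,
                      type = "CQR", quantiles = c(0.05, 0.95), outfun = "quantRF",
                      citype = "CQR", lofun = quantLo, loquantile = 0.4,
                      upfun = quantLo, upquantile = 0.6, useCV = FALSE)
CIfun(Xtest)
}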
186 | } 187 | \examples{ 188 | \donttest{# Generate potential outcomes from two linear models 189 | set.seed(1) 190 | n <- 1000 191 | d <- 5 192 | X <- matrix(rnorm(n * d), nrow = n) 193 | beta <- rep(1, 5) 194 | Y1 <- X \%*\% beta + rnorm(n) 195 | Y0 <- rnorm(n) 196 | 197 | # Generate treatment indicators 198 | ps <- pnorm(X[, 1]) 199 | T <- as.numeric(ps < runif(n)) 200 | Y <- ifelse(T == 1, Y1, Y0) 201 | 202 | # Generate testing data 203 | ntest <- 5 204 | Xtest <- matrix(rnorm(ntest * d), nrow = ntest) 205 | 206 | # Inexact nested method 207 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, algo = "nest", exact = FALSE, type = "CQR", 208 | quantiles = c(0.05, 0.95), outfun = "quantRF", useCV = FALSE) 209 | CIfun(Xtest) 210 | 211 | # Exact nested method 212 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, algo = "nest", exact = TRUE, type = "CQR", 213 | quantiles = c(0.05, 0.95), outfun = "quantRF", useCV = FALSE) 214 | CIfun(Xtest) 215 | 216 | # naive method 217 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, algo = "naive", type = "CQR", 218 | quantiles = c(0.05, 0.95), outfun = "quantRF", useCV = FALSE) 219 | CIfun(Xtest) 220 | 221 | # counterfactual method, Y and T needs to be observed 222 | pstest <- pnorm(Xtest[, 1]) 223 | Ttest <- as.numeric(pstest < runif(ntest)) 224 | Y1test <- Xtest \%*\% beta + rnorm(ntest) 225 | Y0test <- rnorm(ntest) 226 | Ytest <- ifelse(Ttest == 1, Y1test, Y0test) 227 | CIfun <- conformalIte(X, Y, T, alpha = 0.1, algo = "counterfactual", type = "CQR", 228 | quantiles = c(0.05, 0.95), outfun = "quantRF", useCV = FALSE) 229 | CIfun(Xtest, Ytest, Ttest) 230 | } 231 | 232 | } 233 | -------------------------------------------------------------------------------- /man/predict.conformalCV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/conformal_CV.R 3 | \name{predict.conformalCV} 4 | \alias{predict.conformalCV} 5 | \title{Predict Method for conformalCV objects} 6 | \usage{ 7 | \method{predict}{conformalCV}( 8 | object, 9 | Xtest, 10 | alpha = 0.1, 11 | wthigh = 20, 12 | wtlow = 0.05, 13 | useInf = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{an object of class \code{conformalCV}; see \code{\link{conformal}}.} 19 | 20 | \item{Xtest}{testing covariates.} 21 | 22 | \item{alpha}{confidence level.} 23 | 24 | \item{wthigh}{upper truncation level of weights; see Details.} 25 | 26 | \item{wtlow}{lower truncation level of weights; see Details.} 27 | 28 | \item{useInf}{if FALSE then replace infinity by the maximum conformity score.} 29 | 30 | \item{...}{other arguments} 31 | } 32 | \value{ 33 | predictive intervals. A data.frame with \code{nrow(Xtest)} rows and two columns: 34 | "lower" for the lower bound and "upper" for the upper bound. 35 | } 36 | \description{ 37 | Obtains predictive intervals on a testing dataset based on a \code{conformalCV} object 38 | from \code{\link{conformal}} with \code{useCV = TRUE}. 39 | } 40 | \details{ 41 | Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 42 | weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 43 | In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 44 | and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 45 | is being used. 46 | } 47 | \seealso{ 48 | \code{\link{predict.conformalSplit}}, \code{\link{conformal}}. 
49 | } 50 | -------------------------------------------------------------------------------- /man/predict.conformalIntCV.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/conformalInt_CV.R 3 | \name{predict.conformalIntCV} 4 | \alias{predict.conformalIntCV} 5 | \title{Predict Method for conformalIntCV objects} 6 | \usage{ 7 | \method{predict}{conformalIntCV}( 8 | object, 9 | Xtest, 10 | alpha = 0.1, 11 | wthigh = 20, 12 | wtlow = 0.05, 13 | useInf = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{an object of class \code{conformalIntCV}; see \code{\link{conformalInt}}.} 19 | 20 | \item{Xtest}{testing covariates.} 21 | 22 | \item{alpha}{confidence level.} 23 | 24 | \item{wthigh}{upper truncation level of weights; see Details.} 25 | 26 | \item{wtlow}{lower truncation level of weights; see Details.} 27 | 28 | \item{useInf}{if FALSE then replace infinity by the maximum conformity score.} 29 | 30 | \item{...}{other arguments} 31 | } 32 | \value{ 33 | predictive intervals. A data.frame with \code{nrow(Xtest)} rows and two columns: 34 | "lower" for the lower bound and "upper" for the upper bound. 35 | } 36 | \description{ 37 | Obtains predictive intervals on a testing dataset based on a \code{conformalIntCV} object 38 | from \code{\link{conformalInt}} with \code{useCV = TRUE}. 39 | } 40 | \details{ 41 | Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 42 | weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 43 | In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 44 | and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 45 | is being used. 46 | } 47 | \seealso{ 48 | \code{\link{predict.conformalIntSplit}}, \code{\link{conformalInt}}. 49 | } 50 | -------------------------------------------------------------------------------- /man/predict.conformalIntSplit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/conformalInt_split.R 3 | \name{predict.conformalIntSplit} 4 | \alias{predict.conformalIntSplit} 5 | \title{Predict Method for conformalIntSplit objects} 6 | \usage{ 7 | \method{predict}{conformalIntSplit}( 8 | object, 9 | Xtest, 10 | alpha = 0.1, 11 | wthigh = 20, 12 | wtlow = 0.05, 13 | useInf = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{an object of class \code{conformalIntSplit}; see \code{\link{conformalInt}}.} 19 | 20 | \item{Xtest}{testing covariates.} 21 | 22 | \item{alpha}{confidence level.} 23 | 24 | \item{wthigh}{upper truncation level of weights; see Details.} 25 | 26 | \item{wtlow}{lower truncation level of weights; see Details.} 27 | 28 | \item{useInf}{if FALSE then replace infinity by the maximum conformity score.} 29 | 30 | \item{...}{other arguments} 31 | } 32 | \value{ 33 | predictive intervals. A data.frame with \code{nrow(Xtest)} rows and two columns: 34 | "lower" for the lower bound and "upper" for the upper bound. 35 | } 36 | \description{ 37 | Obtains predictive intervals on a testing dataset based on a \code{conformalIntSplit} object 38 | from \code{\link{conformalInt}} with \code{useCV = FALSE}. 
39 | } 40 | \details{ 41 | Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 42 | weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 43 | In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 44 | and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 45 | is being used. 46 | } 47 | \seealso{ 48 | \code{\link{predict.conformalIntCV}}, \code{\link{conformalInt}}. 49 | } 50 | -------------------------------------------------------------------------------- /man/predict.conformalSplit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/conformal_split.R 3 | \name{predict.conformalSplit} 4 | \alias{predict.conformalSplit} 5 | \title{Predict Method for conformalSplit objects} 6 | \usage{ 7 | \method{predict}{conformalSplit}( 8 | object, 9 | Xtest, 10 | alpha = 0.1, 11 | wthigh = 20, 12 | wtlow = 0.05, 13 | useInf = FALSE, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{object}{an object of class \code{conformalSplit}; see \code{\link{conformal}}.} 19 | 20 | \item{Xtest}{testing covariates.} 21 | 22 | \item{alpha}{confidence level.} 23 | 24 | \item{wthigh}{upper truncation level of weights; see Details.} 25 | 26 | \item{wtlow}{lower truncation level of weights; see Details.} 27 | 28 | \item{useInf}{if FALSE then replace infinity by the maximum conformity score.} 29 | 30 | \item{...}{other arguments} 31 | } 32 | \value{ 33 | predictive intervals. A data.frame with \code{nrow(Xtest)} rows and two columns: 34 | "lower" for the lower bound and "upper" for the upper bound. 35 | } 36 | \description{ 37 | Obtains predictive intervals on a testing dataset based on a \code{conformalSplit} object 38 | from \code{\link{conformal}} with \code{useCV = FALSE}. 39 | } 40 | \details{ 41 | Given a testing set \eqn{X_1, X_2, \ldots, X_n} and a weight function \eqn{w(x)}, the 42 | weight of the weighted distribution \eqn{p_j = w(X_j) / (w(X_1) + \cdots + w(X_n))}. 43 | In cases where some of \eqn{p_j} are extreme, we truncate \eqn{p_j} at level \code{wthigh} 44 | and \code{wtlow} to ensure stability. If \code{wthigh = Inf, wtlow = 0}, no truncation 45 | is being used. 46 | } 47 | \seealso{ 48 | \code{\link{predict.conformalCV}}, \code{\link{conformal}}. 
49 | } 50 | -------------------------------------------------------------------------------- /vignettes/auto/cfcausal.el: -------------------------------------------------------------------------------- 1 | (TeX-add-style-hook 2 | "cfcausal" 3 | (lambda () 4 | (LaTeX-add-bibitems 5 | "wager2018estimation" 6 | "lei2020conformal" 7 | "lei2020weighted" 8 | "grf" 9 | "barber2019predictive" 10 | "romano2019conformalized")) 11 | :bibtex) 12 | 13 | -------------------------------------------------------------------------------- /vignettes/cfcausal.bib: -------------------------------------------------------------------------------- 1 | @article{wager2018estimation, 2 | title = {Estimation and inference of heterogeneous treatment 3 | effects using random forests}, 4 | author = {Wager, Stefan and Athey, Susan}, 5 | journal = {Journal of the American Statistical Association}, 6 | volume = {113}, 7 | number = {523}, 8 | pages = {1228--1242}, 9 | year = {2018}, 10 | publisher = {Taylor \& Francis} 11 | } 12 | 13 | @article{lei2020conformal, 14 | title = {Conformal inference of counterfactuals and 15 | individual treatment effects}, 16 | author = {Lei, Lihua and Cand\`{e}s, Emmanuel}, 17 | journal = {arXiv preprint arXiv:2006.06138}, 18 | year = {2020} 19 | } 20 | 21 | @article{lei2020weighted, 22 | title = {Theory of weighted conformal inference}, 23 | author = {Lei, Lihua and Cand\`{e}s, Emmanuel}, 24 | journal = {Unpublished manuscript}, 25 | year = {2020} 26 | } 27 | 28 | @Manual{grf, 29 | title = {grf: Generalized Random Forests}, 30 | author = {Tibshirani, Julie and Athey, Susan and Wager, Stefan}, 31 | year = {2019}, 32 | note = {R package version 1.0.1}, 33 | url = {https://CRAN.R-project.org/package=grf}, 34 | } 35 | 36 | @article{barber2019predictive, 37 | title = {Predictive inference with the jackknife+}, 38 | author = {Barber, Rina Foygel and Cand\`{e}s, Emmanuel J and 39 | Ramdas, Aaditya and Tibshirani, Ryan J}, 40 | journal = {arXiv preprint arXiv:1905.02928}, 41 | year = {2019} 42 | } 43 | 44 | @inproceedings{romano2019conformalized, 45 | title = {Conformalized quantile regression}, 46 | author = {Romano, Yaniv and Patterson, Evan and Cand\`{e}s, 47 | Emmanuel}, 48 | booktitle = {Advances in Neural Information Processing Systems}, 49 | pages = {3538--3548}, 50 | year = {2019} 51 | } 52 | --------------------------------------------------------------------------------