├── .Rbuildignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── .DS_Store ├── PTOforest.R ├── bagged.causalMARS.R ├── causalBoosting.R ├── causalMARS.R ├── cv.causalBoosting.R ├── makebx.newmars.R ├── myridge.R ├── pollinated.ranger.R ├── predict.PTOforest.R ├── predict.bagged.causalMARS.R ├── predict.causalBoosting.R ├── predict.causalMARS.R ├── predict.causalTree.R ├── predict.cv.causalBoosting.R ├── predict.pollinated.ranger.R ├── stratify.R └── truncpow.R ├── README.md ├── causalLearning.Rcheck ├── 00_pkg_src │ └── causalLearning │ │ ├── DESCRIPTION │ │ ├── NAMESPACE │ │ ├── R │ │ ├── PTOforest.R │ │ ├── bagged.causalMARS.R │ │ ├── causalBoosting.R │ │ ├── causalMARS.R │ │ ├── cv.causalBoosting.R │ │ ├── makebx.newmars.R │ │ ├── myridge.R │ │ ├── pollinated.ranger.R │ │ ├── predict.PTOforest.R │ │ ├── predict.bagged.causalMARS.R │ │ ├── predict.causalBoosting.R │ │ ├── predict.causalMARS.R │ │ ├── predict.causalTree.R │ │ ├── predict.cv.causalBoosting.R │ │ ├── predict.pollinated.ranger.R │ │ ├── stratify.R │ │ └── truncpow.R │ │ ├── README.md │ │ ├── causalLearning_1.0.0.tar.gz │ │ ├── man │ │ ├── PTOforest.Rd │ │ ├── bagged.causalMARS.Rd │ │ ├── causalBoosting.Rd │ │ ├── causalMARS.Rd │ │ ├── cv.causalBoosting.Rd │ │ ├── pollinated.ranger.Rd │ │ ├── predict.PTOforest.Rd │ │ ├── predict.bagged.causalMARS.Rd │ │ ├── predict.causalBoosting.Rd │ │ ├── predict.causalMARS.Rd │ │ ├── predict.causalTree.Rd │ │ ├── predict.cv.causalBoosting.Rd │ │ ├── predict.pollinated.ranger.Rd │ │ └── stratify.Rd │ │ └── src │ │ ├── causalBoosting.c │ │ ├── causalBoosting.o │ │ ├── causalLearning.so │ │ └── symbols.rds ├── 00check.log ├── 00install.out ├── Rdlatex.log ├── causalLearning-Ex.R ├── causalLearning-Ex.Rout ├── causalLearning-Ex.pdf ├── causalLearning-manual.log ├── causalLearning-manual.pdf └── causalLearning │ ├── DESCRIPTION │ ├── INDEX │ ├── Meta │ ├── Rd.rds │ ├── hsearch.rds │ ├── links.rds │ ├── nsInfo.rds │ └── package.rds │ ├── NAMESPACE │ ├── R │ ├── causalLearning │ ├── causalLearning.rdb │ └── causalLearning.rdx │ ├── help │ ├── AnIndex │ ├── aliases.rds │ ├── causalLearning.rdb │ ├── causalLearning.rdx │ └── paths.rds │ ├── html │ ├── 00Index.html │ └── R.css │ └── libs │ ├── causalLearning.so │ └── symbols.rds ├── causalLearning_1.0.0.tar.gz ├── man ├── PTOforest.Rd ├── bagged.causalMARS.Rd ├── causalBoosting.Rd ├── causalMARS.Rd ├── cv.causalBoosting.Rd ├── pollinated.ranger.Rd ├── predict.PTOforest.Rd ├── predict.bagged.causalMARS.Rd ├── predict.causalBoosting.Rd ├── predict.causalMARS.Rd ├── predict.causalTree.Rd ├── predict.cv.causalBoosting.Rd ├── predict.pollinated.ranger.Rd └── stratify.Rd └── src ├── .DS_Store ├── .gitignore └── causalBoosting.c /.Rbuildignore: -------------------------------------------------------------------------------- 1 | causalLearning_1.0.tar.gz 2 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: causalLearning 2 | Title: Methods for heterogeneous treatment effect estimation 3 | Version: 1.0.0 4 | Authors@R: c( 5 | person("Scott", "Powers", email = "saberpowers@gmail.com", 6 | role = c("aut", "cre")), 7 | person("Junyang", "Qian", email = "junyangq@stanford.edu", role = "aut"), 8 | person("Trevor", "Hastie", email = "hastie@stanford.edu", role = "aut"), 9 | person("Robert", "Tibshirani", email = "tibs@stanford.edu", role = "aut")) 10 | Description: The main functions are cv.causalBoosting and bagged.causalMARS, 11 | 
which build upon the simpler causalBoosting and causalMARS functions. All of 12 | these functions have their own predict methods. 13 | Depends: R (>= 3.3.0) 14 | Imports: ranger 15 | License: GPL-2 16 | LazyData: true 17 | RoxygenNote: 6.0.1 18 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(predict,PTOforest) 4 | S3method(predict,bagged.causalMARS) 5 | S3method(predict,causalBoosting) 6 | S3method(predict,causalMARS) 7 | S3method(predict,causalTree) 8 | S3method(predict,cv.causalBoosting) 9 | export(PTOforest) 10 | export(bagged.causalMARS) 11 | export(causalBoosting) 12 | export(causalMARS) 13 | export(cv.causalBoosting) 14 | export(stratify) 15 | useDynLib(causalLearning) 16 | -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/R/.DS_Store -------------------------------------------------------------------------------- /R/PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Fit a pollinated transformed outcome (PTO) forest model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param pscore vector of propensity scores 7 | #' @param num.trees number of trees for transformed outcome forest 8 | #' @param mtry number of variables to possibly split at in each node 9 | #' @param min.node.size minimum node size for transformed outcome forest 10 | #' @param postprocess logical: should optional post-processing random forest be 11 | #' fit at end? 12 | #' @param verbose logical: should progress be printed to console? 
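#'
#' @details
#' The first forest is fit to the transformed outcome
#' \code{z = tx * y / pscore - (1 - tx) * y / (1 - pscore)}, whose conditional
#' mean equals the treatment effect when \code{pscore} gives the true
#' treatment-assignment probabilities (under the usual unconfoundedness
#' assumptions). That forest is then pollinated separately with the
#' treated-arm and control-arm responses to produce the two conditional-mean
#' forests used for prediction.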
13 | #' 14 | #' @return an object of class \code{PTOforest} with attributes: 15 | #' \itemize{ 16 | #' \item x: matrix of covariates supplied by function call 17 | #' \item pscore: vector of propensity score supplied by function call 18 | #' \item postprocess: logical supplied by function call 19 | #' \item TOfit: fitted random forest on transformed outcomes 20 | #' \item PTOfit1: TOfit pollinated with treatment-arm outcomes 21 | #' \item PTOfit0: TOfit pollinated with control-arm outcomes 22 | #' \item postfit: post-processing random forest summarizing results 23 | #' } 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate PTO forest model 38 | #'fit_pto = PTOforest(x, tx, y) 39 | #'pred_pto = predict(fit_pto, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_pto, main = 'PTO forest', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | 49 | PTOforest = function(x, tx, y, pscore = rep(.5, nrow(x)), 50 | num.trees = 500, mtry = ncol(x), min.node.size = max(25, nrow(x) / 40), 51 | postprocess = TRUE, verbose = FALSE) { 52 | 53 | 54 | # Input sanitization 55 | 56 | x = as.matrix(x) 57 | 58 | if (nrow(x) != length(tx)) { 59 | stop('nrow(x) does not match length(tx)') 60 | 61 | } else if (nrow(x) != length(y)) { 62 | stop('nrow(x) does not match length(y)') 63 | 64 | } else if (!is.numeric(x)) { 65 | stop('x must be numeric matrix') 66 | 67 | } else if (!is.numeric(y)) { 68 | stop('y must be numeric (use 0/1 for binary response)') 69 | 70 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 71 | stop('tx must be vector of 0s and 1s') 72 | 73 | } 74 | 75 | 76 | colnames(x) = paste('x', 1:ncol(x), sep = '') 77 | fit = list(x = x, pscore = pscore, postprocess = postprocess) 78 | 79 | z = tx * y / pscore - (1 - tx) * y / (1 - pscore) 80 | 81 | 82 | if (verbose) cat('fitting IPW treatment forest\n') 83 | 84 | data = data.frame(y = z, x = x) 85 | colnames(data) = c('y', colnames(x)) 86 | fit$TOfit = ranger::ranger(data = data, dependent.variable.name = 'y', 87 | num.trees = num.trees, min.node.size = min.node.size, mtry = mtry, 88 | write.forest = TRUE) 89 | 90 | 91 | # Now pollinate the tree separately with treated and untreated 92 | if (verbose) { 93 | cat('pollinating IPW treatment forest separately with treated and', 94 | 'untreated y\n') 95 | } 96 | 97 | fit$PTOfit1 = pollinated.ranger(fit$TOfit, x = x[tx == 1, ], y = y[tx == 1]) 98 | fit$PTOfit0 = pollinated.ranger(fit$TOfit, x = x[tx == 0, ], y = y[tx == 0]) 99 | 100 | 101 | if (postprocess) { 102 | # and one more summarization rf 103 | if (verbose) cat('fitting TX summary forest\n') 104 | 105 | delta = stats::predict(fit$PTOfit1, x) - stats::predict(fit$PTOfit0, x) 106 | data = data.frame(y = delta, x = x) 107 | colnames(x) = paste('x', 1:ncol(x), sep = '') 108 | colnames(data) = c('y', colnames(x)) 109 | fit$postfit = ranger::ranger(data = data, dependent.variable.name = 'y', 110 | num.trees = num.trees, mtry = 
ncol(x), write.forest = TRUE) 111 | } 112 | 113 | class(fit) = 'PTOforest' 114 | fit 115 | } 116 | 117 | -------------------------------------------------------------------------------- /R/bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a bag of causal MARS models 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param nbag number of models to bag 7 | #' @param maxterms maximum number of terms to include in the regression basis 8 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 9 | #' @param nquant number of quantiles used in splitting 10 | #' @param degree max number of different predictors that can interact in model 11 | #' @param eps shrinkage factor for new term added 12 | #' @param backstep logical: should out-of-bag samples be used to prune each 13 | #' model? otherwise full regression basis is used for each model 14 | #' @param propensity logical: should propensity score stratification be used? 15 | #' @param stratum optional vector giving propensity score stratum for each 16 | #' observation (only used if \code{propensity = TRUE}) 17 | #' @param minnum minimum number of observations in each arm of each propensity 18 | #' score stratum needed to estimate regression coefficients for basis 19 | #' (only used if \code{propensity = TRUE}) 20 | #' @param verbose logical: should progress be printed to console? 21 | #' 22 | #' @return an object of class \code{bagged.causalMARS}, which is itself a list 23 | #' of \code{causalMARS} objects 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate bagged causal MARS model 38 | #'fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10) 39 | #'pred_bcm = predict(fit_bcm, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | bagged.causalMARS = function(x, tx, y, nbag = 20, maxterms = 11, nquant = 5, 49 | degree = ncol(x), eps = 1, backstep = FALSE, 50 | propensity = FALSE, stratum = rep(1, nrow(x)), minnum = 5, verbose = FALSE) { 51 | 52 | 53 | # Input sanitization 54 | 55 | x = as.matrix(x) 56 | 57 | if (nrow(x) != length(tx)) { 58 | stop('nrow(x) does not match length(tx)') 59 | 60 | } else if (nrow(x) != length(y)) { 61 | stop('nrow(x) does not match length(y)') 62 | 63 | } else if (!is.numeric(x)) { 64 | stop('x must be numeric matrix') 65 | 66 | } else if (!is.numeric(y)) { 67 | stop('y must be numeric (use 0/1 for binary response)') 68 | 69 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 70 | stop('tx must be vector of 0s and 1s') 71 | 72 | } 73 | 74 | 75 | x = scale(x, center = TRUE, scale = FALSE) 76 | 77 | fit = list() 78 | 79 | for (b in 1:nbag) { 80 | 81 | if (verbose) cat(c('BAG=', b, '/', nbag), fill = TRUE) 82 | 83 | bag = sample(1:nrow(x), size = 
nrow(x), replace = TRUE) 84 | oob = rep(TRUE, nrow(x)) 85 | oob[bag] = FALSE 86 | fit[[b]] = causalMARS(x = x[bag, ], tx = tx[bag], y = y[bag], 87 | maxterms = maxterms, nquant = nquant, degree = degree, eps = eps, 88 | backstep = backstep, x.val = x[oob, ], tx.val = tx[oob], y.val = y[oob], 89 | propensity = propensity, stratum = stratum[bag], 90 | stratum.val = stratum[oob], minnum = minnum) 91 | } 92 | class(fit) = 'bagged.causalMARS' 93 | fit 94 | } 95 | 96 | -------------------------------------------------------------------------------- /R/causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal boosting model 2 | #' 3 | #' @useDynLib causalLearning 4 | #' 5 | #' @param x matrix of covariates 6 | #' @param tx vector of treatment indicators (0 or 1) 7 | #' @param y vector of response values 8 | #' @param num.trees number of shallow causal trees to build 9 | #' @param maxleaves maximum number of leaves per causal tree 10 | #' @param eps learning rate 11 | #' @param splitSpread how far apart should the candidate splits be for the 12 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 13 | #' cutpoints as candidates for making split 14 | #' @param x.est optional matrix of estimation-set covariates used for honest 15 | #' re-estimation (ignored if \code{tx.est = NULL} or \code{y.est = NULL}) 16 | #' @param tx.est optional vector of estimation-set treatment indicators 17 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 18 | #' @param y.est optional vector of estimation-set response values 19 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 20 | #' @param propensity logical: should propensity score stratification be used? 21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.est optional vector giving propensity score stratum for each 24 | #' estimation-set observation (ignored if \code{x.est = NULL} or 25 | #' \code{tx.est = NULL} or \code{y.est = NULL}) 26 | #' @param isConstVar logical: for the causal tree splitting criterion 27 | #' (T-statistc), should it be assumed that the noise variance is the same in 28 | #' treatment and control arms? 29 | #' 30 | #' @return an object of class \code{causalBoosting} with attributes: 31 | #' \itemize{ 32 | #' \item CBM: a list storing the intercept, the causal trees and \code{eps} 33 | #' \item tauhat: matrix of treatment effects for each patient for each step 34 | #' \item G1: estimated-treatment conditional mean for each patient 35 | #' \item G0: estimated-control conditional mean for each patient 36 | #' \item err.y: training error at each step, in predicting response 37 | #' \item num.trees: number of trees specified by function call 38 | #' } 39 | #' 40 | #' @details 41 | #' This function exists primarily to be called by cv.causalBoosting because 42 | #' the num.trees parameter generally needs to be tuned via cross-validation. 
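#'
#' A minimal sketch of that workflow (reusing \code{x, tx, y} from the example
#' below; the number of trees is illustrative):
#' \preformatted{
#'   fit_cv = cv.causalBoosting(x, tx, y, num.trees = 200)
#'   fit_cv$num.trees.min.effect          # number of trees selected by CV
#'   tauhat = predict(fit_cv, newx = x)   # uses the CV-selected number by default
#' }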
43 | #' 44 | #' @examples 45 | #'# Randomized experiment example 46 | #' 47 | #'n = 100 # number of training-set patients to simulate 48 | #'p = 10 # number of features for each training-set patient 49 | #' 50 | #'# Simulate data 51 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 52 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 53 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 54 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 55 | #' 56 | #'# Estimate causal boosting model 57 | #'fit_cb = causalBoosting(x, tx, y, num.trees = 500) 58 | #'pred_cb = predict(fit_cb, newx = x, num.trees = 500) 59 | #' 60 | #'# Visualize results 61 | #'plot(tx_effect, pred_cb, main = 'Causal boosting', 62 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 63 | #'abline(0, 1, lty = 2) 64 | #' 65 | #' @export 66 | 67 | causalBoosting = function(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 68 | splitSpread = 0.1, x.est = NULL, tx.est = NULL, y.est = NULL, 69 | propensity = FALSE, stratum = NULL, stratum.est = NULL, 70 | isConstVar = TRUE) { 71 | 72 | 73 | # Input sanitization 74 | 75 | x = as.matrix(x) 76 | 77 | if (nrow(x) != length(tx)) { 78 | stop('nrow(x) does not match length(tx)') 79 | 80 | } else if (nrow(x) != length(y)) { 81 | stop('nrow(x) does not match length(y)') 82 | 83 | } else if (!is.numeric(x)) { 84 | stop('x must be numeric matrix') 85 | 86 | } else if (!is.numeric(y)) { 87 | stop('y must be numeric (use 0/1 for binary response)') 88 | 89 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 90 | stop('tx must be vector of 0s and 1s') 91 | 92 | } 93 | 94 | 95 | # s indices are 0-based 96 | maxNodes = 2 * maxleaves - 1 97 | 98 | # if (usePropensity ^ !is.null(s)) { warnings('Non-consistent options: whether to 99 | # use propensity score will be based on value of s.') } 100 | 101 | if (is.null(stratum)) { 102 | if (propensity) stop('stratum must be specified if propensity = TRUE') 103 | stratum = -1 104 | } 105 | if (is.null(x.est) || is.null(y.est) || is.null(tx.est)) { 106 | x.est = y.est = tx.est = stratum.est = -1 107 | n.est = 1 108 | } else { 109 | n.est = nrow(x.est) 110 | if (is.null(stratum.est)) { 111 | stratum.est = -1 112 | } 113 | } 114 | 115 | vtxeff = 0 116 | 117 | fit = .C("causalBoosting", as.double(x), as.double(y), as.integer(tx), 118 | as.double(x.est), as.double(y.est), as.integer(tx.est), 119 | as.integer(num.trees), as.integer(maxleaves), as.double(eps), 120 | as.integer(propensity), as.integer(stratum), as.integer(stratum.est), 121 | as.integer(isConstVar), as.integer(nrow(x)), as.integer(ncol(x)), 122 | as.integer(n.est), as.double(vtxeff), as.double(splitSpread), 123 | var = integer(num.trees * maxNodes), val = double(num.trees * maxNodes), 124 | left = integer(num.trees * maxNodes), 125 | right = integer(num.trees * maxNodes), 126 | y0bar = double(1), y1bar = double(1), pred0 = double(num.trees * maxNodes), 127 | pred1 = double(num.trees * maxNodes), cost = double(num.trees * maxNodes), 128 | pred0e = double(num.trees * maxNodes), 129 | pred1e = double(num.trees * maxNodes), G0 = double(nrow(x)), 130 | G1 = double(nrow(x)), err.y = double(num.trees), err = double(num.trees), 131 | tauhat = double(num.trees * nrow(x)), PACKAGE = 'causalLearning') 132 | 133 | CBM = list() 134 | CBM$intercept = c(fit$y0bar, fit$y1bar) 135 | CBM$trees = list() 136 | CBM$eps = eps 137 | 138 | for (k in 1:num.trees) { 139 | start = (k - 1) * 
maxNodes + 1 140 | end = k * maxNodes 141 | tree = list(var = fit$var[start:end] + 1, val = fit$val[start:end], 142 | left = fit$left[start:end] + 1, right = fit$right[start:end] + 1, 143 | pred0 = fit$pred0[start:end], pred1 = fit$pred1[start:end], 144 | cost = fit$cost[start:end], pred0e = fit$pred0e[start:end], 145 | pred1e = fit$pred1e[start:end]) 146 | class(tree) = "causalTree" 147 | CBM$trees[[k]] = tree 148 | } 149 | result = list(CBM = CBM, tauhat = matrix(fit$tauhat, nrow = nrow(x)), 150 | G1 = fit$G1, G0 = fit$G0, err.y = fit$err.y, num.trees = num.trees) 151 | 152 | class(result) = "causalBoosting" 153 | 154 | result 155 | } 156 | 157 | -------------------------------------------------------------------------------- /R/causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal MARS model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param maxterms maximum number of terms to include in the regression basis 7 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 8 | #' @param nquant number of quantiles used in splitting 9 | #' @param degree max number of different predictors that can interact in model 10 | #' @param eps shrinkage factor for new term added 11 | #' @param backstep logical: after building out regression basis, should 12 | #' backward stepwise selection be used to create a sequence of models, with 13 | #' the criterion evaluated on a validation set to choose among the sequence? 14 | #' @param x.val optional matrix of validation-set covariates 15 | #' (only used if \code{backstep = TRUE}) 16 | #' @param tx.val optional vector of validation-set treatment indicators 17 | #' (only used if \code{backstep = TRUE}) 18 | #' @param y.val optional vector of validation-set response values 19 | #' (only used if \code{backstep = TRUE}) 20 | #' @param propensity logical: should propensity score stratification be used? 
21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.val optional vector giving propensity score stratum for each 24 | #' validation-set observation 25 | #' (only used if \code{propensity = backstep = TRUE}) 26 | #' @param minnum minimum number of observations in each arm of each propensity 27 | #' score stratum needed to estimate regression coefficients for basis 28 | #' (only used if \code{propensity = TRUE}) 29 | #' 30 | #' @return an object of class \code{causalMARS} with attributes: 31 | #' \itemize{ 32 | #' \item parent: indices of nodes that are parents at each stage 33 | #' \item childvar: index of predictor chosen at each forward step 34 | #' \item childquant: quantile of cutoff chosen at each forward step 35 | #' \item quant: quantiles of the columns of x 36 | #' \item active: indices of columns with nonzero norm 37 | #' \item allvars: list of variables appearing in each term 38 | #' \item khat: the sequence of terms deleted at each step 39 | #' \item deltahat: relative change in rss 40 | #' \item rsstesthat: validation-set rss achieved by each model in sequence 41 | #' \item setesthat: standard error for rsstesthat 42 | #' \item tim1: time elapsed during forward stepwise phase 43 | #' \item tim2: total time elapsed 44 | #' \item x 45 | #' \item tx 46 | #' \item y 47 | #' \item maxterms 48 | #' \item eps 49 | #' \item backstep 50 | #' \item propensity 51 | #' \item x.val 52 | #' \item tx.val 53 | #' \item y.val 54 | #' \item stratum 55 | #' \item stratum.val 56 | #' \item minnum 57 | #' } 58 | #' 59 | #' @details 60 | #' parallel arms mars with backward stepwise BOTH randomized case and 61 | #' propensity stratum. data structures: model terms (nodes) are numbered 62 | #' 1, 2, ... with 1 representing the intercept. forward stepwise: 63 | #' modmatrix contains basis functions as model is built up -- two columns are 64 | #' added at each step. Does not include a column of ones for tidiness, 65 | #' we always add two terms, even when term added in linear (so that reflected 66 | #' version is just zero). 67 | #' backward stepwise: khat is the sequence of terms deleted at each step, 68 | #' based on deltahat = relative change in rss. rsstesthat is rss over test 69 | #' (validation) set achieved by each reduced model in sequence- used later for 70 | #' selecting a member of the sequence. 
active2 contains indices of columns with 71 | #' nonzero norm 72 | #' 73 | #' @examples 74 | #'# Randomized experiment example 75 | #' 76 | #'n = 100 # number of training-set patients to simulate 77 | #'p = 10 # number of features for each training-set patient 78 | #' 79 | #'# Simulate data 80 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 81 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 82 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 83 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 84 | #' 85 | #'# Estimate causal MARS model 86 | #'fit_cm = causalMARS(x, tx, y) 87 | #'pred_cm = predict(fit_cm, newx = x) 88 | #' 89 | #'# Visualize results 90 | #'plot(tx_effect, pred_cm, main = 'Causal MARS', 91 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 92 | #'abline(0, 1, lty = 2) 93 | #' 94 | #' @export 95 | 96 | causalMARS = function(x, tx, y, 97 | maxterms = 11, nquant = 5, degree = ncol(x), eps = 1, 98 | backstep = FALSE, x.val = NULL, tx.val = NULL, y.val = NULL, 99 | propensity = FALSE, stratum = rep(1, nrow(x)), stratum.val = NULL, 100 | minnum = 5) { 101 | 102 | 103 | # Input sanitization 104 | 105 | x = as.matrix(x) 106 | 107 | if (nrow(x) != length(tx)) { 108 | stop('nrow(x) does not match length(tx)') 109 | 110 | } else if (nrow(x) != length(y)) { 111 | stop('nrow(x) does not match length(y)') 112 | 113 | } else if (!is.numeric(x)) { 114 | stop('x must be numeric matrix') 115 | 116 | } else if (!is.numeric(y)) { 117 | stop('y must be numeric (use 0/1 for binary response)') 118 | 119 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 120 | stop('tx must be vector of 0s and 1s') 121 | 122 | } 123 | 124 | 125 | BIG = 1e+10 126 | n = nrow(x) 127 | p = ncol(x) 128 | x = scale(x, TRUE, FALSE) #NOTE 129 | if(!is.null(x.val)) { 130 | x.val = scale(x.val, TRUE, FALSE) 131 | } 132 | 133 | # compute quantiles for splitting 134 | discrete = rep(FALSE, p) 135 | for (j in 1:p) { 136 | if (length(table(x[, j])) == 2) 137 | discrete[j] = TRUE 138 | } 139 | 140 | probs = seq(0, 1, length = nquant)[-nquant] 141 | quant = apply(x, 2, stats::quantile, probs) 142 | 143 | nquantm = rep(nquant, p) 144 | if (sum(discrete) > 0) { 145 | for (j in which(discrete)) { 146 | nquantm[j] = 2 147 | quant[, j] = NA 148 | quant[1, j] = 0 149 | } 150 | } 151 | 152 | if (propensity) { 153 | stratum.sizes = table(stratum)/nrow(x) 154 | nstratum = length(stratum.sizes) 155 | if (sum(as.numeric(names(stratum.sizes)) != (1:length(stratum.sizes))) != 0) 156 | stop("Strata should be numbered 1:k") 157 | stratum.val.sizes = table(stratum.val)/nrow(x.val) 158 | nstratum.val = length(stratum.val.sizes) 159 | if (sum(as.numeric(names(stratum.val.sizes)) != (1:length(stratum.val.sizes))) != 160 | 0) 161 | stop("Stratatest should be numbered 1:k") 162 | } 163 | 164 | modmatrix = matrix(0, nrow = n, ncol = maxterms) 165 | active = rep(FALSE, maxterms) 166 | active[1] = TRUE 167 | modmatrix[, 1] = 1 168 | 169 | r = y - y 170 | a0 = c(mean(y[tx == 0]), mean(y[tx == 1])) 171 | 172 | r[tx == 0] = y[tx == 0] - mean(y[tx == 0]) 173 | r[tx == 1] = y[tx == 1] - mean(y[tx == 1]) 174 | 175 | parent = childvar = childquant = NULL 176 | allvars = vector("list", maxterms) 177 | maxscorall = rep(NA, maxterms) 178 | nterms = 1 179 | 180 | 181 | # forward stepwise 182 | while (nterms < maxterms) { 183 | maxscor = -1 * BIG 184 | act = active[1:nterms] 185 | num = unlist(lapply(allvars, 
length))[1:nterms] 186 | act = act & (num < degree) 187 | 188 | for (ii in (1:nterms)[act]) { 189 | jlist = rep(TRUE, p) 190 | if (ii > 1) 191 | jlist[allvars[[ii]]] = FALSE 192 | 193 | 194 | for (j in which(jlist)) { 195 | for (k in 1:(nquantm[j] - 1)) { 196 | # bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) bx2 = 197 | # modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) NOTE have to make 198 | # correspondng chanegs in propensity section below! 199 | bx = NULL 200 | bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) 201 | if (sum(bx1^2) > 0) 202 | bx = cbind(bx, bx1) 203 | bx2 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) 204 | if (sum(bx2^2) > 0) 205 | bx = cbind(bx, bx2) 206 | 207 | if (!propensity) { 208 | 209 | scor = -0.5 * BIG 210 | if (!is.null(bx)) { 211 | res = myridge(bx, r, int = TRUE)$res 212 | res0 = myridge(bx[tx == 0, ], r[tx == 0], int = TRUE)$res 213 | res1 = myridge(bx[tx == 1, ], r[tx == 1], int = TRUE)$res 214 | scor = sum(res^2) - sum(res0^2) - sum(res1^2) 215 | } 216 | } 217 | 218 | if (propensity) { 219 | if (!is.null(bx)) { 220 | scor = 0 221 | 222 | res0 = res1 = rep(0, n) 223 | for (s in 1:nstratum) { 224 | 225 | res = myridge(bx[stratum == s, ], r[stratum == s])$res 226 | 227 | if (sum(tx == 0 & stratum == s) >= minnum) { 228 | res0[stratum == s & tx == 0] = myridge(bx[tx == 0 & stratum == 229 | s, ], r[tx == 0 & stratum == s])$res 230 | } 231 | if (sum(tx == 1 & stratum == s) >= minnum) { 232 | res1[stratum == s & tx == 1] = myridge(bx[tx == 1 & stratum == 233 | s, ], r[tx == 1 & stratum == s])$res 234 | } 235 | scor = scor + (sum(res^2) - sum(res0^2) - sum(res1^2)) 236 | 237 | } 238 | res0 = res0[tx == 0] 239 | res1 = res1[tx == 1] 240 | } 241 | } # end of propensity loop 242 | 243 | 244 | if (scor > maxscor) { 245 | maxscor = scor 246 | 247 | iihat = ii 248 | jhat = j 249 | khat = k 250 | res0hat = res0 251 | res1hat = res1 252 | } 253 | } 254 | } 255 | } #end of for loop 256 | 257 | maxscorall[ii] = maxscor 258 | new1 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 1) 259 | new2 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 2) 260 | if (!is.matrix(new1)) 261 | new1 = matrix(new1, ncol = 1) 262 | if (!is.matrix(new2)) 263 | new2 = matrix(new2, ncol = 1) 264 | 265 | 266 | modmatrix[, nterms + 1] = new1 267 | modmatrix[, nterms + 2] = new2 268 | 269 | 270 | active[nterms + 1] = TRUE 271 | active[nterms + 2] = TRUE 272 | 273 | 274 | allvars[[nterms + 1]] = allvars[[nterms + 2]] = c(allvars[[iihat]], jhat) 275 | nterms = nterms + 2 276 | 277 | 278 | r[tx == 0] = r[tx == 0] * (1 - eps) + eps * res0hat 279 | r[tx == 1] = r[tx == 1] * (1 - eps) + eps * res1hat 280 | 281 | parent = c(parent, iihat) 282 | childvar = c(childvar, jhat) 283 | 284 | childquant = c(childquant, khat) 285 | } #end of while loop 286 | 287 | 288 | active = colSums(modmatrix^2) > 0 289 | deltahat = khat = NA 290 | out = list(parent = parent, childvar = childvar, childquant = childquant, quant = quant, 291 | active = active, eps = eps, allvars = allvars) 292 | 293 | tim1 = proc.time() 294 | # cat('forward done', fill = TRUE) 295 | 296 | 297 | # backward deletion 298 | rsstesthat = setesthat = NULL 299 | 300 | if (backstep) { 301 | 302 | if(is.null(x.val) | is.null(tx.val) | is.null(y.val)) { 303 | stop('If backstep = TRUE, then x.val, tx.val, y.val must be specified.') 304 | } 305 | 306 | BIG = 1e+10 307 | modmatrix = makebx.newmars(out, x, remove.zerocols = FALSE)[, -1] 308 | modmatrix.val = 
makebx.newmars(out, x.val, remove.zerocols = FALSE)[, -1] 309 | ss = colSums(modmatrix^2) > 0 310 | 311 | active2 = rep(TRUE, ncol(modmatrix.val)) 312 | active2[!ss] = FALSE 313 | khat = deltahat = deltatesthat = rsstesthat = setesthat = rep(NA, sum(active2)) 314 | rtest = rep(NA, nrow(x.val)) 315 | 316 | go = sum(active2) > 0 317 | 318 | ii = 0 319 | while (go) { 320 | go = FALSE 321 | 322 | delta = deltatest = rsstest = rep(BIG, length(active2)) 323 | # train 324 | if (!propensity) { 325 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0, ], y[tx == 326 | 0], int = TRUE) 327 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1, ], y[tx == 328 | 1], int = TRUE) 329 | res0 = fit0$res 330 | res1 = fit1$res 331 | 332 | # test 333 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 0, 334 | , drop = FALSE]) %*% fit0$coef 335 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 1, 336 | , drop = FALSE]) %*% fit1$coef 337 | res0test = (y.val[tx.val == 0] - yhat0) 338 | res1test = (y.val[tx.val == 1] - yhat1) 339 | rss0testsq = sum(res0test^2) 340 | rss1testsq = sum(res1test^2) 341 | } 342 | 343 | if (propensity) 344 | { 345 | rss0sq = rss1sq = rss0testsq = rss1testsq = 0 346 | # initial fit with parallel terms training 347 | for (s in 1:nstratum) { 348 | res0 = res1 = 0 349 | if (sum(tx == 0 & stratum == s) >= minnum) { 350 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0 & stratum == 351 | s, ], y[tx == 0 & stratum == s], int = TRUE) 352 | res0 = fit0$res 353 | } 354 | if (sum(tx == 1 & stratum == s) >= minnum) { 355 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1 & stratum == 356 | s, ], y[tx == 1 & stratum == s], int = TRUE) 357 | res1 = fit1$res 358 | } 359 | rss0sq = rss0sq + sum(res0^2) 360 | rss1sq = rss1sq + sum(res1^2) 361 | 362 | 363 | # test 364 | 365 | res0test = res1test = 0 366 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 367 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 368 | 0 & stratum.val == s, ]) %*% fit0$coef 369 | res0test = (y.val[tx.val == 0 & stratum.val == s] - yhat0) 370 | } 371 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 372 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 373 | 1 & stratum.val == s, ]) %*% fit1$coef 374 | res1test = (y.val[tx.val == 1 & stratum.val == s] - yhat1) 375 | } 376 | rss0testsq = rss0testsq + sum(res0test^2) 377 | rss1testsq = rss1testsq + sum(res1test^2) 378 | } 379 | } #end of propensity 380 | 381 | 382 | for (k in which(active2)) { 383 | # try collapsing a training set term 384 | act = active2 385 | act[k] = FALSE 386 | 387 | if (!propensity) { 388 | 389 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 390 | r = redfit$res 391 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0, ], r[tx == 392 | 0], int = TRUE) 393 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1, ], r[tx == 394 | 1], int = TRUE) 395 | 396 | res00 = fit00$res 397 | res11 = fit11$res 398 | delta[k] = (sum(res00^2) + sum(res11^2) - sum(res0^2) - sum(res1^2))/(sum(res00^2) + 399 | sum(res11^2)) 400 | # try collapsing the same test set term 401 | rtest = y.val - cbind(1, modmatrix.val[, k, drop = FALSE]) %*% redfit$coef 402 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 0, 403 | ]) %*% fit00$coef 404 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 1, 405 | ]) %*% fit11$coef 406 | 407 | res00test = (rtest[tx.val == 0] - yhat00) 408 | res11test = (rtest[tx.val == 1] - yhat11) 409 | 
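# validation-set RSS with term k collapsed (used below for deltatest[k])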
rss00testsq = sum(res00test^2) 410 | rss11testsq = sum(res11test^2) 411 | } 412 | 413 | if (propensity) 414 | { 415 | rss00sq = rss11sq = rss00testsq = rss11testsq = 0 416 | for (s in 1:nstratum) { 417 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 418 | r = redfit$res 419 | res00 = res11 = res00test = res11test = 0 420 | if (sum(tx == 0 & stratum == s) >= minnum) { 421 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0 & stratum == 422 | s, ], r[tx == 0 & stratum == s], int = TRUE) 423 | res00 = fit00$res 424 | } 425 | if (sum(tx == 1 & stratum == s) >= minnum) { 426 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1 & stratum == 427 | s, ], r[tx == 1 & stratum == s], int = TRUE) 428 | res11 = fit11$res 429 | } 430 | 431 | rss00sq = rss00sq + sum(res00^2) 432 | rss11sq = rss11sq + sum(res11^2) 433 | 434 | # try deleting the same test set term 435 | 436 | 437 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 438 | 439 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 440 | 0 & stratum.val == s, ]) %*% fit00$coef 441 | res00test = (y.val[tx.val == 0 & stratum.val == s] - yhat00) 442 | } 443 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 444 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 445 | 1 & stratum.val == s, ]) %*% fit11$coef 446 | res11test = (y.val[tx.val == 1 & stratum.val == s] - yhat11) 447 | } 448 | 449 | rss00testsq = rss00testsq + sum(res00test^2) 450 | rss11testsq = rss11testsq + sum(res11test^2) 451 | 452 | } 453 | delta[k] = (rss00sq + rss11sq - rss0sq - rss1sq)/(rss00sq + rss11sq) 454 | } #end of propensity 455 | 456 | # deltatest[k] = (sum(res00test^2) + sum(res11test^2) - sum(res0test^2) 457 | # -sum(res1test^2))/(sum(res00test^2) + sum(res11test^2)) 458 | 459 | deltatest[k] = (rss00testsq + rss11testsq - rss0testsq - rss1testsq)/(rss00testsq + 460 | rss11testsq) 461 | } #end of propensity 462 | 463 | ii = ii + 1 464 | # cat(c('ii=', ii), fill = TRUE) 465 | khat[ii] = which.min(delta) 466 | # cat(delta,fill=TRUE) 467 | deltahat[ii] = delta[khat[ii]] 468 | deltatesthat[ii] = deltatest[khat[ii]] 469 | n1 = length(res0test) 470 | n2 = length(res1test) 471 | # rsstesthat[ii]=(sum(res0test^2) + sum(res1test^2))/(n1+n2) 472 | rsstesthat[ii] = (rss0testsq + rss1testsq)/(n1 + n2) 473 | setesthat[ii] = NA 474 | # setesthat[ii]=sqrt((n1*var(res0test^2)+n2*var(res1test^2))/(n1+n2)^2) # not 475 | # currently computed 476 | 477 | active2[khat] = FALSE 478 | num.colsleft = sum(colSums(modmatrix.val[, active2, drop = FALSE]^2) > 0) 479 | go = (sum(active2) > 0) & (num.colsleft > 0) 480 | # cat(c(sum(active2), num.colsleft),fill=TRUE) browser() cat(c('dropping ', khat), 481 | # fill = TRUE) 482 | } 483 | 484 | } 485 | out$khat = khat[!is.na(khat)] 486 | out$deltahat = deltahat[!is.na(deltahat)] 487 | out$rsstesthat = rsstesthat 488 | out$setesthat = setesthat 489 | out$tim1 = tim1 490 | out$tim2 = proc.time() 491 | out$x = x 492 | out$tx = tx 493 | out$y = y 494 | out$x.val = x.val 495 | out$tx.val = tx.val 496 | out$y.val = y.val 497 | out$maxterms = maxterms 498 | out$backstep = backstep 499 | out$propensity = propensity 500 | out$stratum = stratum 501 | out$stratum.val = stratum.val 502 | out$minnum = minnum 503 | class(out) = 'causalMARS' 504 | return(out) 505 | } 506 | 507 | -------------------------------------------------------------------------------- /R/cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal boosting model with cross 
validation 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param num.trees number of shallow causal trees to build 7 | #' @param maxleaves maximum number of leaves per causal tree 8 | #' @param eps learning rate 9 | #' @param splitSpread how far apart should the candidate splits be for the 10 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 11 | #' cutpoints as candidates for making split 12 | #' @param type.measure loss to use for cross validation: 13 | #' 'response' returns mean-square error for predicting response in each arm. 14 | #' 'effect' returns MSE for treatment effect using honest over-fit estimation. 15 | #' @param nfolds number of cross validation folds 16 | #' @param foldid vector of fold membership 17 | #' @param propensity logical: should propensity score stratification be used? 18 | #' @param stratum optional vector giving propensity score stratum for each 19 | #' observation (only used if \code{propensity = TRUE}) 20 | #' @param isConstVar logical: for the causal tree splitting criterion 21 | #' (T-statistc), should it be assumed that the noise variance is the same in 22 | #' treatment and control arms? 23 | #' 24 | #' @return an object of class \code{cv.causalBoosting} which is an object of 25 | #' class \code{causalBoosting} with these additional attributes: 26 | #' \itemize{ 27 | #' \item num.trees.min: number of trees with lowest CV error 28 | #' \item cvm: vector of mean CV error for each number of trees 29 | #' \item cvsd: vector of standard errors for mean CV errors 30 | #' } 31 | #' 32 | #' @examples 33 | #'# Randomized experiment example 34 | #' 35 | #'n = 100 # number of training-set patients to simulate 36 | #'p = 10 # number of features for each training-set patient 37 | #' 38 | #'# Simulate data 39 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 40 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 41 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 42 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 43 | #' 44 | #'# Estimate causal boosting model with cross-validation 45 | #'fit_cv = cv.causalBoosting(x, tx, y) 46 | #'fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 47 | #'pred_cv = predict(fit_cv, newx = x) 48 | #' 49 | #'# Visualize results 50 | #'plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 51 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 52 | #'abline(0, 1, lty = 2) 53 | #' 54 | #' @export 55 | 56 | 57 | cv.causalBoosting = function(x, tx, y, 58 | num.trees = 500, maxleaves = 4, eps = 0.01, splitSpread = 0.1, 59 | type.measure = c('effect', 'response'), nfolds = 5, foldid = NULL, 60 | propensity = FALSE, stratum = NULL, isConstVar = TRUE) { 61 | 62 | 63 | # Input sanitization 64 | 65 | x = as.matrix(x) 66 | 67 | if (nrow(x) != length(tx)) { 68 | stop('nrow(x) does not match length(tx)') 69 | 70 | } else if (nrow(x) != length(y)) { 71 | stop('nrow(x) does not match length(y)') 72 | 73 | } else if (!is.numeric(x)) { 74 | stop('x must be numeric matrix') 75 | 76 | } else if (!is.numeric(y)) { 77 | stop('y must be numeric (use 0/1 for binary response)') 78 | 79 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 80 | stop('tx must be vector of 0s and 1s') 81 | 82 | } 83 | 84 | 85 | type.measure = match.arg(type.measure) 86 | 87 | if 
(is.null(foldid)) foldid = sample(rep(1:nfolds, length = nrow(x))) 88 | nfolds = length(unique(foldid)) 89 | 90 | fit = list() 91 | pred.response = matrix(0, nrow(x), num.trees) 92 | pred.effect = matrix(0, nrow(x), num.trees) 93 | pred.refit = rep(NA, nrow(x)) 94 | 95 | for (k in 1:nfolds) { 96 | 97 | x.val = x[foldid == k, , drop = FALSE] 98 | tx.val = tx[foldid == k] 99 | y.val = y[foldid == k] 100 | stratum.val = stratum[foldid == k] 101 | 102 | fit[[k]] = causalBoosting(x = x[foldid != k, , drop = FALSE], 103 | tx = tx[foldid != k], y = y[foldid != k], 104 | num.trees = num.trees, maxleaves = maxleaves, eps = eps, 105 | splitSpread = splitSpread, x.est = x.val, tx.est = tx.val, y.est = y.val, 106 | propensity = propensity, stratum = stratum[foldid != k], 107 | stratum.est = stratum.val, isConstVar = isConstVar) 108 | 109 | tmp = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 1:num.trees, 110 | type = 'conditional.mean') 111 | 112 | tmpMat = rbind(tmp$G0, tmp$G1) 113 | n_out = sum(foldid == k) 114 | pred.response[foldid == k, ] = 115 | tmpMat[tx[foldid == k] * n_out + (1:n_out), ] 116 | 117 | pred.effect[foldid == k, ] = tmp$G1 - tmp$G0 118 | tmp.refit = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 119 | 1:num.trees, type = 'conditional.mean', honest = TRUE) 120 | pred.refit[foldid == k] = (tmp.refit$G1 - tmp.refit$G0)[, num.trees] 121 | } 122 | 123 | cvm.effect = colMeans((pred.effect - pred.refit)^2) 124 | cvsd.effect = apply((pred.effect - pred.refit)^2, 2, stats::sd) / 125 | sqrt(nrow(pred.effect)) 126 | 127 | cvm.response = apply(pred.response, 2, function(yhat) mean((yhat - y)^2)) 128 | cvsd.response = apply(pred.response, 2, 129 | function(yhat) stats::sd((yhat - y)^2)) / sqrt(nrow(pred.response)) 130 | 131 | fit = causalBoosting(x = x, tx = tx, y = y, num.trees = num.trees, 132 | maxleaves = maxleaves, eps = eps, splitSpread = splitSpread, 133 | propensity = propensity, stratum = stratum, isConstVar = isConstVar) 134 | 135 | fit$num.trees.min.effect = which.min(cvm.effect) 136 | fit$num.trees.min.response = which.min(cvm.response) 137 | fit$num.trees.1se.effect = 138 | max(which(cvm.effect < min(cvm.effect + cvsd.effect))) 139 | 140 | fit$cvm.effect = cvm.effect 141 | fit$cvsd.effect = cvsd.effect 142 | fit$cvm.response = cvm.response 143 | fit$cvsd.response = cvsd.response 144 | 145 | class(fit) = 'cv.causalBoosting' 146 | fit 147 | } 148 | 149 | -------------------------------------------------------------------------------- /R/makebx.newmars.R: -------------------------------------------------------------------------------- 1 | 2 | makebx.newmars = function(fit, x, remove.zerocols = FALSE) { 3 | # creates model matrix from 'fit' 4 | quant = fit$quant 5 | parent = fit$parent 6 | childvar = fit$childvar 7 | childquant = fit$childquant 8 | active = fit$active 9 | nterms = length(parent) 10 | nterms2 = 1 + 2 * nterms 11 | 12 | modmatrix = matrix(0, nrow = nrow(x), ncol = nterms2) 13 | modmatrix[, 1] = 1 14 | count = 1 15 | 16 | for (ii in 1:nterms) { 17 | new1 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 18 | childvar[ii]], dir = 1) 19 | modmatrix[, count + 1] = new1 20 | new2 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 21 | childvar[ii]], dir = 2) 22 | 23 | modmatrix[, count + 2] = new2 24 | count = count + 2 25 | } 26 | if (remove.zerocols) 27 | modmatrix = modmatrix[, active] 28 | return(modmatrix) 29 | } 30 | 31 | 
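# A small illustration (not part of the fitting code) of the reflected hinge
# pair underlying each pair of columns that makebx.newmars() adds, using
# truncpow() from R/truncpow.R; the cut point 0.3 is arbitrary:
#
#   x1 = seq(-1, 1, by = 0.5)
#   truncpow(x1, cutp = 0.3, dir = 1)  # (x - 0.3) * (x > 0.3):  0.0 0.0 0.0 0.2 0.7
#   truncpow(x1, cutp = 0.3, dir = 2)  # (0.3 - x) * (x < 0.3):  1.3 0.8 0.3 0.0 0.0
#
# Each new model-matrix column is the parent term multiplied by one of these
# hinge functions, evaluated at a quantile cut point of the chosen predictor.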
-------------------------------------------------------------------------------- /R/myridge.R: -------------------------------------------------------------------------------- 1 | 2 | myridge = function(x, y, int = T, eps = 0.001) { 3 | xx = x 4 | if (!is.matrix(xx)) 5 | xx = matrix(xx, ncol = 1) 6 | lam = rep(eps, ncol(xx)) 7 | 8 | if (int) { 9 | xx = cbind(1, xx) 10 | lam = c(0, lam) 11 | } 12 | d = diag(lam) 13 | if (ncol(xx) == 1) 14 | d = lam 15 | coef = solve(t(xx) %*% xx + d) %*% t(xx) %*% y 16 | res = y - xx %*% coef 17 | list(res = res, coef = coef) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /R/pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Pollinate a fitted ranger random forest model 2 | #' 3 | #' @param object a fitted \code{ranger} object 4 | #' @param x matrix of covariates 5 | #' @param y vector of response values 6 | #' 7 | #' @return an object of class \code{pollinated.ranger} which is a \code{ranger} 8 | #' object that has been pollinated with the data in (x, y) 9 | 10 | pollinated.ranger = function(object, x, y) { 11 | 12 | forest = object$forest 13 | num.trees = forest$num.trees 14 | which.list = as.list(seq(num.trees)) 15 | split.values = forest$split.values 16 | split.varIDs = forest$split.varIDs 17 | 18 | for (i in 1:num.trees) { 19 | which = match(split.varIDs[[i]], 0, FALSE) 20 | split.values[[i]][which > 0] = seq(sum(which)) 21 | which.list[[i]] = which 22 | } 23 | 24 | forest$split.values = split.values 25 | object$forest = forest 26 | preds = stats::predict(object, x, predict.all = TRUE)$predictions 27 | 28 | ### Get list of means indexed by the unique terminal node values 29 | pmeans = apply(preds, 2, function(f, y) tapply(y, f, mean), y) 30 | 31 | ### Now populate these terminal nodes with these values 32 | for (i in 1:num.trees) { 33 | which = which.list[[i]] 34 | repvec = rep(NA, sum(which)) 35 | pmean = pmeans[[i]] 36 | ids = as.integer(names(pmean)) 37 | repvec[ids] = pmean 38 | split.values[[i]][which > 0] = repvec 39 | } 40 | 41 | forest$split.values = split.values 42 | object$forest = forest 43 | object$mean = mean(y) 44 | class(object) = c('pollinated.ranger', 'ranger') 45 | object 46 | } 47 | 48 | -------------------------------------------------------------------------------- /R/predict.PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted PTO forest model 2 | #' 3 | #' @param object a fitted \code{PTOforest} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param ... ignored 7 | #' 8 | #' @return a vector of predictions corresponding to the rows of \code{newx} 9 | #' 10 | #' @export 11 | 12 | predict.PTOforest = function(object, newx, ...) 
{ 13 | 14 | colnames(newx) = colnames(object$x) 15 | 16 | if (object$postprocess) { 17 | stats::predict(object$postfit, data = newx)$predictions 18 | 19 | } else { 20 | stats::predict(object$PTOfit1, newx = newx) - 21 | stats::predict(object$PTOfit0, newx = newx) 22 | 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/predict.bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a bag of fitted causal MARS models 2 | #' 3 | #' @param object a fitted \code{bagged.causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param type type of prediction required: 7 | #' 'average' returns a vector of the averages of the bootstrap estimates. 8 | #' 'all' returns a matrix of all of the bootstrap estimates. 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.bagged.causalMARS = function(object, newx, type = c('average', 'all'), 17 | ...) { 18 | type = match.arg(type) 19 | newx = scale(newx, center = TRUE, scale = FALSE) 20 | pred = sapply(object, FUN = stats::predict, newx = newx) 21 | switch(type, 22 | all = pred, 23 | average = rowMeans(pred)) 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/predict.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal boosting model 2 | #' 3 | #' @param object a fitted \code{causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param num.trees number(s) of shallow causal trees to use for prediction 13 | #' @param honest logical: should honest re-estimates of leaf means be used for 14 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 15 | #' when the causal boosting model was fit 16 | #' @param naVal value with which to replace \code{NA} predictions 17 | #' @param ... ignored 18 | #' 19 | #' @return a vector or matrix of predictions corresponding to the rows of 20 | #' \code{newx} 21 | #' 22 | #' @export 23 | 24 | predict.causalBoosting = function(object, newx, newtx = NULL, 25 | type = c('treatment.effect', 'conditional.mean', 'response'), 26 | num.trees = 1:object$num.trees, honest = FALSE, naVal = 0, ...) 
{ 27 | 28 | type = match.arg(type) 29 | if (type == 'response' & is.null(newtx)) { 30 | stop('response predictions require that newtx be specified') 31 | } 32 | 33 | CBM = object$CBM 34 | 35 | n = nrow(newx) 36 | eps = CBM$eps 37 | G0 = matrix(CBM$intercept[1], n, max(num.trees) + 1) 38 | G1 = matrix(CBM$intercept[2], n, max(num.trees) + 1) 39 | for (k in 1:max(num.trees)) { 40 | pred = stats::predict(CBM$trees[[k]], newx, type = 'conditional.mean', 41 | honest = honest) 42 | G1[, k + 1] = G1[, k] + eps * pred[, 2] 43 | G0[, k + 1] = G0[, k] + eps * pred[, 1] 44 | } 45 | G1[is.na(G1)] = naVal 46 | G0[is.na(G0)] = naVal 47 | 48 | switch(type, 49 | treatment.effect = (G1 - G0)[, num.trees + 1], 50 | conditional.mean = list(G1 = G1[, num.trees + 1], G0 = G0[, num.trees + 1]), 51 | response = (newtx * G1 + (1 - newtx) * G0)[, num.trees + 1]) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /R/predict.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal MARS model 2 | #' 3 | #' @param object a fitted \code{causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param active indices of columns with nonzero norm (defaults to model 7 | #' selected via backward stepwise phase, or the full model if 8 | #' \code{backstep = FALSE}) 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.causalMARS = function(object, newx, active, ...) { 17 | 18 | if (missing(active)) { 19 | if (object$backstep) { 20 | ntermshat = length(object$khat) - which.min(object$rsstesthat) + 1 21 | active = (rev(object$khat))[1:ntermshat] 22 | } else { 23 | active = 1:(object$maxterms - 1) 24 | } 25 | } 26 | 27 | object$x = scale(object$x, center = TRUE, scale = FALSE) 28 | newx = scale(newx, center = TRUE, scale = FALSE) 29 | 30 | del = rep(mean(object$y[object$tx == 1]) - mean(object$y[object$tx == 0]), 31 | nrow(newx)) 32 | stratum = object$stratum 33 | stratum.val = object$stratum.val 34 | minnum = object$minnum 35 | nstrata = length(unique(stratum)) 36 | stratawt = (table(c(stratum, 1:nstrata)) - 1) 37 | stratawt = stratawt / sum(stratawt) 38 | 39 | if (length(active) > 0) { 40 | if (!object$propensity) { 41 | # make training matrices 42 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 43 | beta0 = myridge(bx0[object$tx == 0, ], object$y[object$tx == 0])$coef 44 | beta1 = myridge(bx0[object$tx == 1, ], object$y[object$tx == 1])$coef 45 | # make test set matrices 46 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 47 | del = cbind(1, bx) %*% (beta1 - beta0) 48 | } 49 | if (object$propensity) { 50 | # make training matrices 51 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 52 | beta0 = beta1 = matrix(NA, ncol(bx0) + 1, nstrata) 53 | for (ss in 1:nstrata) { 54 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 55 | beta0[, ss] = myridge(bx0[object$tx == 0 & stratum == ss, ], 56 | object$y[object$tx == 0 & stratum == ss])$coef 57 | beta1[, ss] = myridge(bx0[object$tx == 1 & stratum == ss, ], 58 | object$y[object$tx == 1 & stratum == ss])$coef 59 | } 60 | } 61 | # make test set matrices 62 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 63 | del = rep(0, nrow(bx)) 64 | totwt = 0 65 | for (ss in 
1:nstrata) { 66 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 67 | del = del + stratawt[ss] * cbind(1, bx) %*% 68 | (beta1[, ss] - beta0[, ss]) 69 | totwt = totwt + stratawt[ss] 70 | } 71 | } 72 | del = del / totwt 73 | } 74 | } 75 | return(c(del)) 76 | } 77 | 78 | -------------------------------------------------------------------------------- /R/predict.causalTree.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal tree model 2 | #' 3 | #' @param object a fitted \code{causalTree} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param honest logical: should honest re-estimates of leaf means be used for 13 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 14 | #' when the causal boosting model was fit 15 | #' @param naVal value with which to replace \code{NA} predictions 16 | #' @param ... ignored 17 | #' 18 | #' @return a vector or matrix of predictions corresponding to the rows of 19 | #' \code{newx} 20 | #' 21 | #' @export 22 | 23 | predict.causalTree = function(object, newx, newtx = NULL, 24 | type = c('treatment.effect', 'conditional.mean', 'response'), honest = FALSE, 25 | naVal = 0, ...) { 26 | 27 | leaf = rep(1, nrow(newx)) 28 | terminal = rep(FALSE, nrow(newx)) 29 | while (sum(!is.na(object$var[leaf])) > 0) { 30 | var = cbind((1:nrow(newx))[!terminal], object$var[leaf[!terminal]]) 31 | leaf[!terminal] = object$left[leaf[!terminal]] + (newx[var] >= object$val[leaf[!terminal]]) 32 | terminal = is.na(object$var[leaf]) 33 | } 34 | pred = list() 35 | pred$leaf = leaf 36 | if (!honest) { 37 | pred$pred0 = object$pred0[leaf] 38 | pred$pred1 = object$pred1[leaf] 39 | } else { 40 | pred$pred0 = object$pred0e[leaf] 41 | pred$pred1 = object$pred1e[leaf] 42 | } 43 | type = match.arg(type) 44 | predMatrix = cbind(pred$pred0, pred$pred1) 45 | predMatrix[is.na(predMatrix)] = naVal 46 | out = switch(type, 47 | treatment.effect = predMatrix[, 2] - predMatrix[, 1], 48 | conditional.mean = predMatrix, 49 | response = ifelse(!is.null(newtx), 50 | predMatrix[cbind(1:nrow(newx), newtx + 1)], rep(NA, nrow(newx)))) 51 | out 52 | } 53 | 54 | -------------------------------------------------------------------------------- /R/predict.cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted cross-validated causal boosting model 2 | #' 3 | #' @param object a fitted \code{cv.causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'individual'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 
12 | #' @param num.trees number of shallow causal trees to use for prediction 13 | #' @param naVal value with which to replace \code{NA} predictions 14 | #' @param ... ignored 15 | #' 16 | #' @return a vector or matrix of predictions corresponding to the rows of 17 | #' \code{newx} 18 | #' 19 | #' @export 20 | 21 | predict.cv.causalBoosting = function(object, newx, newtx = NULL, 22 | type = c('treatment.effect', 'conditional.mean', 'response'), 23 | num.trees = object$num.trees.min.effect, naVal = 0, ...) { 24 | 25 | predict.causalBoosting(object = object, newx = newx, newtx = newtx, 26 | type = type, num.trees = num.trees, naVal = naVal) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /R/predict.pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a pollinated ranger random forest model 2 | #' 3 | #' @param object a fitted \code{pollinated.ranger} object 4 | #' @param newx matrix of new covariates for which predictions are desired 5 | #' @param predict.all logical: should predictions from all trees be returned? 6 | #' Otherwise the average across trees is returned 7 | #' @param na.treatment how to treat NA predictions from individual trees: 8 | #' 'omit' only uses trees for which the prediction is not NA. 9 | #' 'replace' replaces NA predictions with the overall mean response. 10 | #' 'NA' returns NA if any tree prediction is NA. 11 | #' @param ... additional arguments passed on to \code{predict.ranger} 12 | #' 13 | #' 14 | #' @return a vector of predicted responses (or a matrix of per-tree predictions 15 | #' if \code{predict.all = TRUE}) corresponding to the rows of newx 16 | 17 | predict.pollinated.ranger = function(object, newx, predict.all = FALSE, 18 | na.treatment = c('omit', 'replace', 'NA'), ...)
{ 19 | 20 | na.treatment = match.arg(na.treatment) 21 | 22 | class(object) = 'ranger' 23 | preds = stats::predict(object, newx, predict.all = TRUE, 24 | ...)$predictions 25 | class(object) = 'pollinated.ranger' 26 | 27 | if (na.treatment == 'replace') { 28 | wh <- is.na(preds) 29 | preds[wh] = object$mean 30 | } 31 | 32 | if (!predict.all) { 33 | preds = apply(preds, 1, mean, na.rm = (na.treatment == 'omit')) 34 | } 35 | 36 | preds 37 | } 38 | 39 | -------------------------------------------------------------------------------- /R/stratify.R: -------------------------------------------------------------------------------- 1 | #' Get propensity strata from propensity scores 2 | #' 3 | #' @param pscore vector of propensity scores 4 | #' @param tx vector of treatment indicators 5 | #' @param min.per.arm minimum number of observations for each arm within each 6 | #' stratum 7 | #' 8 | #' @return a list with two components: \code{stratum}, a vector of integers of the 9 | #' same length as pscore giving the propensity stratum of each observation, and \code{cutoffs}, the propensity score cutoffs between strata 10 | #' 11 | #' @export 12 | 13 | stratify = function(pscore, tx, min.per.arm = 30) { 14 | 15 | stratum = ceiling(10 * pscore) 16 | cutoffs = sort(unique(stratum/10)) 17 | stratum = as.numeric(as.factor(stratum)) 18 | 19 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 20 | while(min(num.treated) < min.per.arm & length(unique(stratum)) > 1) { 21 | stratum1 = which.min(num.treated) 22 | cutoffs = cutoffs[-which.min(num.treated)] 23 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 24 | stratum2 = neighbors[which.min(num.treated[neighbors])] 25 | stratum[stratum == stratum1] = stratum2 26 | stratum = as.numeric(as.factor(stratum)) 27 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 28 | } 29 | 30 | stratum = 1 + max(stratum) - stratum 31 | cutoffs = rev(cutoffs) 32 | 33 | num.control = stats::aggregate(1 - tx, list(stratum = stratum), sum)$x 34 | while(min(num.control) < min.per.arm & length(unique(stratum)) > 1) { 35 | stratum1 = which.min(num.control) 36 | cutoffs = cutoffs[-which.min(num.control)] 37 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 38 | stratum2 = neighbors[which.min(num.control[neighbors])] 39 | stratum[stratum == stratum1] = stratum2 40 | stratum = as.numeric(as.factor(stratum)) 41 | num.control = stats::aggregate(1 - tx, list(stratum = stratum), sum)$x # recompute control-arm counts on merged strata 42 | } 43 | 44 | cutoffs[1] = 1 45 | cutoffs = rev(cutoffs) 46 | 47 | list(stratum = 1 + max(stratum) - stratum, cutoffs = cutoffs) 48 | } 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /R/truncpow.R: -------------------------------------------------------------------------------- 1 | 2 | truncpow = function(x, cutp, dir = 1) { 3 | if (dir == 1) 4 | out = (x - cutp) * (x > cutp) 5 | if (dir == 2) 6 | out = (cutp - x) * (x < cutp) 7 | return(out) 8 | } 9 | 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # causalLearning 2 | Methods for heterogeneous treatment effect estimation 3 | 4 | This R package includes functions for fitting the pollinated 5 | transformed outcome forest, causal boosting and causal MARS from 6 | [Powers et al. (2017)](https://arxiv.org/abs/1707.00102). 7 | 8 | The package is currently in beta, not yet ready for public consumption.
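A minimal usage sketch, mirroring the examples shipped in the package documentation (the simulated data and object names below are illustrative only):

```r
library(causalLearning)

# Simulate a small randomized experiment
n = 100   # number of patients
p = 10    # number of covariates
x = matrix(rnorm(n * p), nrow = n, ncol = p)
tx_effect = x[, 1] + (x[, 2] > 0)            # heterogeneous treatment effect
tx = rbinom(n, size = 1, p = 0.5)            # random treatment assignment
y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001)

# Causal boosting, tuned by cross-validation
fit_cv = cv.causalBoosting(x, tx, y)
pred_cv = predict(fit_cv, newx = x)

# Bagged causal MARS
fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10)
pred_bcm = predict(fit_bcm, newx = x)

# Pollinated transformed outcome forest
fit_pto = PTOforest(x, tx, y)
pred_pto = predict(fit_pto, newx = x)
```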
9 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: causalLearning 2 | Title: Methods for heterogeneous treatment effect estimation 3 | Version: 1.0.0 4 | Authors@R: c( 5 | person("Scott", "Powers", email = "saberpowers@gmail.com", 6 | role = c("aut", "cre")), 7 | person("Junyang", "Qian", email = "junyangq@stanford.edu", role = "aut"), 8 | person("Trevor", "Hastie", email = "hastie@stanford.edu", role = "aut"), 9 | person("Robert", "Tibshirani", email = "tibs@stanford.edu", role = "aut")) 10 | Description: The main functions are cv.causalBoosting and bagged.causalMARS, 11 | which build upon the simpler causalBoosting and causalMARS functions. All of 12 | these functions have their own predict methods. 13 | Depends: R (>= 3.3.0) 14 | Imports: ranger 15 | License: GPL-2 16 | LazyData: true 17 | RoxygenNote: 6.0.1 18 | NeedsCompilation: yes 19 | Packaged: 2018-09-07 15:58:32 UTC; sspowers 20 | Author: Scott Powers [aut, cre], 21 | Junyang Qian [aut], 22 | Trevor Hastie [aut], 23 | Robert Tibshirani [aut] 24 | Maintainer: Scott Powers 25 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(predict,PTOforest) 4 | S3method(predict,bagged.causalMARS) 5 | S3method(predict,causalBoosting) 6 | S3method(predict,causalMARS) 7 | S3method(predict,causalTree) 8 | S3method(predict,cv.causalBoosting) 9 | export(PTOforest) 10 | export(bagged.causalMARS) 11 | export(causalBoosting) 12 | export(causalMARS) 13 | export(cv.causalBoosting) 14 | export(stratify) 15 | useDynLib(causalLearning) 16 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Fit a pollinated transformed outcome (PTO) forest model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param pscore vector of propensity scores 7 | #' @param num.trees number of trees for transformed outcome forest 8 | #' @param mtry number of variables to possibly split at in each node 9 | #' @param min.node.size minimum node size for transformed outcome forest 10 | #' @param postprocess logical: should optional post-processing random forest be 11 | #' fit at end? 12 | #' @param verbose logical: should progress be printed to console? 
13 | #' 14 | #' @return an object of class \code{PTOforest} with attributes: 15 | #' \itemize{ 16 | #' \item x: matrix of covariates supplied by function call 17 | #' \item pscore: vector of propensity score supplied by function call 18 | #' \item postprocess: logical supplied by function call 19 | #' \item TOfit: fitted random forest on transformed outcomes 20 | #' \item PTOfit1: TOfit pollinated with treatment-arm outcomes 21 | #' \item PTOfit0: TOfit pollinated with control-arm outcomes 22 | #' \item postfit: post-processing random forest summarizing results 23 | #' } 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate PTO forest model 38 | #'fit_pto = PTOforest(x, tx, y) 39 | #'pred_pto = predict(fit_pto, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_pto, main = 'PTO forest', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | 49 | PTOforest = function(x, tx, y, pscore = rep(.5, nrow(x)), 50 | num.trees = 500, mtry = ncol(x), min.node.size = max(25, nrow(x) / 40), 51 | postprocess = TRUE, verbose = FALSE) { 52 | 53 | 54 | # Input sanitization 55 | 56 | x = as.matrix(x) 57 | 58 | if (nrow(x) != length(tx)) { 59 | stop('nrow(x) does not match length(tx)') 60 | 61 | } else if (nrow(x) != length(y)) { 62 | stop('nrow(x) does not match length(y)') 63 | 64 | } else if (!is.numeric(x)) { 65 | stop('x must be numeric matrix') 66 | 67 | } else if (!is.numeric(y)) { 68 | stop('y must be numeric (use 0/1 for binary response)') 69 | 70 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 71 | stop('tx must be vector of 0s and 1s') 72 | 73 | } 74 | 75 | 76 | colnames(x) = paste('x', 1:ncol(x), sep = '') 77 | fit = list(x = x, pscore = pscore, postprocess = postprocess) 78 | 79 | z = tx * y / pscore - (1 - tx) * y / (1 - pscore) 80 | 81 | 82 | if (verbose) cat('fitting IPW treatment forest\n') 83 | 84 | data = data.frame(y = z, x = x) 85 | colnames(data) = c('y', colnames(x)) 86 | fit$TOfit = ranger::ranger(data = data, dependent.variable.name = 'y', 87 | num.trees = num.trees, min.node.size = min.node.size, mtry = mtry, 88 | write.forest = TRUE) 89 | 90 | 91 | # Now pollinate the tree separately with treated and untreated 92 | if (verbose) { 93 | cat('pollinating IPW treatment forest separately with treated and', 94 | 'untreated y\n') 95 | } 96 | 97 | fit$PTOfit1 = pollinated.ranger(fit$TOfit, x = x[tx == 1, ], y = y[tx == 1]) 98 | fit$PTOfit0 = pollinated.ranger(fit$TOfit, x = x[tx == 0, ], y = y[tx == 0]) 99 | 100 | 101 | if (postprocess) { 102 | # and one more summarization rf 103 | if (verbose) cat('fitting TX summary forest\n') 104 | 105 | delta = stats::predict(fit$PTOfit1, x) - stats::predict(fit$PTOfit0, x) 106 | data = data.frame(y = delta, x = x) 107 | colnames(x) = paste('x', 1:ncol(x), sep = '') 108 | colnames(data) = c('y', colnames(x)) 109 | fit$postfit = ranger::ranger(data = data, dependent.variable.name = 'y', 110 | num.trees = num.trees, mtry = 
ncol(x), write.forest = TRUE) 111 | } 112 | 113 | class(fit) = 'PTOforest' 114 | fit 115 | } 116 | 117 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a bag of causal MARS models 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param nbag number of models to bag 7 | #' @param maxterms maximum number of terms to include in the regression basis 8 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 9 | #' @param nquant number of quantiles used in splitting 10 | #' @param degree max number of different predictors that can interact in model 11 | #' @param eps shrinkage factor for new term added 12 | #' @param backstep logical: should out-of-bag samples be used to prune each 13 | #' model? otherwise full regression basis is used for each model 14 | #' @param propensity logical: should propensity score stratification be used? 15 | #' @param stratum optional vector giving propensity score stratum for each 16 | #' observation (only used if \code{propensity = TRUE}) 17 | #' @param minnum minimum number of observations in each arm of each propensity 18 | #' score stratum needed to estimate regression coefficients for basis 19 | #' (only used if \code{propensity = TRUE}) 20 | #' @param verbose logical: should progress be printed to console? 21 | #' 22 | #' @return an object of class \code{bagged.causalMARS}, which is itself a list 23 | #' of \code{causalMARS} objects 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate bagged causal MARS model 38 | #'fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10) 39 | #'pred_bcm = predict(fit_bcm, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | bagged.causalMARS = function(x, tx, y, nbag = 20, maxterms = 11, nquant = 5, 49 | degree = ncol(x), eps = 1, backstep = FALSE, 50 | propensity = FALSE, stratum = rep(1, nrow(x)), minnum = 5, verbose = FALSE) { 51 | 52 | 53 | # Input sanitization 54 | 55 | x = as.matrix(x) 56 | 57 | if (nrow(x) != length(tx)) { 58 | stop('nrow(x) does not match length(tx)') 59 | 60 | } else if (nrow(x) != length(y)) { 61 | stop('nrow(x) does not match length(y)') 62 | 63 | } else if (!is.numeric(x)) { 64 | stop('x must be numeric matrix') 65 | 66 | } else if (!is.numeric(y)) { 67 | stop('y must be numeric (use 0/1 for binary response)') 68 | 69 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 70 | stop('tx must be vector of 0s and 1s') 71 | 72 | } 73 | 74 | 75 | x = scale(x, center = TRUE, scale = FALSE) 76 | 77 | fit = list() 78 | 79 | for (b in 1:nbag) { 80 | 81 | if (verbose) cat(c('BAG=', b, '/', nbag), fill = 
TRUE) 82 | 83 | bag = sample(1:nrow(x), size = nrow(x), replace = TRUE) 84 | oob = rep(TRUE, nrow(x)) 85 | oob[bag] = FALSE 86 | fit[[b]] = causalMARS(x = x[bag, ], tx = tx[bag], y = y[bag], 87 | maxterms = maxterms, nquant = nquant, degree = degree, eps = eps, 88 | backstep = backstep, x.val = x[oob, ], tx.val = tx[oob], y.val = y[oob], 89 | propensity = propensity, stratum = stratum[bag], 90 | stratum.val = stratum[oob], minnum = minnum) 91 | } 92 | class(fit) = 'bagged.causalMARS' 93 | fit 94 | } 95 | 96 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal boosting model 2 | #' 3 | #' @useDynLib causalLearning 4 | #' 5 | #' @param x matrix of covariates 6 | #' @param tx vector of treatment indicators (0 or 1) 7 | #' @param y vector of response values 8 | #' @param num.trees number of shallow causal trees to build 9 | #' @param maxleaves maximum number of leaves per causal tree 10 | #' @param eps learning rate 11 | #' @param splitSpread how far apart should the candidate splits be for the 12 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 13 | #' cutpoints as candidates for making split 14 | #' @param x.est optional matrix of estimation-set covariates used for honest 15 | #' re-estimation (ignored if \code{tx.est = NULL} or \code{y.est = NULL}) 16 | #' @param tx.est optional vector of estimation-set treatment indicators 17 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 18 | #' @param y.est optional vector of estimation-set response values 19 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 20 | #' @param propensity logical: should propensity score stratification be used? 21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.est optional vector giving propensity score stratum for each 24 | #' estimation-set observation (ignored if \code{x.est = NULL} or 25 | #' \code{tx.est = NULL} or \code{y.est = NULL}) 26 | #' @param isConstVar logical: for the causal tree splitting criterion 27 | #' (T-statistc), should it be assumed that the noise variance is the same in 28 | #' treatment and control arms? 29 | #' 30 | #' @return an object of class \code{causalBoosting} with attributes: 31 | #' \itemize{ 32 | #' \item CBM: a list storing the intercept, the causal trees and \code{eps} 33 | #' \item tauhat: matrix of treatment effects for each patient for each step 34 | #' \item G1: estimated-treatment conditional mean for each patient 35 | #' \item G0: estimated-control conditional mean for each patient 36 | #' \item err.y: training error at each step, in predicting response 37 | #' \item num.trees: number of trees specified by function call 38 | #' } 39 | #' 40 | #' @details 41 | #' This function exists primarily to be called by cv.causalBoosting because 42 | #' the num.trees parameter generally needs to be tuned via cross-validation. 
43 | #' 44 | #' @examples 45 | #'# Randomized experiment example 46 | #' 47 | #'n = 100 # number of training-set patients to simulate 48 | #'p = 10 # number of features for each training-set patient 49 | #' 50 | #'# Simulate data 51 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 52 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 53 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 54 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 55 | #' 56 | #'# Estimate causal boosting model 57 | #'fit_cb = causalBoosting(x, tx, y, num.trees = 500) 58 | #'pred_cb = predict(fit_cb, newx = x, num.trees = 500) 59 | #' 60 | #'# Visualize results 61 | #'plot(tx_effect, pred_cb, main = 'Causal boosting', 62 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 63 | #'abline(0, 1, lty = 2) 64 | #' 65 | #' @export 66 | 67 | causalBoosting = function(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 68 | splitSpread = 0.1, x.est = NULL, tx.est = NULL, y.est = NULL, 69 | propensity = FALSE, stratum = NULL, stratum.est = NULL, 70 | isConstVar = TRUE) { 71 | 72 | 73 | # Input sanitization 74 | 75 | x = as.matrix(x) 76 | 77 | if (nrow(x) != length(tx)) { 78 | stop('nrow(x) does not match length(tx)') 79 | 80 | } else if (nrow(x) != length(y)) { 81 | stop('nrow(x) does not match length(y)') 82 | 83 | } else if (!is.numeric(x)) { 84 | stop('x must be numeric matrix') 85 | 86 | } else if (!is.numeric(y)) { 87 | stop('y must be numeric (use 0/1 for binary response)') 88 | 89 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 90 | stop('tx must be vector of 0s and 1s') 91 | 92 | } 93 | 94 | 95 | # s indices are 0-based 96 | maxNodes = 2 * maxleaves - 1 97 | 98 | # if (usePropensity ^ !is.null(s)) { warnings('Non-consistent options: whether to 99 | # use propensity score will be based on value of s.') } 100 | 101 | if (is.null(stratum)) { 102 | if (propensity) stop('stratum must be specified if propensity = TRUE') 103 | stratum = -1 104 | } 105 | if (is.null(x.est) || is.null(y.est) || is.null(tx.est)) { 106 | x.est = y.est = tx.est = stratum.est = -1 107 | n.est = 1 108 | } else { 109 | n.est = nrow(x.est) 110 | if (is.null(stratum.est)) { 111 | stratum.est = -1 112 | } 113 | } 114 | 115 | vtxeff = 0 116 | 117 | fit = .C("causalBoosting", as.double(x), as.double(y), as.integer(tx), 118 | as.double(x.est), as.double(y.est), as.integer(tx.est), 119 | as.integer(num.trees), as.integer(maxleaves), as.double(eps), 120 | as.integer(propensity), as.integer(stratum), as.integer(stratum.est), 121 | as.integer(isConstVar), as.integer(nrow(x)), as.integer(ncol(x)), 122 | as.integer(n.est), as.double(vtxeff), as.double(splitSpread), 123 | var = integer(num.trees * maxNodes), val = double(num.trees * maxNodes), 124 | left = integer(num.trees * maxNodes), 125 | right = integer(num.trees * maxNodes), 126 | y0bar = double(1), y1bar = double(1), pred0 = double(num.trees * maxNodes), 127 | pred1 = double(num.trees * maxNodes), cost = double(num.trees * maxNodes), 128 | pred0e = double(num.trees * maxNodes), 129 | pred1e = double(num.trees * maxNodes), G0 = double(nrow(x)), 130 | G1 = double(nrow(x)), err.y = double(num.trees), err = double(num.trees), 131 | tauhat = double(num.trees * nrow(x)), PACKAGE = 'causalLearning') 132 | 133 | CBM = list() 134 | CBM$intercept = c(fit$y0bar, fit$y1bar) 135 | CBM$trees = list() 136 | CBM$eps = eps 137 | 138 | for (k in 1:num.trees) { 139 | start = (k - 1) * 
maxNodes + 1 140 | end = k * maxNodes 141 | tree = list(var = fit$var[start:end] + 1, val = fit$val[start:end], 142 | left = fit$left[start:end] + 1, right = fit$right[start:end] + 1, 143 | pred0 = fit$pred0[start:end], pred1 = fit$pred1[start:end], 144 | cost = fit$cost[start:end], pred0e = fit$pred0e[start:end], 145 | pred1e = fit$pred1e[start:end]) 146 | class(tree) = "causalTree" 147 | CBM$trees[[k]] = tree 148 | } 149 | result = list(CBM = CBM, tauhat = matrix(fit$tauhat, nrow = nrow(x)), 150 | G1 = fit$G1, G0 = fit$G0, err.y = fit$err.y, num.trees = num.trees) 151 | 152 | class(result) = "causalBoosting" 153 | 154 | result 155 | } 156 | 157 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal MARS model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param maxterms maximum number of terms to include in the regression basis 7 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 8 | #' @param nquant number of quantiles used in splitting 9 | #' @param degree max number of different predictors that can interact in model 10 | #' @param eps shrinkage factor for new term added 11 | #' @param backstep logical: after building out regression basis, should 12 | #' backward stepwise selection be used to create a sequence of models, with 13 | #' the criterion evaluated on a validation set to choose among the sequence? 14 | #' @param x.val optional matrix of validation-set covariates 15 | #' (only used if \code{backstep = TRUE}) 16 | #' @param tx.val optional vector of validation-set treatment indicators 17 | #' (only used if \code{backstep = TRUE}) 18 | #' @param y.val optional vector of validation-set response values 19 | #' (only used if \code{backstep = TRUE}) 20 | #' @param propensity logical: should propensity score stratification be used? 
21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.val optional vector giving propensity score stratum for each 24 | #' validation-set observation 25 | #' (only used if \code{propensity = backstep = TRUE}) 26 | #' @param minnum minimum number of observations in each arm of each propensity 27 | #' score stratum needed to estimate regression coefficients for basis 28 | #' (only used if \code{propensity = TRUE}) 29 | #' 30 | #' @return an object of class \code{causalMARS} with attributes: 31 | #' \itemize{ 32 | #' \item parent: indices of nodes that are parents at each stage 33 | #' \item childvar: index of predictor chosen at each forward step 34 | #' \item childquant: quantile of cutoff chosen at each forward step 35 | #' \item quant: quantiles of the columns of x 36 | #' \item active: indices of columns with nonzero norm 37 | #' \item allvars: list of variables appearing in each term 38 | #' \item khat: the sequence of terms deleted at each step 39 | #' \item deltahat: relative change in rss 40 | #' \item rsstesthat: validation-set rss achieved by each model in sequence 41 | #' \item setesthat: standard error for rsstesthat 42 | #' \item tim1: time elapsed during forward stepwise phase 43 | #' \item tim2: total time elapsed 44 | #' \item x 45 | #' \item tx 46 | #' \item y 47 | #' \item maxterms 48 | #' \item eps 49 | #' \item backstep 50 | #' \item propensity 51 | #' \item x.val 52 | #' \item tx.val 53 | #' \item y.val 54 | #' \item stratum 55 | #' \item stratum.val 56 | #' \item minnum 57 | #' } 58 | #' 59 | #' @details 60 | #' parallel arms mars with backward stepwise BOTH randomized case and 61 | #' propensity stratum. data structures: model terms (nodes) are numbered 62 | #' 1, 2, ... with 1 representing the intercept. forward stepwise: 63 | #' modmatrix contains basis functions as model is built up -- two columns are 64 | #' added at each step. Does not include a column of ones for tidiness, 65 | #' we always add two terms, even when term added in linear (so that reflected 66 | #' version is just zero). 67 | #' backward stepwise: khat is the sequence of terms deleted at each step, 68 | #' based on deltahat = relative change in rss. rsstesthat is rss over test 69 | #' (validation) set achieved by each reduced model in sequence- used later for 70 | #' selecting a member of the sequence. 
active2 contains indices of columns with 71 | #' nonzero norm 72 | #' 73 | #' @examples 74 | #'# Randomized experiment example 75 | #' 76 | #'n = 100 # number of training-set patients to simulate 77 | #'p = 10 # number of features for each training-set patient 78 | #' 79 | #'# Simulate data 80 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 81 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 82 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 83 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 84 | #' 85 | #'# Estimate causal MARS model 86 | #'fit_cm = causalMARS(x, tx, y) 87 | #'pred_cm = predict(fit_cm, newx = x) 88 | #' 89 | #'# Visualize results 90 | #'plot(tx_effect, pred_cm, main = 'Causal MARS', 91 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 92 | #'abline(0, 1, lty = 2) 93 | #' 94 | #' @export 95 | 96 | causalMARS = function(x, tx, y, 97 | maxterms = 11, nquant = 5, degree = ncol(x), eps = 1, 98 | backstep = FALSE, x.val = NULL, tx.val = NULL, y.val = NULL, 99 | propensity = FALSE, stratum = rep(1, nrow(x)), stratum.val = NULL, 100 | minnum = 5) { 101 | 102 | 103 | # Input sanitization 104 | 105 | x = as.matrix(x) 106 | 107 | if (nrow(x) != length(tx)) { 108 | stop('nrow(x) does not match length(tx)') 109 | 110 | } else if (nrow(x) != length(y)) { 111 | stop('nrow(x) does not match length(y)') 112 | 113 | } else if (!is.numeric(x)) { 114 | stop('x must be numeric matrix') 115 | 116 | } else if (!is.numeric(y)) { 117 | stop('y must be numeric (use 0/1 for binary response)') 118 | 119 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 120 | stop('tx must be vector of 0s and 1s') 121 | 122 | } 123 | 124 | 125 | BIG = 1e+10 126 | n = nrow(x) 127 | p = ncol(x) 128 | x = scale(x, TRUE, FALSE) #NOTE 129 | if(!is.null(x.val)) { 130 | x.val = scale(x.val, TRUE, FALSE) 131 | } 132 | 133 | # compute quantiles for splitting 134 | discrete = rep(FALSE, p) 135 | for (j in 1:p) { 136 | if (length(table(x[, j])) == 2) 137 | discrete[j] = TRUE 138 | } 139 | 140 | probs = seq(0, 1, length = nquant)[-nquant] 141 | quant = apply(x, 2, stats::quantile, probs) 142 | 143 | nquantm = rep(nquant, p) 144 | if (sum(discrete) > 0) { 145 | for (j in which(discrete)) { 146 | nquantm[j] = 2 147 | quant[, j] = NA 148 | quant[1, j] = 0 149 | } 150 | } 151 | 152 | if (propensity) { 153 | stratum.sizes = table(stratum)/nrow(x) 154 | nstratum = length(stratum.sizes) 155 | if (sum(as.numeric(names(stratum.sizes)) != (1:length(stratum.sizes))) != 0) 156 | stop("Strata should be numbered 1:k") 157 | stratum.val.sizes = table(stratum.val)/nrow(x.val) 158 | nstratum.val = length(stratum.val.sizes) 159 | if (sum(as.numeric(names(stratum.val.sizes)) != (1:length(stratum.val.sizes))) != 160 | 0) 161 | stop("Stratatest should be numbered 1:k") 162 | } 163 | 164 | modmatrix = matrix(0, nrow = n, ncol = maxterms) 165 | active = rep(FALSE, maxterms) 166 | active[1] = TRUE 167 | modmatrix[, 1] = 1 168 | 169 | r = y - y 170 | a0 = c(mean(y[tx == 0]), mean(y[tx == 1])) 171 | 172 | r[tx == 0] = y[tx == 0] - mean(y[tx == 0]) 173 | r[tx == 1] = y[tx == 1] - mean(y[tx == 1]) 174 | 175 | parent = childvar = childquant = NULL 176 | allvars = vector("list", maxterms) 177 | maxscorall = rep(NA, maxterms) 178 | nterms = 1 179 | 180 | 181 | # forward stepwise 182 | while (nterms < maxterms) { 183 | maxscor = -1 * BIG 184 | act = active[1:nterms] 185 | num = unlist(lapply(allvars, 
length))[1:nterms] 186 | act = act & (num < degree) 187 | 188 | for (ii in (1:nterms)[act]) { 189 | jlist = rep(TRUE, p) 190 | if (ii > 1) 191 | jlist[allvars[[ii]]] = FALSE 192 | 193 | 194 | for (j in which(jlist)) { 195 | for (k in 1:(nquantm[j] - 1)) { 196 | # bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) bx2 = 197 | # modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) NOTE have to make 198 | # correspondng chanegs in propensity section below! 199 | bx = NULL 200 | bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) 201 | if (sum(bx1^2) > 0) 202 | bx = cbind(bx, bx1) 203 | bx2 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) 204 | if (sum(bx2^2) > 0) 205 | bx = cbind(bx, bx2) 206 | 207 | if (!propensity) { 208 | 209 | scor = -0.5 * BIG 210 | if (!is.null(bx)) { 211 | res = myridge(bx, r, int = TRUE)$res 212 | res0 = myridge(bx[tx == 0, ], r[tx == 0], int = TRUE)$res 213 | res1 = myridge(bx[tx == 1, ], r[tx == 1], int = TRUE)$res 214 | scor = sum(res^2) - sum(res0^2) - sum(res1^2) 215 | } 216 | } 217 | 218 | if (propensity) { 219 | if (!is.null(bx)) { 220 | scor = 0 221 | 222 | res0 = res1 = rep(0, n) 223 | for (s in 1:nstratum) { 224 | 225 | res = myridge(bx[stratum == s, ], r[stratum == s])$res 226 | 227 | if (sum(tx == 0 & stratum == s) >= minnum) { 228 | res0[stratum == s & tx == 0] = myridge(bx[tx == 0 & stratum == 229 | s, ], r[tx == 0 & stratum == s])$res 230 | } 231 | if (sum(tx == 1 & stratum == s) >= minnum) { 232 | res1[stratum == s & tx == 1] = myridge(bx[tx == 1 & stratum == 233 | s, ], r[tx == 1 & stratum == s])$res 234 | } 235 | scor = scor + (sum(res^2) - sum(res0^2) - sum(res1^2)) 236 | 237 | } 238 | res0 = res0[tx == 0] 239 | res1 = res1[tx == 1] 240 | } 241 | } # end of propensity loop 242 | 243 | 244 | if (scor > maxscor) { 245 | maxscor = scor 246 | 247 | iihat = ii 248 | jhat = j 249 | khat = k 250 | res0hat = res0 251 | res1hat = res1 252 | } 253 | } 254 | } 255 | } #end of for loop 256 | 257 | maxscorall[ii] = maxscor 258 | new1 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 1) 259 | new2 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 2) 260 | if (!is.matrix(new1)) 261 | new1 = matrix(new1, ncol = 1) 262 | if (!is.matrix(new2)) 263 | new2 = matrix(new2, ncol = 1) 264 | 265 | 266 | modmatrix[, nterms + 1] = new1 267 | modmatrix[, nterms + 2] = new2 268 | 269 | 270 | active[nterms + 1] = TRUE 271 | active[nterms + 2] = TRUE 272 | 273 | 274 | allvars[[nterms + 1]] = allvars[[nterms + 2]] = c(allvars[[iihat]], jhat) 275 | nterms = nterms + 2 276 | 277 | 278 | r[tx == 0] = r[tx == 0] * (1 - eps) + eps * res0hat 279 | r[tx == 1] = r[tx == 1] * (1 - eps) + eps * res1hat 280 | 281 | parent = c(parent, iihat) 282 | childvar = c(childvar, jhat) 283 | 284 | childquant = c(childquant, khat) 285 | } #end of while loop 286 | 287 | 288 | active = colSums(modmatrix^2) > 0 289 | deltahat = khat = NA 290 | out = list(parent = parent, childvar = childvar, childquant = childquant, quant = quant, 291 | active = active, eps = eps, allvars = allvars) 292 | 293 | tim1 = proc.time() 294 | # cat('forward done', fill = TRUE) 295 | 296 | 297 | # backward deletion 298 | rsstesthat = setesthat = NULL 299 | 300 | if (backstep) { 301 | 302 | if(is.null(x.val) | is.null(tx.val) | is.null(y.val)) { 303 | stop('If backstep = TRUE, then x.val, tx.val, y.val must be specified.') 304 | } 305 | 306 | BIG = 1e+10 307 | modmatrix = makebx.newmars(out, x, remove.zerocols = FALSE)[, -1] 308 | modmatrix.val = 
makebx.newmars(out, x.val, remove.zerocols = FALSE)[, -1] 309 | ss = colSums(modmatrix^2) > 0 310 | 311 | active2 = rep(TRUE, ncol(modmatrix.val)) 312 | active2[!ss] = FALSE 313 | khat = deltahat = deltatesthat = rsstesthat = setesthat = rep(NA, sum(active2)) 314 | rtest = rep(NA, nrow(x.val)) 315 | 316 | go = sum(active2) > 0 317 | 318 | ii = 0 319 | while (go) { 320 | go = FALSE 321 | 322 | delta = deltatest = rsstest = rep(BIG, length(active2)) 323 | # train 324 | if (!propensity) { 325 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0, ], y[tx == 326 | 0], int = TRUE) 327 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1, ], y[tx == 328 | 1], int = TRUE) 329 | res0 = fit0$res 330 | res1 = fit1$res 331 | 332 | # test 333 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 0, 334 | , drop = FALSE]) %*% fit0$coef 335 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 1, 336 | , drop = FALSE]) %*% fit1$coef 337 | res0test = (y.val[tx.val == 0] - yhat0) 338 | res1test = (y.val[tx.val == 1] - yhat1) 339 | rss0testsq = sum(res0test^2) 340 | rss1testsq = sum(res1test^2) 341 | } 342 | 343 | if (propensity) 344 | { 345 | rss0sq = rss1sq = rss0testsq = rss1testsq = 0 346 | # initial fit with parallel terms training 347 | for (s in 1:nstratum) { 348 | res0 = res1 = 0 349 | if (sum(tx == 0 & stratum == s) >= minnum) { 350 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0 & stratum == 351 | s, ], y[tx == 0 & stratum == s], int = TRUE) 352 | res0 = fit0$res 353 | } 354 | if (sum(tx == 1 & stratum == s) >= minnum) { 355 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1 & stratum == 356 | s, ], y[tx == 1 & stratum == s], int = TRUE) 357 | res1 = fit1$res 358 | } 359 | rss0sq = rss0sq + sum(res0^2) 360 | rss1sq = rss1sq + sum(res1^2) 361 | 362 | 363 | # test 364 | 365 | res0test = res1test = 0 366 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 367 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 368 | 0 & stratum.val == s, ]) %*% fit0$coef 369 | res0test = (y.val[tx.val == 0 & stratum.val == s] - yhat0) 370 | } 371 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 372 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 373 | 1 & stratum.val == s, ]) %*% fit1$coef 374 | res1test = (y.val[tx.val == 1 & stratum.val == s] - yhat1) 375 | } 376 | rss0testsq = rss0testsq + sum(res0test^2) 377 | rss1testsq = rss1testsq + sum(res1test^2) 378 | } 379 | } #end of propensity 380 | 381 | 382 | for (k in which(active2)) { 383 | # try collapsing a training set term 384 | act = active2 385 | act[k] = FALSE 386 | 387 | if (!propensity) { 388 | 389 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 390 | r = redfit$res 391 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0, ], r[tx == 392 | 0], int = TRUE) 393 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1, ], r[tx == 394 | 1], int = TRUE) 395 | 396 | res00 = fit00$res 397 | res11 = fit11$res 398 | delta[k] = (sum(res00^2) + sum(res11^2) - sum(res0^2) - sum(res1^2))/(sum(res00^2) + 399 | sum(res11^2)) 400 | # try collapsing the same test set term 401 | rtest = y.val - cbind(1, modmatrix.val[, k, drop = FALSE]) %*% redfit$coef 402 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 0, 403 | ]) %*% fit00$coef 404 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 1, 405 | ]) %*% fit11$coef 406 | 407 | res00test = (rtest[tx.val == 0] - yhat00) 408 | res11test = (rtest[tx.val == 1] - yhat11) 409 | 
rss00testsq = sum(res00test^2) 410 | rss11testsq = sum(res11test^2) 411 | } 412 | 413 | if (propensity) 414 | { 415 | rss00sq = rss11sq = rss00testsq = rss11testsq = 0 416 | for (s in 1:nstratum) { 417 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 418 | r = redfit$res 419 | res00 = res11 = res00test = res11test = 0 420 | if (sum(tx == 0 & stratum == s) >= minnum) { 421 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0 & stratum == 422 | s, ], r[tx == 0 & stratum == s], int = TRUE) 423 | res00 = fit00$res 424 | } 425 | if (sum(tx == 1 & stratum == s) >= minnum) { 426 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1 & stratum == 427 | s, ], r[tx == 1 & stratum == s], int = TRUE) 428 | res11 = fit11$res 429 | } 430 | 431 | rss00sq = rss00sq + sum(res00^2) 432 | rss11sq = rss11sq + sum(res11^2) 433 | 434 | # try deleting the same test set term 435 | 436 | 437 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 438 | 439 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 440 | 0 & stratum.val == s, ]) %*% fit00$coef 441 | res00test = (y.val[tx.val == 0 & stratum.val == s] - yhat00) 442 | } 443 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 444 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 445 | 1 & stratum.val == s, ]) %*% fit11$coef 446 | res11test = (y.val[tx.val == 1 & stratum.val == s] - yhat11) 447 | } 448 | 449 | rss00testsq = rss00testsq + sum(res00test^2) 450 | rss11testsq = rss11testsq + sum(res11test^2) 451 | 452 | } 453 | delta[k] = (rss00sq + rss11sq - rss0sq - rss1sq)/(rss00sq + rss11sq) 454 | } #end of propensity 455 | 456 | # deltatest[k] = (sum(res00test^2) + sum(res11test^2) - sum(res0test^2) 457 | # -sum(res1test^2))/(sum(res00test^2) + sum(res11test^2)) 458 | 459 | deltatest[k] = (rss00testsq + rss11testsq - rss0testsq - rss1testsq)/(rss00testsq + 460 | rss11testsq) 461 | } #end of propensity 462 | 463 | ii = ii + 1 464 | # cat(c('ii=', ii), fill = TRUE) 465 | khat[ii] = which.min(delta) 466 | # cat(delta,fill=TRUE) 467 | deltahat[ii] = delta[khat[ii]] 468 | deltatesthat[ii] = deltatest[khat[ii]] 469 | n1 = length(res0test) 470 | n2 = length(res1test) 471 | # rsstesthat[ii]=(sum(res0test^2) + sum(res1test^2))/(n1+n2) 472 | rsstesthat[ii] = (rss0testsq + rss1testsq)/(n1 + n2) 473 | setesthat[ii] = NA 474 | # setesthat[ii]=sqrt((n1*var(res0test^2)+n2*var(res1test^2))/(n1+n2)^2) # not 475 | # currently computed 476 | 477 | active2[khat] = FALSE 478 | num.colsleft = sum(colSums(modmatrix.val[, active2, drop = FALSE]^2) > 0) 479 | go = (sum(active2) > 0) & (num.colsleft > 0) 480 | # cat(c(sum(active2), num.colsleft),fill=TRUE) browser() cat(c('dropping ', khat), 481 | # fill = TRUE) 482 | } 483 | 484 | } 485 | out$khat = khat[!is.na(khat)] 486 | out$deltahat = deltahat[!is.na(deltahat)] 487 | out$rsstesthat = rsstesthat 488 | out$setesthat = setesthat 489 | out$tim1 = tim1 490 | out$tim2 = proc.time() 491 | out$x = x 492 | out$tx = tx 493 | out$y = y 494 | out$x.val = x.val 495 | out$tx.val = tx.val 496 | out$y.val = y.val 497 | out$maxterms = maxterms 498 | out$backstep = backstep 499 | out$propensity = propensity 500 | out$stratum = stratum 501 | out$stratum.val = stratum.val 502 | out$minnum = minnum 503 | class(out) = 'causalMARS' 504 | return(out) 505 | } 506 | 507 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | 
#' Fit a causal boosting model with cross validation 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param num.trees number of shallow causal trees to build 7 | #' @param maxleaves maximum number of leaves per causal tree 8 | #' @param eps learning rate 9 | #' @param splitSpread how far apart should the candidate splits be for the 10 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 11 | #' cutpoints as candidates for making split 12 | #' @param type.measure loss to use for cross validation: 13 | #' 'response' returns mean-square error for predicting response in each arm. 14 | #' 'effect' returns MSE for treatment effect using honest over-fit estimation. 15 | #' @param nfolds number of cross validation folds 16 | #' @param foldid vector of fold membership 17 | #' @param propensity logical: should propensity score stratification be used? 18 | #' @param stratum optional vector giving propensity score stratum for each 19 | #' observation (only used if \code{propensity = TRUE}) 20 | #' @param isConstVar logical: for the causal tree splitting criterion 21 | #' (T-statistc), should it be assumed that the noise variance is the same in 22 | #' treatment and control arms? 23 | #' 24 | #' @return an object of class \code{cv.causalBoosting} which is an object of 25 | #' class \code{causalBoosting} with these additional attributes: 26 | #' \itemize{ 27 | #' \item num.trees.min: number of trees with lowest CV error 28 | #' \item cvm: vector of mean CV error for each number of trees 29 | #' \item cvsd: vector of standard errors for mean CV errors 30 | #' } 31 | #' 32 | #' @examples 33 | #'# Randomized experiment example 34 | #' 35 | #'n = 100 # number of training-set patients to simulate 36 | #'p = 10 # number of features for each training-set patient 37 | #' 38 | #'# Simulate data 39 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 40 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 41 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 42 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 43 | #' 44 | #'# Estimate causal boosting model with cross-validation 45 | #'fit_cv = cv.causalBoosting(x, tx, y) 46 | #'fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 47 | #'pred_cv = predict(fit_cv, newx = x) 48 | #' 49 | #'# Visualize results 50 | #'plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 51 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 52 | #'abline(0, 1, lty = 2) 53 | #' 54 | #' @export 55 | 56 | 57 | cv.causalBoosting = function(x, tx, y, 58 | num.trees = 500, maxleaves = 4, eps = 0.01, splitSpread = 0.1, 59 | type.measure = c('effect', 'response'), nfolds = 5, foldid = NULL, 60 | propensity = FALSE, stratum = NULL, isConstVar = TRUE) { 61 | 62 | 63 | # Input sanitization 64 | 65 | x = as.matrix(x) 66 | 67 | if (nrow(x) != length(tx)) { 68 | stop('nrow(x) does not match length(tx)') 69 | 70 | } else if (nrow(x) != length(y)) { 71 | stop('nrow(x) does not match length(y)') 72 | 73 | } else if (!is.numeric(x)) { 74 | stop('x must be numeric matrix') 75 | 76 | } else if (!is.numeric(y)) { 77 | stop('y must be numeric (use 0/1 for binary response)') 78 | 79 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 80 | stop('tx must be vector of 0s and 1s') 81 | 82 | } 83 | 84 | 85 | type.measure = 
match.arg(type.measure) 86 | 87 | if (is.null(foldid)) foldid = sample(rep(1:nfolds, length = nrow(x))) 88 | nfolds = length(unique(foldid)) 89 | 90 | fit = list() 91 | pred.response = matrix(0, nrow(x), num.trees) 92 | pred.effect = matrix(0, nrow(x), num.trees) 93 | pred.refit = rep(NA, nrow(x)) 94 | 95 | for (k in 1:nfolds) { 96 | 97 | x.val = x[foldid == k, , drop = FALSE] 98 | tx.val = tx[foldid == k] 99 | y.val = y[foldid == k] 100 | stratum.val = stratum[foldid == k] 101 | 102 | fit[[k]] = causalBoosting(x = x[foldid != k, , drop = FALSE], 103 | tx = tx[foldid != k], y = y[foldid != k], 104 | num.trees = num.trees, maxleaves = maxleaves, eps = eps, 105 | splitSpread = splitSpread, x.est = x.val, tx.est = tx.val, y.est = y.val, 106 | propensity = propensity, stratum = stratum[foldid != k], 107 | stratum.est = stratum.val, isConstVar = isConstVar) 108 | 109 | tmp = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 1:num.trees, 110 | type = 'conditional.mean') 111 | 112 | tmpMat = rbind(tmp$G0, tmp$G1) 113 | n_out = sum(foldid == k) 114 | pred.response[foldid == k, ] = 115 | tmpMat[tx[foldid == k] * n_out + (1:n_out), ] 116 | 117 | pred.effect[foldid == k, ] = tmp$G1 - tmp$G0 118 | tmp.refit = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 119 | 1:num.trees, type = 'conditional.mean', honest = TRUE) 120 | pred.refit[foldid == k] = (tmp.refit$G1 - tmp.refit$G0)[, num.trees] 121 | } 122 | 123 | cvm.effect = colMeans((pred.effect - pred.refit)^2) 124 | cvsd.effect = apply((pred.effect - pred.refit)^2, 2, stats::sd) / 125 | sqrt(nrow(pred.effect)) 126 | 127 | cvm.response = apply(pred.response, 2, function(yhat) mean((yhat - y)^2)) 128 | cvsd.response = apply(pred.response, 2, 129 | function(yhat) stats::sd((yhat - y)^2)) / sqrt(nrow(pred.response)) 130 | 131 | fit = causalBoosting(x = x, tx = tx, y = y, num.trees = num.trees, 132 | maxleaves = maxleaves, eps = eps, splitSpread = splitSpread, 133 | propensity = propensity, stratum = stratum, isConstVar = isConstVar) 134 | 135 | fit$num.trees.min.effect = which.min(cvm.effect) 136 | fit$num.trees.min.response = which.min(cvm.response) 137 | fit$num.trees.1se.effect = 138 | max(which(cvm.effect < min(cvm.effect + cvsd.effect))) 139 | 140 | fit$cvm.effect = cvm.effect 141 | fit$cvsd.effect = cvsd.effect 142 | fit$cvm.response = cvm.response 143 | fit$cvsd.response = cvsd.response 144 | 145 | class(fit) = 'cv.causalBoosting' 146 | fit 147 | } 148 | 149 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/makebx.newmars.R: -------------------------------------------------------------------------------- 1 | 2 | makebx.newmars = function(fit, x, remove.zerocols = FALSE) { 3 | # creates model matrix from 'fit' 4 | quant = fit$quant 5 | parent = fit$parent 6 | childvar = fit$childvar 7 | childquant = fit$childquant 8 | active = fit$active 9 | nterms = length(parent) 10 | nterms2 = 1 + 2 * nterms 11 | 12 | modmatrix = matrix(0, nrow = nrow(x), ncol = nterms2) 13 | modmatrix[, 1] = 1 14 | count = 1 15 | 16 | for (ii in 1:nterms) { 17 | new1 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 18 | childvar[ii]], dir = 1) 19 | modmatrix[, count + 1] = new1 20 | new2 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 21 | childvar[ii]], dir = 2) 22 | 23 | modmatrix[, count + 2] = new2 24 | count = count + 2 25 | } 26 | if (remove.zerocols) 27 | modmatrix = modmatrix[, active] 28 | return(modmatrix) 29 | } 
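# Worked illustration (a minimal sketch with hypothetical fit values): for a fit
# with parent = c(1), childvar = c(3) and childquant = c(2), the single forward
# step fills columns 2 and 3 of the model matrix with the reflected hinge pair
#   modmatrix[, 2] = modmatrix[, 1] * truncpow(x[, 3], quant[2, 3], dir = 1)  # (x3 - cut)+
#   modmatrix[, 3] = modmatrix[, 1] * truncpow(x[, 3], quant[2, 3], dir = 2)  # (cut - x3)+
# so each forward-stepwise term contributes two basis columns, one per direction.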
30 | 31 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/myridge.R: -------------------------------------------------------------------------------- 1 | 2 | myridge = function(x, y, int = T, eps = 0.001) { 3 | xx = x 4 | if (!is.matrix(xx)) 5 | xx = matrix(xx, ncol = 1) 6 | lam = rep(eps, ncol(xx)) 7 | 8 | if (int) { 9 | xx = cbind(1, xx) 10 | lam = c(0, lam) 11 | } 12 | d = diag(lam) 13 | if (ncol(xx) == 1) 14 | d = lam 15 | coef = solve(t(xx) %*% xx + d) %*% t(xx) %*% y 16 | res = y - xx %*% coef 17 | list(res = res, coef = coef) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Pollinate a fitted ranger random forest model 2 | #' 3 | #' @param object a fitted \code{ranger} object 4 | #' @param x matrix of covariates 5 | #' @param y vector of response values 6 | #' 7 | #' @return an object of class \code{pollinated.ranger} which is a \code{ranger} 8 | #' object that has been pollinated with the data in (x, y) 9 | 10 | pollinated.ranger = function(object, x, y) { 11 | 12 | forest = object$forest 13 | num.trees = forest$num.trees 14 | which.list = as.list(seq(num.trees)) 15 | split.values = forest$split.values 16 | split.varIDs = forest$split.varIDs 17 | 18 | for (i in 1:num.trees) { 19 | which = match(split.varIDs[[i]], 0, FALSE) 20 | split.values[[i]][which > 0] = seq(sum(which)) 21 | which.list[[i]] = which 22 | } 23 | 24 | forest$split.values = split.values 25 | object$forest = forest 26 | preds = stats::predict(object, x, predict.all = TRUE)$predictions 27 | 28 | ### Get list of means indexed by the unique terminal node values 29 | pmeans = apply(preds, 2, function(f, y) tapply(y, f, mean), y) 30 | 31 | ### Now populate these terminal nodes with these values 32 | for (i in 1:num.trees) { 33 | which = which.list[[i]] 34 | repvec = rep(NA, sum(which)) 35 | pmean = pmeans[[i]] 36 | ids = as.integer(names(pmean)) 37 | repvec[ids] = pmean 38 | split.values[[i]][which > 0] = repvec 39 | } 40 | 41 | forest$split.values = split.values 42 | object$forest = forest 43 | object$mean = mean(y) 44 | class(object) = c('pollinated.ranger', 'ranger') 45 | object 46 | } 47 | 48 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted PTO forest model 2 | #' 3 | #' @param object a fitted \code{PTOforest} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param ... ignored 7 | #' 8 | #' @return a vector of predictions corresponding to the rows of \code{newx} 9 | #' 10 | #' @export 11 | 12 | predict.PTOforest = function(object, newx, ...) 
{ 13 | 14 | colnames(newx) = colnames(object$x) 15 | 16 | if (object$postprocess) { 17 | stats::predict(object$postfit, data = newx)$predictions 18 | 19 | } else { 20 | stats::predict(object$PTOfit1, newx = newx) - 21 | stats::predict(object$PTOfit0, newx = newx) 22 | 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a bag of fitted causal MARS models 2 | #' 3 | #' @param object a fitted \code{bagged.causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param type type of prediction required: 7 | #' 'average' returns a vector of the averages of the bootstrap estimates. 8 | #' 'all' returns a matrix of all of the bootstrap estimates. 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.bagged.causalMARS = function(object, newx, type = c('average', 'all'), 17 | ...) { 18 | type = match.arg(type) 19 | newx = scale(newx, center = TRUE, scale = FALSE) 20 | pred = sapply(object, FUN = stats::predict, newx = newx) 21 | switch(type, 22 | all = pred, 23 | average = rowMeans(pred)) 24 | } 25 | 26 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal boosting model 2 | #' 3 | #' @param object a fitted \code{causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param num.trees number(s) of shallow causal trees to use for prediction 13 | #' @param honest logical: should honest re-estimates of leaf means be used for 14 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 15 | #' when the causal boosting model was fit 16 | #' @param naVal value with which to replace \code{NA} predictions 17 | #' @param ... ignored 18 | #' 19 | #' @return a vector or matrix of predictions corresponding to the rows of 20 | #' \code{newx} 21 | #' 22 | #' @export 23 | 24 | predict.causalBoosting = function(object, newx, newtx = NULL, 25 | type = c('treatment.effect', 'conditional.mean', 'response'), 26 | num.trees = 1:object$num.trees, honest = FALSE, naVal = 0, ...) 
{ 27 | 28 | type = match.arg(type) 29 | if (type == 'response' & is.null(newtx)) { 30 | stop('response predictions require that newtx be specified') 31 | } 32 | 33 | CBM = object$CBM 34 | 35 | n = nrow(newx) 36 | eps = CBM$eps 37 | G0 = matrix(CBM$intercept[1], n, max(num.trees) + 1) 38 | G1 = matrix(CBM$intercept[2], n, max(num.trees) + 1) 39 | for (k in 1:max(num.trees)) { 40 | pred = stats::predict(CBM$trees[[k]], newx, type = 'conditional.mean', 41 | honest = honest) 42 | G1[, k + 1] = G1[, k] + eps * pred[, 2] 43 | G0[, k + 1] = G0[, k] + eps * pred[, 1] 44 | } 45 | G1[is.na(G1)] = naVal 46 | G0[is.na(G0)] = naVal 47 | 48 | switch(type, 49 | treatment.effect = (G1 - G0)[, num.trees + 1], 50 | conditional.mean = list(G1 = G1[, num.trees + 1], G0 = G0[, num.trees + 1]), 51 | response = (newtx * G1 + (1 - newtx) * G0)[, num.trees + 1]) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal MARS model 2 | #' 3 | #' @param object a fitted \code{causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param active indices of columns with nonzero norm (defaults to model 7 | #' selected via backward stepwise phase, or the full model if 8 | #' \code{backstep = FALSE}) 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.causalMARS = function(object, newx, active, ...) { 17 | 18 | if (missing(active)) { 19 | if (object$backstep) { 20 | ntermshat = length(object$khat) - which.min(object$rsstesthat) + 1 21 | active = (rev(object$khat))[1:ntermshat] 22 | } else { 23 | active = 1:(object$maxterms - 1) 24 | } 25 | } 26 | 27 | object$x = scale(object$x, center = TRUE, scale = FALSE) 28 | newx = scale(newx, center = TRUE, scale = FALSE) 29 | 30 | del = rep(mean(object$y[object$tx == 1]) - mean(object$y[object$tx == 0]), 31 | nrow(newx)) 32 | stratum = object$stratum 33 | stratum.val = object$stratum.val 34 | minnum = object$minnum 35 | nstrata = length(unique(stratum)) 36 | stratawt = (table(c(stratum, 1:nstrata)) - 1) 37 | stratawt = stratawt / sum(stratawt) 38 | 39 | if (length(active) > 0) { 40 | if (!object$propensity) { 41 | # make training matrices 42 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 43 | beta0 = myridge(bx0[object$tx == 0, ], object$y[object$tx == 0])$coef 44 | beta1 = myridge(bx0[object$tx == 1, ], object$y[object$tx == 1])$coef 45 | # make test set matrices 46 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 47 | del = cbind(1, bx) %*% (beta1 - beta0) 48 | } 49 | if (object$propensity) { 50 | # make training matrices 51 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 52 | beta0 = beta1 = matrix(NA, ncol(bx0) + 1, nstrata) 53 | for (ss in 1:nstrata) { 54 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 55 | beta0[, ss] = myridge(bx0[object$tx == 0 & stratum == ss, ], 56 | object$y[object$tx == 0 & stratum == ss])$coef 57 | beta1[, ss] = myridge(bx0[object$tx == 1 & stratum == ss, ], 58 | object$y[object$tx == 1 & stratum == ss])$coef 59 | } 60 | } 61 | # make test set matrices 62 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 63 | del = 
rep(0, nrow(bx)) 64 | totwt = 0 65 | for (ss in 1:nstrata) { 66 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 67 | del = del + stratawt[ss] * cbind(1, bx) %*% 68 | (beta1[, ss] - beta0[, ss]) 69 | totwt = totwt + stratawt[ss] 70 | } 71 | } 72 | del = del / totwt 73 | } 74 | } 75 | return(c(del)) 76 | } 77 | 78 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.causalTree.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal tree model 2 | #' 3 | #' @param object a fitted \code{causalTree} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param honest logical: should honest re-estimates of leaf means be used for 13 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 14 | #' when the causal boosting model was fit 15 | #' @param naVal value with which to replace \code{NA} predictions 16 | #' @param ... ignored 17 | #' 18 | #' @return a vector or matrix of predictions corresponding to the rows of 19 | #' \code{newx} 20 | #' 21 | #' @export 22 | 23 | predict.causalTree = function(object, newx, newtx = NULL, 24 | type = c('treatment.effect', 'conditional.mean', 'response'), honest = FALSE, 25 | naVal = 0, ...) { 26 | 27 | leaf = rep(1, nrow(newx)) 28 | terminal = rep(FALSE, nrow(newx)) 29 | while (sum(!is.na(object$var[leaf])) > 0) { 30 | var = cbind((1:nrow(newx))[!terminal], object$var[leaf[!terminal]]) 31 | leaf[!terminal] = object$left[leaf[!terminal]] + (newx[var] >= object$val[leaf[!terminal]]) 32 | terminal = is.na(object$var[leaf]) 33 | } 34 | pred = list() 35 | pred$leaf = leaf 36 | if (!honest) { 37 | pred$pred0 = object$pred0[leaf] 38 | pred$pred1 = object$pred1[leaf] 39 | } else { 40 | pred$pred0 = object$pred0e[leaf] 41 | pred$pred1 = object$pred1e[leaf] 42 | } 43 | type = match.arg(type) 44 | predMatrix = cbind(pred$pred0, pred$pred1) 45 | predMatrix[is.na(predMatrix)] = naVal 46 | out = switch(type, 47 | treatment.effect = predMatrix[, 2] - predMatrix[, 1], 48 | conditional.mean = predMatrix, 49 | response = ifelse(!is.null(newtx), 50 | predMatrix[cbind(1:nrow(newx), newtx + 1)], rep(NA, nrow(newx)))) 51 | out 52 | } 53 | 54 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted cross-validated causal boosting model 2 | #' 3 | #' @param object a fitted \code{cv.causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'individual'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 
12 | #' @param num.trees number of shallow causal trees to use for prediction 13 | #' @param naVal value with which to replace \code{NA} predictions 14 | #' @param ... ignored 15 | #' 16 | #' @return a vector or matrix of predictions corresponding to the rows of 17 | #' \code{newx} 18 | #' 19 | #' @export 20 | 21 | predict.cv.causalBoosting = function(object, newx, newtx = NULL, 22 | type = c('treatment.effect', 'conditional.mean', 'response'), 23 | num.trees = object$num.trees.min.effect, naVal = 0, ...) { 24 | 25 | predict.causalBoosting(object = object, newx = newx, newtx = newtx, 26 | type = type, num.trees = num.trees, naVal = naVal) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a pollinated ranger random forest model 2 | #' 3 | #' @param object a fitted \code{pollinated.ranger} object 4 | #' @param newx matrix of new covariates for which predictions are desired 5 | #' @param predict.all logical: should predictions from all trees be returned? 6 | #' Otherwise the average across trees is returned 7 | #' @param na.treatment how to treat NA predictions from individual trees: 8 | #' 'omit' only uses trees for which the prediction is not NA. 9 | #' 'replace' replaces NA predictions with the overall mean response. 10 | #' 'NA' returns NA if any tree prediction is NA. 11 | #' @param ... additional arguments passed on to \code{predict.ranger} 12 | 13 | #' 14 | #' @return a vector of predicted treatment effects corresponding to the rows of 15 | #' newx 16 | 17 | predict.pollinated.ranger = function(object, newx, predict.all = FALSE, 18 | na.treatment = c('omit', 'replace', 'NA'), ...) 
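# Prediction temporarily re-classes the object to 'ranger' so that ranger's
# own predict method returns per-tree predictions; NA predictions are then
# handled according to na.treatment before (optionally) averaging across trees.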
{ 19 | 20 | na.treatment = match.arg(na.treatment) 21 | 22 | class(object) = 'ranger' 23 | preds = stats::predict(object, newx, predict.all = TRUE, 24 | ...)$predictions 25 | class(object) = 'pollinated.ranger' 26 | 27 | if (na.treatment == 'replace') { 28 | wh <- is.na(preds) 29 | preds[wh] = object$mean 30 | } 31 | 32 | if (!predict.all) { 33 | preds = apply(preds, 1, mean, na.rm = (na.treatment == 'omit')) 34 | } 35 | 36 | preds 37 | } 38 | 39 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/stratify.R: -------------------------------------------------------------------------------- 1 | #' Get propensity strata from propensity scores 2 | #' 3 | #' @param pscore vector of propensity scores 4 | #' @param tx vector of treatment indicators 5 | #' @param min.per.arm minimum number of observations for each arm within each 6 | #' stratum 7 | #' 8 | #' @return a vector of integers with length equal to the length of pscore, 9 | #' reporting the propensity stratum corresponding to each propensity score 10 | #' 11 | #' @export 12 | 13 | stratify = function(pscore, tx, min.per.arm = 30) { 14 | 15 | stratum = ceiling(10 * pscore) 16 | cutoffs = sort(unique(stratum/10)) 17 | stratum = as.numeric(as.factor(stratum)) 18 | 19 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 20 | while(min(num.treated) < min.per.arm & length(unique(stratum)) > 1) { 21 | stratum1 = which.min(num.treated) 22 | cutoffs = cutoffs[-which.min(num.treated)] 23 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 24 | stratum2 = neighbors[which.min(num.treated[neighbors])] 25 | stratum[stratum == stratum1] = stratum2 26 | stratum = as.numeric(as.factor(stratum)) 27 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 28 | } 29 | 30 | stratum = 1 + max(stratum) - stratum 31 | cutoffs = rev(cutoffs) 32 | 33 | num.control = stats::aggregate(1 - tx, list(stratum = stratum), sum)$x 34 | while(min(num.control) < min.per.arm & length(unique(stratum)) > 1) { 35 | stratum1 = which.min(num.control) 36 | cutoffs = cutoffs[-which.min(num.control)] 37 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 38 | stratum2 = neighbors[which.min(num.control[neighbors])] 39 | stratum[stratum == stratum1] = stratum2 40 | stratum = as.numeric(as.factor(stratum)) 41 | num.control = stats::aggregate(tx, list(stratum = stratum), sum)$x 42 | } 43 | 44 | cutoffs[1] = 1 45 | cutoffs = rev(cutoffs) 46 | 47 | list(stratum = 1 + max(stratum) - stratum, cutoffs = cutoffs) 48 | } 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/truncpow.R: -------------------------------------------------------------------------------- 1 | 2 | truncpow = function(x, cutp, dir = 1) { 3 | if (dir == 1) 4 | out = (x - cutp) * (x > cutp) 5 | if (dir == 2) 6 | out = (cutp - x) * (x < cutp) 7 | return(out) 8 | } 9 | 10 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/README.md: -------------------------------------------------------------------------------- 1 | # causalLearning 2 | Methods for heterogeneous treatment effect estimation 3 | 4 | This R package includes functions for fitting the pollinated 5 | transformed outcome forest, causal boosting and causal MARS from 6 | [Powers et al. (2017)](https://arxiv.org/abs/1707.00102). 
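A minimal usage sketch (adapted from the examples in the package
documentation; the simulated data below is illustrative only):

```r
library(causalLearning)

# Simulate a small randomized experiment
n = 100
p = 10
x = matrix(rnorm(n * p), nrow = n, ncol = p)             # covariates
tx_effect = x[, 1] + (x[, 2] > 0)                        # true treatment effect
tx = rbinom(n, size = 1, p = 0.5)                        # random treatment assignment
y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001)  # response

# Causal boosting, with cross-validation to choose the number of trees
fit_cv = cv.causalBoosting(x, tx, y)
pred_cv = predict(fit_cv, newx = x)

# Bagged causal MARS
fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10)
pred_bcm = predict(fit_bcm, newx = x)

# Pollinated transformed outcome forest
fit_pto = PTOforest(x, tx, y)
pred_pto = predict(fit_pto, newx = x)
```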
7 | 8 | The package is currently in beta, not yet ready for public consumption. 9 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/causalLearning_1.0.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/causalLearning_1.0.0.tar.gz -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/PTOforest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PTOforest.R 3 | \name{PTOforest} 4 | \alias{PTOforest} 5 | \title{Fit a pollinated transformed outcome (PTO) forest model} 6 | \usage{ 7 | PTOforest(x, tx, y, pscore = rep(0.5, nrow(x)), num.trees = 500, 8 | mtry = ncol(x), min.node.size = max(25, nrow(x)/40), postprocess = TRUE, 9 | verbose = FALSE) 10 | } 11 | \arguments{ 12 | \item{x}{matrix of covariates} 13 | 14 | \item{tx}{vector of treatment indicators (0 or 1)} 15 | 16 | \item{y}{vector of response values} 17 | 18 | \item{pscore}{vector of propensity scores} 19 | 20 | \item{num.trees}{number of trees for transformed outcome forest} 21 | 22 | \item{mtry}{number of variables to possibly split at in each node} 23 | 24 | \item{min.node.size}{minimum node size for transformed outcome forest} 25 | 26 | \item{postprocess}{logical: should optional post-processing random forest be 27 | fit at end?} 28 | 29 | \item{verbose}{logical: should progress be printed to console?} 30 | } 31 | \value{ 32 | an object of class \code{PTOforest} with attributes: 33 | \itemize{ 34 | \item x: matrix of covariates supplied by function call 35 | \item pscore: vector of propensity score supplied by function call 36 | \item postprocess: logical supplied by function call 37 | \item TOfit: fitted random forest on transformed outcomes 38 | \item PTOfit1: TOfit pollinated with treatment-arm outcomes 39 | \item PTOfit0: TOfit pollinated with control-arm outcomes 40 | \item postfit: post-processing random forest summarizing results 41 | } 42 | } 43 | \description{ 44 | Fit a pollinated transformed outcome (PTO) forest model 45 | } 46 | \examples{ 47 | # Randomized experiment example 48 | 49 | n = 100 # number of training-set patients to simulate 50 | p = 10 # number of features for each training-set patient 51 | 52 | # Simulate data 53 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 54 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 55 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 56 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 57 | 58 | # Estimate PTO forest model 59 | fit_pto = PTOforest(x, tx, y) 60 | pred_pto = predict(fit_pto, newx = x) 61 | 62 | # Visualize results 63 | plot(tx_effect, pred_pto, main = 'PTO forest', 64 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 65 | abline(0, 1, lty = 2) 66 | 67 | } 68 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/bagged.causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/bagged.causalMARS.R 3 | \name{bagged.causalMARS} 4 | \alias{bagged.causalMARS} 5 | \title{Fit a bag of causal MARS models} 6 | \usage{ 7 | bagged.causalMARS(x, tx, y, nbag = 20, maxterms = 11, nquant = 5, 8 | degree = ncol(x), eps = 1, backstep = FALSE, propensity = FALSE, 9 | stratum = rep(1, nrow(x)), minnum = 5, verbose = FALSE) 10 | } 11 | \arguments{ 12 | \item{x}{matrix of covariates} 13 | 14 | \item{tx}{vector of treatment indicators (0 or 1)} 15 | 16 | \item{y}{vector of response values} 17 | 18 | \item{nbag}{number of models to bag} 19 | 20 | \item{maxterms}{maximum number of terms to include in the regression basis 21 | (e.g. \code{maxterms = 11} means intercept + 5 pairs added)} 22 | 23 | \item{nquant}{number of quantiles used in splitting} 24 | 25 | \item{degree}{max number of different predictors that can interact in model} 26 | 27 | \item{eps}{shrinkage factor for new term added} 28 | 29 | \item{backstep}{logical: should out-of-bag samples be used to prune each 30 | model? otherwise full regression basis is used for each model} 31 | 32 | \item{propensity}{logical: should propensity score stratification be used?} 33 | 34 | \item{stratum}{optional vector giving propensity score stratum for each 35 | observation (only used if \code{propensity = TRUE})} 36 | 37 | \item{minnum}{minimum number of observations in each arm of each propensity 38 | score stratum needed to estimate regression coefficients for basis 39 | (only used if \code{propensity = TRUE})} 40 | 41 | \item{verbose}{logical: should progress be printed to console?} 42 | } 43 | \value{ 44 | an object of class \code{bagged.causalMARS}, which is itself a list 45 | of \code{causalMARS} objects 46 | } 47 | \description{ 48 | Fit a bag of causal MARS models 49 | } 50 | \examples{ 51 | # Randomized experiment example 52 | 53 | n = 100 # number of training-set patients to simulate 54 | p = 10 # number of features for each training-set patient 55 | 56 | # Simulate data 57 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 58 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 59 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 60 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 61 | 62 | # Estimate bagged causal MARS model 63 | fit_bcm = causalLearning::bagged.causalMARS(x, tx, y, nbag = 10) 64 | pred_bcm = predict(fit_bcm, newx = x) 65 | 66 | # Visualize results 67 | plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 68 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 69 | abline(0, 1, lty = 2) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/causalBoosting.R 3 | \name{causalBoosting} 4 | \alias{causalBoosting} 5 | \title{Fit a causal boosting model} 6 | \usage{ 7 | causalBoosting(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 8 | splitSpread = 0.1, x.est = NULL, tx.est = NULL, y.est = NULL, 9 | propensity = FALSE, stratum = NULL, stratum.est = NULL, 10 | isConstVar = TRUE) 11 | } 12 | \arguments{ 13 | \item{x}{matrix of covariates} 14 | 15 | \item{tx}{vector of treatment indicators (0 or 1)} 16 | 17 | \item{y}{vector of response values} 18 | 19 | \item{num.trees}{number of shallow causal trees to build} 20 | 21 | 
\item{maxleaves}{maximum number of leaves per causal tree} 22 | 23 | \item{eps}{learning rate} 24 | 25 | \item{splitSpread}{how far apart should the candidate splits be for the 26 | causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 27 | cutpoints as candidates for making split} 28 | 29 | \item{x.est}{optional matrix of estimation-set covariates used for honest 30 | re-estimation (ignored if \code{tx.est = NULL} or \code{y.est = NULL})} 31 | 32 | \item{tx.est}{optional vector of estimation-set treatment indicators 33 | (ignored if \code{x.est = NULL} or \code{y.est = NULL})} 34 | 35 | \item{y.est}{optional vector of estimation-set response values 36 | (ignored if \code{x.est = NULL} or \code{y.est = NULL})} 37 | 38 | \item{propensity}{logical: should propensity score stratification be used?} 39 | 40 | \item{stratum}{optional vector giving propensity score stratum for each 41 | observation (only used if \code{propensity = TRUE})} 42 | 43 | \item{stratum.est}{optional vector giving propensity score stratum for each 44 | estimation-set observation (ignored if \code{x.est = NULL} or 45 | \code{tx.est = NULL} or \code{y.est = NULL})} 46 | 47 | \item{isConstVar}{logical: for the causal tree splitting criterion 48 | (T-statistc), should it be assumed that the noise variance is the same in 49 | treatment and control arms?} 50 | } 51 | \value{ 52 | an object of class \code{causalBoosting} with attributes: 53 | \itemize{ 54 | \item CBM: a list storing the intercept, the causal trees and \code{eps} 55 | \item tauhat: matrix of treatment effects for each patient for each step 56 | \item G1: estimated-treatment conditional mean for each patient 57 | \item G0: estimated-control conditional mean for each patient 58 | \item err.y: training error at each step, in predicting response 59 | \item num.trees: number of trees specified by function call 60 | } 61 | } 62 | \description{ 63 | Fit a causal boosting model 64 | } 65 | \details{ 66 | This function exists primarily to be called by cv.causalBoosting because 67 | the num.trees parameter generally needs to be tuned via cross-validation. 
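A minimal sketch of that workflow, adapted from the examples elsewhere in
this package (\code{num.trees.min.effect} is the attribute used by the
package's own examples and by \code{predict.cv.causalBoosting} as its
default):
\preformatted{
# Tune the number of trees by cross-validation, then predict
fit_cv = cv.causalBoosting(x, tx, y)
fit_cv$num.trees.min.effect      # number of trees chosen by CV
pred_cv = predict(fit_cv, newx = x)

# Direct call with a fixed number of trees
fit_cb = causalBoosting(x, tx, y, num.trees = 500)
pred_cb = predict(fit_cb, newx = x, num.trees = 500)
}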
68 | } 69 | \examples{ 70 | # Randomized experiment example 71 | 72 | n = 100 # number of training-set patients to simulate 73 | p = 10 # number of features for each training-set patient 74 | 75 | # Simulate data 76 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 77 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 78 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 79 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 80 | 81 | # Estimate causal boosting model 82 | fit_cb = causalBoosting(x, tx, y, num.trees = 500) 83 | pred_cb = predict(fit_cb, newx = x, num.trees = 500) 84 | 85 | # Visualize results 86 | plot(tx_effect, pred_cb, main = 'Causal boosting', 87 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 88 | abline(0, 1, lty = 2) 89 | 90 | } 91 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/causalMARS.R 3 | \name{causalMARS} 4 | \alias{causalMARS} 5 | \title{Fit a causal MARS model} 6 | \usage{ 7 | causalMARS(x, tx, y, maxterms = 11, nquant = 5, degree = ncol(x), 8 | eps = 1, backstep = FALSE, x.val = NULL, tx.val = NULL, 9 | y.val = NULL, propensity = FALSE, stratum = rep(1, nrow(x)), 10 | stratum.val = NULL, minnum = 5) 11 | } 12 | \arguments{ 13 | \item{x}{matrix of covariates} 14 | 15 | \item{tx}{vector of treatment indicators (0 or 1)} 16 | 17 | \item{y}{vector of response values} 18 | 19 | \item{maxterms}{maximum number of terms to include in the regression basis 20 | (e.g. 
\code{maxterms = 11} means intercept + 5 pairs added)} 21 | 22 | \item{nquant}{number of quantiles used in splitting} 23 | 24 | \item{degree}{max number of different predictors that can interact in model} 25 | 26 | \item{eps}{shrinkage factor for new term added} 27 | 28 | \item{backstep}{logical: after building out regression basis, should 29 | backward stepwise selection be used to create a sequence of models, with 30 | the criterion evaluated on a validation set to choose among the sequence?} 31 | 32 | \item{x.val}{optional matrix of validation-set covariates 33 | (only used if \code{backstep = TRUE})} 34 | 35 | \item{tx.val}{optional vector of validation-set treatment indicators 36 | (only used if \code{backstep = TRUE})} 37 | 38 | \item{y.val}{optional vector of validation-set response values 39 | (only used if \code{backstep = TRUE})} 40 | 41 | \item{propensity}{logical: should propensity score stratification be used?} 42 | 43 | \item{stratum}{optional vector giving propensity score stratum for each 44 | observation (only used if \code{propensity = TRUE})} 45 | 46 | \item{stratum.val}{optional vector giving propensity score stratum for each 47 | validation-set observation 48 | (only used if \code{propensity = backstep = TRUE})} 49 | 50 | \item{minnum}{minimum number of observations in each arm of each propensity 51 | score stratum needed to estimate regression coefficients for basis 52 | (only used if \code{propensity = TRUE})} 53 | } 54 | \value{ 55 | an object of class \code{causalMARS} with attributes: 56 | \itemize{ 57 | \item parent: indices of nodes that are parents at each stage 58 | \item childvar: index of predictor chosen at each forward step 59 | \item childquant: quantile of cutoff chosen at each forward step 60 | \item quant: quantiles of the columns of x 61 | \item active: indices of columns with nonzero norm 62 | \item allvars: list of variables appearing in each term 63 | \item khat: the sequence of terms deleted at each step 64 | \item deltahat: relative change in rss 65 | \item rsstesthat: validation-set rss achieved by each model in sequence 66 | \item setesthat: standard error for rsstesthat 67 | \item tim1: time elapsed during forward stepwise phase 68 | \item tim2: total time elapsed 69 | \item x 70 | \item tx 71 | \item y 72 | \item maxterms 73 | \item eps 74 | \item backstep 75 | \item propensity 76 | \item x.val 77 | \item tx.val 78 | \item y.val 79 | \item stratum 80 | \item stratum.val 81 | \item minnum 82 | } 83 | } 84 | \description{ 85 | Fit a causal MARS model 86 | } 87 | \details{ 88 | parallel arms mars with backward stepwise BOTH randomized case and 89 | propensity stratum. data structures: model terms (nodes) are numbered 90 | 1, 2, ... with 1 representing the intercept. forward stepwise: 91 | modmatrix contains basis functions as model is built up -- two columns are 92 | added at each step. Does not include a column of ones for tidiness, 93 | we always add two terms, even when term added in linear (so that reflected 94 | version is just zero). 95 | backward stepwise: khat is the sequence of terms deleted at each step, 96 | based on deltahat = relative change in rss. rsstesthat is rss over test 97 | (validation) set achieved by each reduced model in sequence- used later for 98 | selecting a member of the sequence. 
active2 contains indices of columns with 99 | nonzero norm 100 | } 101 | \examples{ 102 | # Randomized experiment example 103 | 104 | n = 100 # number of training-set patients to simulate 105 | p = 10 # number of features for each training-set patient 106 | 107 | # Simulate data 108 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 109 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 110 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 111 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 112 | 113 | # Estimate causal MARS model 114 | fit_cm = causalLearning::causalMARS(x, tx, y) 115 | pred_cm = predict(fit_cm, newx = x) 116 | 117 | # Visualize results 118 | plot(tx_effect, pred_cm, main = 'Causal MARS', 119 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 120 | abline(0, 1, lty = 2) 121 | 122 | } 123 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/cv.causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cv.causalBoosting.R 3 | \name{cv.causalBoosting} 4 | \alias{cv.causalBoosting} 5 | \title{Fit a causal boosting model with cross validation} 6 | \usage{ 7 | cv.causalBoosting(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 8 | splitSpread = 0.1, type.measure = c("effect", "response"), nfolds = 5, 9 | foldid = NULL, propensity = FALSE, stratum = NULL, isConstVar = TRUE) 10 | } 11 | \arguments{ 12 | \item{x}{matrix of covariates} 13 | 14 | \item{tx}{vector of treatment indicators (0 or 1)} 15 | 16 | \item{y}{vector of response values} 17 | 18 | \item{num.trees}{number of shallow causal trees to build} 19 | 20 | \item{maxleaves}{maximum number of leaves per causal tree} 21 | 22 | \item{eps}{learning rate} 23 | 24 | \item{splitSpread}{how far apart should the candidate splits be for the 25 | causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 26 | cutpoints as candidates for making split} 27 | 28 | \item{type.measure}{loss to use for cross validation: 29 | 'response' returns mean-square error for predicting response in each arm. 
30 | 'effect' returns MSE for treatment effect using honest over-fit estimation.} 31 | 32 | \item{nfolds}{number of cross validation folds} 33 | 34 | \item{foldid}{vector of fold membership} 35 | 36 | \item{propensity}{logical: should propensity score stratification be used?} 37 | 38 | \item{stratum}{optional vector giving propensity score stratum for each 39 | observation (only used if \code{propensity = TRUE})} 40 | 41 | \item{isConstVar}{logical: for the causal tree splitting criterion 42 | (T-statistc), should it be assumed that the noise variance is the same in 43 | treatment and control arms?} 44 | } 45 | \value{ 46 | an object of class \code{cv.causalBoosting} which is an object of 47 | class \code{causalBoosting} with these additional attributes: 48 | \itemize{ 49 | \item num.trees.min: number of trees with lowest CV error 50 | \item cvm: vector of mean CV error for each number of trees 51 | \item cvsd: vector of standard errors for mean CV errors 52 | } 53 | } 54 | \description{ 55 | Fit a causal boosting model with cross validation 56 | } 57 | \examples{ 58 | # Randomized experiment example 59 | 60 | n = 100 # number of training-set patients to simulate 61 | p = 10 # number of features for each training-set patient 62 | 63 | # Simulate data 64 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 65 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 66 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 67 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 68 | 69 | # Estimate causal boosting model with cross-validation 70 | fit_cv = causalLearning::cv.causalBoosting(x, tx, y) 71 | fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 72 | pred_cv = predict(fit_cv, newx = x) 73 | 74 | # Visualize results 75 | plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 76 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 77 | abline(0, 1, lty = 2) 78 | 79 | } 80 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/pollinated.ranger.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pollinated.ranger.R 3 | \name{pollinated.ranger} 4 | \alias{pollinated.ranger} 5 | \title{Pollinate a fitted ranger random forest model} 6 | \usage{ 7 | pollinated.ranger(object, x, y) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted \code{ranger} object} 11 | 12 | \item{x}{matrix of covariates} 13 | 14 | \item{y}{vector of response values} 15 | } 16 | \value{ 17 | an object of class \code{pollinated.ranger} which is a \code{ranger} 18 | object that has been pollinated with the data in (x, y) 19 | } 20 | \description{ 21 | Pollinate a fitted ranger random forest model 22 | } 23 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.PTOforest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.PTOforest.R 3 | \name{predict.PTOforest} 4 | \alias{predict.PTOforest} 5 | \title{Make predictions from a fitted PTO forest model} 6 | \usage{ 7 | \method{predict}{PTOforest}(object, newx, ...) 
8 | } 9 | \arguments{ 10 | \item{object}{a fitted \code{PTOforest} object} 11 | 12 | \item{newx}{matrix of new covariates for which estimated treatment effects 13 | are desired} 14 | 15 | \item{...}{ignored} 16 | } 17 | \value{ 18 | a vector of predictions corresponding to the rows of \code{newx} 19 | } 20 | \description{ 21 | Make predictions from a fitted PTO forest model 22 | } 23 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.bagged.causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.bagged.causalMARS.R 3 | \name{predict.bagged.causalMARS} 4 | \alias{predict.bagged.causalMARS} 5 | \title{Make predictions from a bag of fitted causal MARS models} 6 | \usage{ 7 | \method{predict}{bagged.causalMARS}(object, newx, type = c("average", "all"), 8 | ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted \code{bagged.causalMARS} object} 12 | 13 | \item{newx}{matrix of new covariates for which estimated treatment effects 14 | are desired} 15 | 16 | \item{type}{type of prediction required: 17 | 'average' returns a vector of the averages of the bootstrap estimates. 18 | 'all' returns a matrix of all of the bootstrap estimates.} 19 | 20 | \item{...}{ignored} 21 | } 22 | \value{ 23 | a vector of estimated personalized treatment effects corresponding 24 | to the rows of \code{newx} 25 | } 26 | \description{ 27 | Make predictions from a bag of fitted causal MARS models 28 | } 29 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.causalBoosting.R 3 | \name{predict.causalBoosting} 4 | \alias{predict.causalBoosting} 5 | \title{Make predictions from a fitted causal boosting model} 6 | \usage{ 7 | \method{predict}{causalBoosting}(object, newx, newtx = NULL, 8 | type = c("treatment.effect", "conditional.mean", "response"), 9 | num.trees = 1:object$num.trees, honest = FALSE, naVal = 0, ...) 10 | } 11 | \arguments{ 12 | \item{object}{a fitted \code{causalBoosting} object} 13 | 14 | \item{newx}{matrix of new covariates for which estimated treatment effects 15 | are desired} 16 | 17 | \item{newtx}{option vector of new treatment assignments 18 | (only used if \code{type = 'response'})} 19 | 20 | \item{type}{type of prediction required: 21 | 'treatment.effect' returns estimated treatment effect. 22 | 'conditional.mean' returns two predictions, one for each arm. 23 | 'response' returns prediction for arm corresponding to newtx.} 24 | 25 | \item{num.trees}{number(s) of shallow causal trees to use for prediction} 26 | 27 | \item{honest}{logical: should honest re-estimates of leaf means be used for 28 | prediction? 
This requires that \code{x.est, tx.est, y.est} were specified 29 | when the causal boosting model was fit} 30 | 31 | \item{naVal}{value with which to replace \code{NA} predictions} 32 | 33 | \item{...}{ignored} 34 | } 35 | \value{ 36 | a vector or matrix of predictions corresponding to the rows of 37 | \code{newx} 38 | } 39 | \description{ 40 | Make predictions from a fitted causal boosting model 41 | } 42 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.causalMARS.R 3 | \name{predict.causalMARS} 4 | \alias{predict.causalMARS} 5 | \title{Make predictions from a fitted causal MARS model} 6 | \usage{ 7 | \method{predict}{causalMARS}(object, newx, active, ...) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted \code{causalMARS} object} 11 | 12 | \item{newx}{matrix of new covariates for which estimated treatment effects 13 | are desired} 14 | 15 | \item{active}{indices of columns with nonzero norm (defaults to model 16 | selected via backward stepwise phase, or the full model if 17 | \code{backstep = FALSE})} 18 | 19 | \item{...}{ignored} 20 | } 21 | \value{ 22 | a vector of estimated personalized treatment effects corresponding 23 | to the rows of \code{newx} 24 | } 25 | \description{ 26 | Make predictions from a fitted causal MARS model 27 | } 28 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.causalTree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.causalTree.R 3 | \name{predict.causalTree} 4 | \alias{predict.causalTree} 5 | \title{Make predictions from a fitted causal tree model} 6 | \usage{ 7 | \method{predict}{causalTree}(object, newx, newtx = NULL, 8 | type = c("treatment.effect", "conditional.mean", "response"), 9 | honest = FALSE, naVal = 0, ...) 10 | } 11 | \arguments{ 12 | \item{object}{a fitted \code{causalTree} object} 13 | 14 | \item{newx}{matrix of new covariates for which estimated treatment effects 15 | are desired} 16 | 17 | \item{newtx}{option vector of new treatment assignments 18 | (only used if \code{type = 'response'})} 19 | 20 | \item{type}{type of prediction required: 21 | 'treatment.effect' returns estimated treatment effect. 22 | 'conditional.mean' returns two predictions, one for each arm. 23 | 'response' returns prediction for arm corresponding to newtx.} 24 | 25 | \item{honest}{logical: should honest re-estimates of leaf means be used for 26 | prediction? 
This requires that \code{x.est, tx.est, y.est} were specified 27 | when the causal boosting model was fit} 28 | 29 | \item{naVal}{value with which to replace \code{NA} predictions} 30 | 31 | \item{...}{ignored} 32 | } 33 | \value{ 34 | a vector or matrix of predictions corresponding to the rows of 35 | \code{newx} 36 | } 37 | \description{ 38 | Make predictions from a fitted causal tree model 39 | } 40 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.cv.causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.cv.causalBoosting.R 3 | \name{predict.cv.causalBoosting} 4 | \alias{predict.cv.causalBoosting} 5 | \title{Make predictions from a fitted cross-validated causal boosting model} 6 | \usage{ 7 | \method{predict}{cv.causalBoosting}(object, newx, newtx = NULL, 8 | type = c("treatment.effect", "conditional.mean", "response"), 9 | num.trees = object$num.trees.min.effect, naVal = 0, ...) 10 | } 11 | \arguments{ 12 | \item{object}{a fitted \code{cv.causalBoosting} object} 13 | 14 | \item{newx}{matrix of new covariates for which estimated treatment effects 15 | are desired} 16 | 17 | \item{newtx}{option vector of new treatment assignments 18 | (only used if \code{type = 'individual'})} 19 | 20 | \item{type}{type of prediction required: 21 | 'treatment.effect' returns estimated treatment effect. 22 | 'conditional.mean' returns two predictions, one for each arm. 23 | 'response' returns prediction for arm corresponding to newtx.} 24 | 25 | \item{num.trees}{number of shallow causal trees to use for prediction} 26 | 27 | \item{naVal}{value with which to replace \code{NA} predictions} 28 | 29 | \item{...}{ignored} 30 | } 31 | \value{ 32 | a vector or matrix of predictions corresponding to the rows of 33 | \code{newx} 34 | } 35 | \description{ 36 | Make predictions from a fitted cross-validated causal boosting model 37 | } 38 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.pollinated.ranger.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.pollinated.ranger.R 3 | \name{predict.pollinated.ranger} 4 | \alias{predict.pollinated.ranger} 5 | \title{Make predictions from a pollinated ranger random forest model} 6 | \usage{ 7 | \method{predict}{pollinated.ranger}(object, newx, predict.all = FALSE, 8 | na.treatment = c("omit", "replace", "NA"), ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted \code{pollinated.ranger} object} 12 | 13 | \item{newx}{matrix of new covariates for which predictions are desired} 14 | 15 | \item{predict.all}{logical: should predictions from all trees be returned? 16 | Otherwise the average across trees is returned} 17 | 18 | \item{na.treatment}{how to treat NA predictions from individual trees: 19 | 'omit' only uses trees for which the prediction is not NA. 20 | 'replace' replaces NA predictions with the overall mean response. 
21 | 'NA' returns NA if any tree prediction is NA.} 22 | 23 | \item{...}{additional arguments passed on to \code{predict.ranger}} 24 | } 25 | \value{ 26 | a vector of predicted treatment effects corresponding to the rows of 27 | newx 28 | } 29 | \description{ 30 | Make predictions from a pollinated ranger random forest model 31 | } 32 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/stratify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stratify.R 3 | \name{stratify} 4 | \alias{stratify} 5 | \title{Get propensity strata from propensity scores} 6 | \usage{ 7 | stratify(pscore, tx, min.per.arm = 30) 8 | } 9 | \arguments{ 10 | \item{pscore}{vector of propensity scores} 11 | 12 | \item{tx}{vector of treatment indicators} 13 | 14 | \item{min.per.arm}{minimum number of observations for each arm within each 15 | stratum} 16 | } 17 | \value{ 18 | a vector of integers with length equal to the length of pscore, 19 | reporting the propensity stratum corresponding to each propensity score 20 | } 21 | \description{ 22 | Get propensity strata from propensity scores 23 | } 24 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalBoosting.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalBoosting.o -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalLearning.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalLearning.so -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/src/symbols.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/src/symbols.rds -------------------------------------------------------------------------------- /causalLearning.Rcheck/00check.log: -------------------------------------------------------------------------------- 1 | * using log directory ‘/Users/sspowers/GitHub/causalLearning/causalLearning.Rcheck’ 2 | * using R version 3.3.2 (2016-10-31) 3 | * using platform: x86_64-apple-darwin13.4.0 (64-bit) 4 | * using session charset: UTF-8 5 | * checking for file ‘causalLearning/DESCRIPTION’ ... OK 6 | * this is package ‘causalLearning’ version ‘1.0.0’ 7 | * checking package namespace information ... OK 8 | * checking package dependencies ... OK 9 | * checking if this is a source package ... OK 10 | * checking if there is a namespace ... OK 11 | * checking for executable files ... OK 12 | * checking for hidden files and directories ... OK 13 | * checking for portable file names ... OK 14 | * checking for sufficient/correct file permissions ... OK 15 | * checking whether package ‘causalLearning’ can be installed ... OK 16 | * checking installed package size ... 
OK 17 | * checking package directory ... OK 18 | * checking DESCRIPTION meta-information ... OK 19 | * checking top-level files ... OK 20 | * checking for left-over files ... OK 21 | * checking index information ... OK 22 | * checking package subdirectories ... OK 23 | * checking R files for non-ASCII characters ... OK 24 | * checking R files for syntax errors ... OK 25 | * checking whether the package can be loaded ... OK 26 | * checking whether the package can be loaded with stated dependencies ... OK 27 | * checking whether the package can be unloaded cleanly ... OK 28 | * checking whether the namespace can be loaded with stated dependencies ... OK 29 | * checking whether the namespace can be unloaded cleanly ... OK 30 | * checking dependencies in R code ... OK 31 | * checking S3 generic/method consistency ... OK 32 | * checking replacement functions ... OK 33 | * checking foreign function calls ... OK 34 | * checking R code for possible problems ... OK 35 | * checking Rd files ... OK 36 | * checking Rd metadata ... OK 37 | * checking Rd cross-references ... OK 38 | * checking for missing documentation entries ... OK 39 | * checking for code/documentation mismatches ... OK 40 | * checking Rd \usage sections ... OK 41 | * checking Rd contents ... OK 42 | * checking for unstated dependencies in examples ... OK 43 | * checking line endings in C/C++/Fortran sources/headers ... OK 44 | * checking compiled code ... OK 45 | * checking examples ... OK 46 | * checking PDF version of manual ... OK 47 | * DONE 48 | Status: OK 49 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00install.out: -------------------------------------------------------------------------------- 1 | * installing *source* package ‘causalLearning’ ... 2 | ** libs 3 | clang -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include -fPIC -Wall -mtune=core2 -g -O2 -c causalBoosting.c -o causalBoosting.o 4 | clang -dynamiclib -Wl,-headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined suppress -L/Library/Frameworks/R.framework/Resources/lib -L/usr/local/lib -o causalLearning.so causalBoosting.o -F/Library/Frameworks/R.framework/.. 
-framework R -Wl,-framework -Wl,CoreFoundation 5 | installing to /Users/sspowers/GitHub/causalLearning/causalLearning.Rcheck/causalLearning/libs 6 | ** R 7 | ** preparing package for lazy loading 8 | ** help 9 | *** installing help indices 10 | ** building package indices 11 | ** testing if installed package can be loaded 12 | * DONE (causalLearning) 13 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/causalLearning-Ex.R: -------------------------------------------------------------------------------- 1 | pkgname <- "causalLearning" 2 | source(file.path(R.home("share"), "R", "examples-header.R")) 3 | options(warn = 1) 4 | library('causalLearning') 5 | 6 | base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') 7 | cleanEx() 8 | nameEx("PTOforest") 9 | ### * PTOforest 10 | 11 | flush(stderr()); flush(stdout()) 12 | 13 | ### Name: PTOforest 14 | ### Title: Fit a pollinated transformed outcome (PTO) forest model 15 | ### Aliases: PTOforest 16 | 17 | ### ** Examples 18 | 19 | # Randomized experiment example 20 | 21 | n = 100 # number of training-set patients to simulate 22 | p = 10 # number of features for each training-set patient 23 | 24 | # Simulate data 25 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 26 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 27 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 28 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 29 | 30 | # Estimate PTO forest model 31 | fit_pto = PTOforest(x, tx, y) 32 | pred_pto = predict(fit_pto, newx = x) 33 | 34 | # Visualize results 35 | plot(tx_effect, pred_pto, main = 'PTO forest', 36 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 37 | abline(0, 1, lty = 2) 38 | 39 | 40 | 41 | 42 | cleanEx() 43 | nameEx("bagged.causalMARS") 44 | ### * bagged.causalMARS 45 | 46 | flush(stderr()); flush(stdout()) 47 | 48 | ### Name: bagged.causalMARS 49 | ### Title: Fit a bag of causal MARS models 50 | ### Aliases: bagged.causalMARS 51 | 52 | ### ** Examples 53 | 54 | # Randomized experiment example 55 | 56 | n = 100 # number of training-set patients to simulate 57 | p = 10 # number of features for each training-set patient 58 | 59 | # Simulate data 60 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 61 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 62 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 63 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 64 | 65 | # Estimate bagged causal MARS model 66 | fit_bcm = causalLearning::bagged.causalMARS(x, tx, y, nbag = 10) 67 | pred_bcm = predict(fit_bcm, newx = x) 68 | 69 | # Visualize results 70 | plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 71 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 72 | abline(0, 1, lty = 2) 73 | 74 | 75 | 76 | 77 | cleanEx() 78 | nameEx("causalBoosting") 79 | ### * causalBoosting 80 | 81 | flush(stderr()); flush(stdout()) 82 | 83 | ### Name: causalBoosting 84 | ### Title: Fit a causal boosting model 85 | ### Aliases: causalBoosting 86 | 87 | ### ** Examples 88 | 89 | # Randomized experiment example 90 | 91 | n = 100 # number of training-set patients to simulate 92 | p = 10 # number of features for each training-set patient 93 | 94 | # Simulate data 95 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 96 | tx_effect = x[, 1] + 
(x[, 2] > 0) # simple heterogeneous treatment effect 97 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 98 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 99 | 100 | # Estimate causal boosting model 101 | fit_cb = causalBoosting(x, tx, y, num.trees = 500) 102 | pred_cb = predict(fit_cb, newx = x, num.trees = 500) 103 | 104 | # Visualize results 105 | plot(tx_effect, pred_cb, main = 'Causal boosting', 106 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 107 | abline(0, 1, lty = 2) 108 | 109 | 110 | 111 | 112 | cleanEx() 113 | nameEx("causalMARS") 114 | ### * causalMARS 115 | 116 | flush(stderr()); flush(stdout()) 117 | 118 | ### Name: causalMARS 119 | ### Title: Fit a causal MARS model 120 | ### Aliases: causalMARS 121 | 122 | ### ** Examples 123 | 124 | # Randomized experiment example 125 | 126 | n = 100 # number of training-set patients to simulate 127 | p = 10 # number of features for each training-set patient 128 | 129 | # Simulate data 130 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 131 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 132 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 133 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 134 | 135 | # Estimate causal MARS model 136 | fit_cm = causalLearning::causalMARS(x, tx, y) 137 | pred_cm = predict(fit_cm, newx = x) 138 | 139 | # Visualize results 140 | plot(tx_effect, pred_cm, main = 'Causal MARS', 141 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 142 | abline(0, 1, lty = 2) 143 | 144 | 145 | 146 | 147 | cleanEx() 148 | nameEx("cv.causalBoosting") 149 | ### * cv.causalBoosting 150 | 151 | flush(stderr()); flush(stdout()) 152 | 153 | ### Name: cv.causalBoosting 154 | ### Title: Fit a causal boosting model with cross validation 155 | ### Aliases: cv.causalBoosting 156 | 157 | ### ** Examples 158 | 159 | # Randomized experiment example 160 | 161 | n = 100 # number of training-set patients to simulate 162 | p = 10 # number of features for each training-set patient 163 | 164 | # Simulate data 165 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 166 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 167 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 168 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 169 | 170 | # Estimate causal boosting model with cross-validation 171 | fit_cv = causalLearning::cv.causalBoosting(x, tx, y) 172 | fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 173 | pred_cv = predict(fit_cv, newx = x) 174 | 175 | # Visualize results 176 | plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 177 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 178 | abline(0, 1, lty = 2) 179 | 180 | 181 | 182 | 183 | ### *