├── .Rbuildignore ├── DESCRIPTION ├── NAMESPACE ├── R ├── .DS_Store ├── PTOforest.R ├── bagged.causalMARS.R ├── causalBoosting.R ├── causalMARS.R ├── cv.causalBoosting.R ├── makebx.newmars.R ├── myridge.R ├── pollinated.ranger.R ├── predict.PTOforest.R ├── predict.bagged.causalMARS.R ├── predict.causalBoosting.R ├── predict.causalMARS.R ├── predict.causalTree.R ├── predict.cv.causalBoosting.R ├── predict.pollinated.ranger.R ├── stratify.R └── truncpow.R ├── README.md ├── causalLearning.Rcheck ├── 00_pkg_src │ └── causalLearning │ │ ├── DESCRIPTION │ │ ├── NAMESPACE │ │ ├── R │ │ ├── PTOforest.R │ │ ├── bagged.causalMARS.R │ │ ├── causalBoosting.R │ │ ├── causalMARS.R │ │ ├── cv.causalBoosting.R │ │ ├── makebx.newmars.R │ │ ├── myridge.R │ │ ├── pollinated.ranger.R │ │ ├── predict.PTOforest.R │ │ ├── predict.bagged.causalMARS.R │ │ ├── predict.causalBoosting.R │ │ ├── predict.causalMARS.R │ │ ├── predict.causalTree.R │ │ ├── predict.cv.causalBoosting.R │ │ ├── predict.pollinated.ranger.R │ │ ├── stratify.R │ │ └── truncpow.R │ │ ├── README.md │ │ ├── causalLearning_1.0.0.tar.gz │ │ ├── man │ │ ├── PTOforest.Rd │ │ ├── bagged.causalMARS.Rd │ │ ├── causalBoosting.Rd │ │ ├── causalMARS.Rd │ │ ├── cv.causalBoosting.Rd │ │ ├── pollinated.ranger.Rd │ │ ├── predict.PTOforest.Rd │ │ ├── predict.bagged.causalMARS.Rd │ │ ├── predict.causalBoosting.Rd │ │ ├── predict.causalMARS.Rd │ │ ├── predict.causalTree.Rd │ │ ├── predict.cv.causalBoosting.Rd │ │ ├── predict.pollinated.ranger.Rd │ │ └── stratify.Rd │ │ └── src │ │ ├── causalBoosting.c │ │ ├── causalBoosting.o │ │ ├── causalLearning.so │ │ └── symbols.rds ├── 00check.log ├── 00install.out ├── Rdlatex.log ├── causalLearning-Ex.R ├── causalLearning-Ex.Rout ├── causalLearning-Ex.pdf ├── causalLearning-manual.log ├── causalLearning-manual.pdf └── causalLearning │ ├── DESCRIPTION │ ├── INDEX │ ├── Meta │ ├── Rd.rds │ ├── hsearch.rds │ ├── links.rds │ ├── nsInfo.rds │ └── package.rds │ ├── NAMESPACE │ ├── R │ ├── causalLearning │ ├── causalLearning.rdb │ └── causalLearning.rdx │ ├── help │ ├── AnIndex │ ├── aliases.rds │ ├── causalLearning.rdb │ ├── causalLearning.rdx │ └── paths.rds │ ├── html │ ├── 00Index.html │ └── R.css │ └── libs │ ├── causalLearning.so │ └── symbols.rds ├── causalLearning_1.0.0.tar.gz ├── man ├── PTOforest.Rd ├── bagged.causalMARS.Rd ├── causalBoosting.Rd ├── causalMARS.Rd ├── cv.causalBoosting.Rd ├── pollinated.ranger.Rd ├── predict.PTOforest.Rd ├── predict.bagged.causalMARS.Rd ├── predict.causalBoosting.Rd ├── predict.causalMARS.Rd ├── predict.causalTree.Rd ├── predict.cv.causalBoosting.Rd ├── predict.pollinated.ranger.Rd └── stratify.Rd └── src ├── .DS_Store ├── .gitignore └── causalBoosting.c /.Rbuildignore: -------------------------------------------------------------------------------- 1 | causalLearning_1.0.tar.gz 2 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: causalLearning 2 | Title: Methods for heterogeneous treatment effect estimation 3 | Version: 1.0.0 4 | Authors@R: c( 5 | person("Scott", "Powers", email = "saberpowers@gmail.com", 6 | role = c("aut", "cre")), 7 | person("Junyang", "Qian", email = "junyangq@stanford.edu", role = "aut"), 8 | person("Trevor", "Hastie", email = "hastie@stanford.edu", role = "aut"), 9 | person("Robert", "Tibshirani", email = "tibs@stanford.edu", role = "aut")) 10 | Description: The main functions are cv.causalBoosting and bagged.causalMARS, 11 | 
which build upon the simpler causalBoosting and causalMARS functions. All of 12 | these functions have their own predict methods. 13 | Depends: R (>= 3.3.0) 14 | Imports: ranger 15 | License: GPL-2 16 | LazyData: true 17 | RoxygenNote: 6.0.1 18 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(predict,PTOforest) 4 | S3method(predict,bagged.causalMARS) 5 | S3method(predict,causalBoosting) 6 | S3method(predict,causalMARS) 7 | S3method(predict,causalTree) 8 | S3method(predict,cv.causalBoosting) 9 | export(PTOforest) 10 | export(bagged.causalMARS) 11 | export(causalBoosting) 12 | export(causalMARS) 13 | export(cv.causalBoosting) 14 | export(stratify) 15 | useDynLib(causalLearning) 16 | -------------------------------------------------------------------------------- /R/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/R/.DS_Store -------------------------------------------------------------------------------- /R/PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Fit a pollinated transformed outcome (PTO) forest model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param pscore vector of propensity scores 7 | #' @param num.trees number of trees for transformed outcome forest 8 | #' @param mtry number of variables to possibly split at in each node 9 | #' @param min.node.size minimum node size for transformed outcome forest 10 | #' @param postprocess logical: should optional post-processing random forest be 11 | #' fit at end? 12 | #' @param verbose logical: should progress be printed to console? 
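#'
#' @details
#' The first forest is fit to the transformed outcome
#' \code{z = tx * y / pscore - (1 - tx) * y / (1 - pscore)}, whose conditional
#' mean equals the treatment effect when \code{pscore} gives the true
#' treatment-assignment probabilities (under the usual unconfoundedness
#' assumptions). That forest is then pollinated separately with the
#' treated-arm and control-arm responses to produce the two conditional-mean
#' forests used for prediction.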
13 | #' 14 | #' @return an object of class \code{PTOforest} with attributes: 15 | #' \itemize{ 16 | #' \item x: matrix of covariates supplied by function call 17 | #' \item pscore: vector of propensity score supplied by function call 18 | #' \item postprocess: logical supplied by function call 19 | #' \item TOfit: fitted random forest on transformed outcomes 20 | #' \item PTOfit1: TOfit pollinated with treatment-arm outcomes 21 | #' \item PTOfit0: TOfit pollinated with control-arm outcomes 22 | #' \item postfit: post-processing random forest summarizing results 23 | #' } 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate PTO forest model 38 | #'fit_pto = PTOforest(x, tx, y) 39 | #'pred_pto = predict(fit_pto, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_pto, main = 'PTO forest', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | 49 | PTOforest = function(x, tx, y, pscore = rep(.5, nrow(x)), 50 | num.trees = 500, mtry = ncol(x), min.node.size = max(25, nrow(x) / 40), 51 | postprocess = TRUE, verbose = FALSE) { 52 | 53 | 54 | # Input sanitization 55 | 56 | x = as.matrix(x) 57 | 58 | if (nrow(x) != length(tx)) { 59 | stop('nrow(x) does not match length(tx)') 60 | 61 | } else if (nrow(x) != length(y)) { 62 | stop('nrow(x) does not match length(y)') 63 | 64 | } else if (!is.numeric(x)) { 65 | stop('x must be numeric matrix') 66 | 67 | } else if (!is.numeric(y)) { 68 | stop('y must be numeric (use 0/1 for binary response)') 69 | 70 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 71 | stop('tx must be vector of 0s and 1s') 72 | 73 | } 74 | 75 | 76 | colnames(x) = paste('x', 1:ncol(x), sep = '') 77 | fit = list(x = x, pscore = pscore, postprocess = postprocess) 78 | 79 | z = tx * y / pscore - (1 - tx) * y / (1 - pscore) 80 | 81 | 82 | if (verbose) cat('fitting IPW treatment forest\n') 83 | 84 | data = data.frame(y = z, x = x) 85 | colnames(data) = c('y', colnames(x)) 86 | fit$TOfit = ranger::ranger(data = data, dependent.variable.name = 'y', 87 | num.trees = num.trees, min.node.size = min.node.size, mtry = mtry, 88 | write.forest = TRUE) 89 | 90 | 91 | # Now pollinate the tree separately with treated and untreated 92 | if (verbose) { 93 | cat('pollinating IPW treatment forest separately with treated and', 94 | 'untreated y\n') 95 | } 96 | 97 | fit$PTOfit1 = pollinated.ranger(fit$TOfit, x = x[tx == 1, ], y = y[tx == 1]) 98 | fit$PTOfit0 = pollinated.ranger(fit$TOfit, x = x[tx == 0, ], y = y[tx == 0]) 99 | 100 | 101 | if (postprocess) { 102 | # and one more summarization rf 103 | if (verbose) cat('fitting TX summary forest\n') 104 | 105 | delta = stats::predict(fit$PTOfit1, x) - stats::predict(fit$PTOfit0, x) 106 | data = data.frame(y = delta, x = x) 107 | colnames(x) = paste('x', 1:ncol(x), sep = '') 108 | colnames(data) = c('y', colnames(x)) 109 | fit$postfit = ranger::ranger(data = data, dependent.variable.name = 'y', 110 | num.trees = num.trees, mtry = 
ncol(x), write.forest = TRUE) 111 | } 112 | 113 | class(fit) = 'PTOforest' 114 | fit 115 | } 116 | 117 | -------------------------------------------------------------------------------- /R/bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a bag of causal MARS models 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param nbag number of models to bag 7 | #' @param maxterms maximum number of terms to include in the regression basis 8 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 9 | #' @param nquant number of quantiles used in splitting 10 | #' @param degree max number of different predictors that can interact in model 11 | #' @param eps shrinkage factor for new term added 12 | #' @param backstep logical: should out-of-bag samples be used to prune each 13 | #' model? otherwise full regression basis is used for each model 14 | #' @param propensity logical: should propensity score stratification be used? 15 | #' @param stratum optional vector giving propensity score stratum for each 16 | #' observation (only used if \code{propensity = TRUE}) 17 | #' @param minnum minimum number of observations in each arm of each propensity 18 | #' score stratum needed to estimate regression coefficients for basis 19 | #' (only used if \code{propensity = TRUE}) 20 | #' @param verbose logical: should progress be printed to console? 21 | #' 22 | #' @return an object of class \code{bagged.causalMARS}, which is itself a list 23 | #' of \code{causalMARS} objects 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate bagged causal MARS model 38 | #'fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10) 39 | #'pred_bcm = predict(fit_bcm, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | bagged.causalMARS = function(x, tx, y, nbag = 20, maxterms = 11, nquant = 5, 49 | degree = ncol(x), eps = 1, backstep = FALSE, 50 | propensity = FALSE, stratum = rep(1, nrow(x)), minnum = 5, verbose = FALSE) { 51 | 52 | 53 | # Input sanitization 54 | 55 | x = as.matrix(x) 56 | 57 | if (nrow(x) != length(tx)) { 58 | stop('nrow(x) does not match length(tx)') 59 | 60 | } else if (nrow(x) != length(y)) { 61 | stop('nrow(x) does not match length(y)') 62 | 63 | } else if (!is.numeric(x)) { 64 | stop('x must be numeric matrix') 65 | 66 | } else if (!is.numeric(y)) { 67 | stop('y must be numeric (use 0/1 for binary response)') 68 | 69 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 70 | stop('tx must be vector of 0s and 1s') 71 | 72 | } 73 | 74 | 75 | x = scale(x, center = TRUE, scale = FALSE) 76 | 77 | fit = list() 78 | 79 | for (b in 1:nbag) { 80 | 81 | if (verbose) cat(c('BAG=', b, '/', nbag), fill = TRUE) 82 | 83 | bag = sample(1:nrow(x), size = 
nrow(x), replace = TRUE) 84 | oob = rep(TRUE, nrow(x)) 85 | oob[bag] = FALSE 86 | fit[[b]] = causalMARS(x = x[bag, ], tx = tx[bag], y = y[bag], 87 | maxterms = maxterms, nquant = nquant, degree = degree, eps = eps, 88 | backstep = backstep, x.val = x[oob, ], tx.val = tx[oob], y.val = y[oob], 89 | propensity = propensity, stratum = stratum[bag], 90 | stratum.val = stratum[oob], minnum = minnum) 91 | } 92 | class(fit) = 'bagged.causalMARS' 93 | fit 94 | } 95 | 96 | -------------------------------------------------------------------------------- /R/causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal boosting model 2 | #' 3 | #' @useDynLib causalLearning 4 | #' 5 | #' @param x matrix of covariates 6 | #' @param tx vector of treatment indicators (0 or 1) 7 | #' @param y vector of response values 8 | #' @param num.trees number of shallow causal trees to build 9 | #' @param maxleaves maximum number of leaves per causal tree 10 | #' @param eps learning rate 11 | #' @param splitSpread how far apart should the candidate splits be for the 12 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 13 | #' cutpoints as candidates for making split 14 | #' @param x.est optional matrix of estimation-set covariates used for honest 15 | #' re-estimation (ignored if \code{tx.est = NULL} or \code{y.est = NULL}) 16 | #' @param tx.est optional vector of estimation-set treatment indicators 17 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 18 | #' @param y.est optional vector of estimation-set response values 19 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 20 | #' @param propensity logical: should propensity score stratification be used? 21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.est optional vector giving propensity score stratum for each 24 | #' estimation-set observation (ignored if \code{x.est = NULL} or 25 | #' \code{tx.est = NULL} or \code{y.est = NULL}) 26 | #' @param isConstVar logical: for the causal tree splitting criterion 27 | #' (T-statistc), should it be assumed that the noise variance is the same in 28 | #' treatment and control arms? 29 | #' 30 | #' @return an object of class \code{causalBoosting} with attributes: 31 | #' \itemize{ 32 | #' \item CBM: a list storing the intercept, the causal trees and \code{eps} 33 | #' \item tauhat: matrix of treatment effects for each patient for each step 34 | #' \item G1: estimated-treatment conditional mean for each patient 35 | #' \item G0: estimated-control conditional mean for each patient 36 | #' \item err.y: training error at each step, in predicting response 37 | #' \item num.trees: number of trees specified by function call 38 | #' } 39 | #' 40 | #' @details 41 | #' This function exists primarily to be called by cv.causalBoosting because 42 | #' the num.trees parameter generally needs to be tuned via cross-validation. 
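#'
#' A minimal sketch of that workflow (reusing \code{x, tx, y} from the example
#' below; the number of trees is illustrative):
#' \preformatted{
#'   fit_cv = cv.causalBoosting(x, tx, y, num.trees = 200)
#'   fit_cv$num.trees.min.effect          # number of trees selected by CV
#'   tauhat = predict(fit_cv, newx = x)   # uses the CV-selected number by default
#' }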
43 | #' 44 | #' @examples 45 | #'# Randomized experiment example 46 | #' 47 | #'n = 100 # number of training-set patients to simulate 48 | #'p = 10 # number of features for each training-set patient 49 | #' 50 | #'# Simulate data 51 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 52 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 53 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 54 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 55 | #' 56 | #'# Estimate causal boosting model 57 | #'fit_cb = causalBoosting(x, tx, y, num.trees = 500) 58 | #'pred_cb = predict(fit_cb, newx = x, num.trees = 500) 59 | #' 60 | #'# Visualize results 61 | #'plot(tx_effect, pred_cb, main = 'Causal boosting', 62 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 63 | #'abline(0, 1, lty = 2) 64 | #' 65 | #' @export 66 | 67 | causalBoosting = function(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 68 | splitSpread = 0.1, x.est = NULL, tx.est = NULL, y.est = NULL, 69 | propensity = FALSE, stratum = NULL, stratum.est = NULL, 70 | isConstVar = TRUE) { 71 | 72 | 73 | # Input sanitization 74 | 75 | x = as.matrix(x) 76 | 77 | if (nrow(x) != length(tx)) { 78 | stop('nrow(x) does not match length(tx)') 79 | 80 | } else if (nrow(x) != length(y)) { 81 | stop('nrow(x) does not match length(y)') 82 | 83 | } else if (!is.numeric(x)) { 84 | stop('x must be numeric matrix') 85 | 86 | } else if (!is.numeric(y)) { 87 | stop('y must be numeric (use 0/1 for binary response)') 88 | 89 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 90 | stop('tx must be vector of 0s and 1s') 91 | 92 | } 93 | 94 | 95 | # s indices are 0-based 96 | maxNodes = 2 * maxleaves - 1 97 | 98 | # if (usePropensity ^ !is.null(s)) { warnings('Non-consistent options: whether to 99 | # use propensity score will be based on value of s.') } 100 | 101 | if (is.null(stratum)) { 102 | if (propensity) stop('stratum must be specified if propensity = TRUE') 103 | stratum = -1 104 | } 105 | if (is.null(x.est) || is.null(y.est) || is.null(tx.est)) { 106 | x.est = y.est = tx.est = stratum.est = -1 107 | n.est = 1 108 | } else { 109 | n.est = nrow(x.est) 110 | if (is.null(stratum.est)) { 111 | stratum.est = -1 112 | } 113 | } 114 | 115 | vtxeff = 0 116 | 117 | fit = .C("causalBoosting", as.double(x), as.double(y), as.integer(tx), 118 | as.double(x.est), as.double(y.est), as.integer(tx.est), 119 | as.integer(num.trees), as.integer(maxleaves), as.double(eps), 120 | as.integer(propensity), as.integer(stratum), as.integer(stratum.est), 121 | as.integer(isConstVar), as.integer(nrow(x)), as.integer(ncol(x)), 122 | as.integer(n.est), as.double(vtxeff), as.double(splitSpread), 123 | var = integer(num.trees * maxNodes), val = double(num.trees * maxNodes), 124 | left = integer(num.trees * maxNodes), 125 | right = integer(num.trees * maxNodes), 126 | y0bar = double(1), y1bar = double(1), pred0 = double(num.trees * maxNodes), 127 | pred1 = double(num.trees * maxNodes), cost = double(num.trees * maxNodes), 128 | pred0e = double(num.trees * maxNodes), 129 | pred1e = double(num.trees * maxNodes), G0 = double(nrow(x)), 130 | G1 = double(nrow(x)), err.y = double(num.trees), err = double(num.trees), 131 | tauhat = double(num.trees * nrow(x)), PACKAGE = 'causalLearning') 132 | 133 | CBM = list() 134 | CBM$intercept = c(fit$y0bar, fit$y1bar) 135 | CBM$trees = list() 136 | CBM$eps = eps 137 | 138 | for (k in 1:num.trees) { 139 | start = (k - 1) * 
maxNodes + 1 140 | end = k * maxNodes 141 | tree = list(var = fit$var[start:end] + 1, val = fit$val[start:end], 142 | left = fit$left[start:end] + 1, right = fit$right[start:end] + 1, 143 | pred0 = fit$pred0[start:end], pred1 = fit$pred1[start:end], 144 | cost = fit$cost[start:end], pred0e = fit$pred0e[start:end], 145 | pred1e = fit$pred1e[start:end]) 146 | class(tree) = "causalTree" 147 | CBM$trees[[k]] = tree 148 | } 149 | result = list(CBM = CBM, tauhat = matrix(fit$tauhat, nrow = nrow(x)), 150 | G1 = fit$G1, G0 = fit$G0, err.y = fit$err.y, num.trees = num.trees) 151 | 152 | class(result) = "causalBoosting" 153 | 154 | result 155 | } 156 | 157 | -------------------------------------------------------------------------------- /R/causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal MARS model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param maxterms maximum number of terms to include in the regression basis 7 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 8 | #' @param nquant number of quantiles used in splitting 9 | #' @param degree max number of different predictors that can interact in model 10 | #' @param eps shrinkage factor for new term added 11 | #' @param backstep logical: after building out regression basis, should 12 | #' backward stepwise selection be used to create a sequence of models, with 13 | #' the criterion evaluated on a validation set to choose among the sequence? 14 | #' @param x.val optional matrix of validation-set covariates 15 | #' (only used if \code{backstep = TRUE}) 16 | #' @param tx.val optional vector of validation-set treatment indicators 17 | #' (only used if \code{backstep = TRUE}) 18 | #' @param y.val optional vector of validation-set response values 19 | #' (only used if \code{backstep = TRUE}) 20 | #' @param propensity logical: should propensity score stratification be used? 
21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.val optional vector giving propensity score stratum for each 24 | #' validation-set observation 25 | #' (only used if \code{propensity = backstep = TRUE}) 26 | #' @param minnum minimum number of observations in each arm of each propensity 27 | #' score stratum needed to estimate regression coefficients for basis 28 | #' (only used if \code{propensity = TRUE}) 29 | #' 30 | #' @return an object of class \code{causalMARS} with attributes: 31 | #' \itemize{ 32 | #' \item parent: indices of nodes that are parents at each stage 33 | #' \item childvar: index of predictor chosen at each forward step 34 | #' \item childquant: quantile of cutoff chosen at each forward step 35 | #' \item quant: quantiles of the columns of x 36 | #' \item active: indices of columns with nonzero norm 37 | #' \item allvars: list of variables appearing in each term 38 | #' \item khat: the sequence of terms deleted at each step 39 | #' \item deltahat: relative change in rss 40 | #' \item rsstesthat: validation-set rss achieved by each model in sequence 41 | #' \item setesthat: standard error for rsstesthat 42 | #' \item tim1: time elapsed during forward stepwise phase 43 | #' \item tim2: total time elapsed 44 | #' \item x 45 | #' \item tx 46 | #' \item y 47 | #' \item maxterms 48 | #' \item eps 49 | #' \item backstep 50 | #' \item propensity 51 | #' \item x.val 52 | #' \item tx.val 53 | #' \item y.val 54 | #' \item stratum 55 | #' \item stratum.val 56 | #' \item minnum 57 | #' } 58 | #' 59 | #' @details 60 | #' parallel arms mars with backward stepwise BOTH randomized case and 61 | #' propensity stratum. data structures: model terms (nodes) are numbered 62 | #' 1, 2, ... with 1 representing the intercept. forward stepwise: 63 | #' modmatrix contains basis functions as model is built up -- two columns are 64 | #' added at each step. Does not include a column of ones for tidiness, 65 | #' we always add two terms, even when term added in linear (so that reflected 66 | #' version is just zero). 67 | #' backward stepwise: khat is the sequence of terms deleted at each step, 68 | #' based on deltahat = relative change in rss. rsstesthat is rss over test 69 | #' (validation) set achieved by each reduced model in sequence- used later for 70 | #' selecting a member of the sequence. 
active2 contains indices of columns with 71 | #' nonzero norm 72 | #' 73 | #' @examples 74 | #'# Randomized experiment example 75 | #' 76 | #'n = 100 # number of training-set patients to simulate 77 | #'p = 10 # number of features for each training-set patient 78 | #' 79 | #'# Simulate data 80 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 81 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 82 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 83 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 84 | #' 85 | #'# Estimate causal MARS model 86 | #'fit_cm = causalMARS(x, tx, y) 87 | #'pred_cm = predict(fit_cm, newx = x) 88 | #' 89 | #'# Visualize results 90 | #'plot(tx_effect, pred_cm, main = 'Causal MARS', 91 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 92 | #'abline(0, 1, lty = 2) 93 | #' 94 | #' @export 95 | 96 | causalMARS = function(x, tx, y, 97 | maxterms = 11, nquant = 5, degree = ncol(x), eps = 1, 98 | backstep = FALSE, x.val = NULL, tx.val = NULL, y.val = NULL, 99 | propensity = FALSE, stratum = rep(1, nrow(x)), stratum.val = NULL, 100 | minnum = 5) { 101 | 102 | 103 | # Input sanitization 104 | 105 | x = as.matrix(x) 106 | 107 | if (nrow(x) != length(tx)) { 108 | stop('nrow(x) does not match length(tx)') 109 | 110 | } else if (nrow(x) != length(y)) { 111 | stop('nrow(x) does not match length(y)') 112 | 113 | } else if (!is.numeric(x)) { 114 | stop('x must be numeric matrix') 115 | 116 | } else if (!is.numeric(y)) { 117 | stop('y must be numeric (use 0/1 for binary response)') 118 | 119 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 120 | stop('tx must be vector of 0s and 1s') 121 | 122 | } 123 | 124 | 125 | BIG = 1e+10 126 | n = nrow(x) 127 | p = ncol(x) 128 | x = scale(x, TRUE, FALSE) #NOTE 129 | if(!is.null(x.val)) { 130 | x.val = scale(x.val, TRUE, FALSE) 131 | } 132 | 133 | # compute quantiles for splitting 134 | discrete = rep(FALSE, p) 135 | for (j in 1:p) { 136 | if (length(table(x[, j])) == 2) 137 | discrete[j] = TRUE 138 | } 139 | 140 | probs = seq(0, 1, length = nquant)[-nquant] 141 | quant = apply(x, 2, stats::quantile, probs) 142 | 143 | nquantm = rep(nquant, p) 144 | if (sum(discrete) > 0) { 145 | for (j in which(discrete)) { 146 | nquantm[j] = 2 147 | quant[, j] = NA 148 | quant[1, j] = 0 149 | } 150 | } 151 | 152 | if (propensity) { 153 | stratum.sizes = table(stratum)/nrow(x) 154 | nstratum = length(stratum.sizes) 155 | if (sum(as.numeric(names(stratum.sizes)) != (1:length(stratum.sizes))) != 0) 156 | stop("Strata should be numbered 1:k") 157 | stratum.val.sizes = table(stratum.val)/nrow(x.val) 158 | nstratum.val = length(stratum.val.sizes) 159 | if (sum(as.numeric(names(stratum.val.sizes)) != (1:length(stratum.val.sizes))) != 160 | 0) 161 | stop("Stratatest should be numbered 1:k") 162 | } 163 | 164 | modmatrix = matrix(0, nrow = n, ncol = maxterms) 165 | active = rep(FALSE, maxterms) 166 | active[1] = TRUE 167 | modmatrix[, 1] = 1 168 | 169 | r = y - y 170 | a0 = c(mean(y[tx == 0]), mean(y[tx == 1])) 171 | 172 | r[tx == 0] = y[tx == 0] - mean(y[tx == 0]) 173 | r[tx == 1] = y[tx == 1] - mean(y[tx == 1]) 174 | 175 | parent = childvar = childquant = NULL 176 | allvars = vector("list", maxterms) 177 | maxscorall = rep(NA, maxterms) 178 | nterms = 1 179 | 180 | 181 | # forward stepwise 182 | while (nterms < maxterms) { 183 | maxscor = -1 * BIG 184 | act = active[1:nterms] 185 | num = unlist(lapply(allvars, 
length))[1:nterms] 186 | act = act & (num < degree) 187 | 188 | for (ii in (1:nterms)[act]) { 189 | jlist = rep(TRUE, p) 190 | if (ii > 1) 191 | jlist[allvars[[ii]]] = FALSE 192 | 193 | 194 | for (j in which(jlist)) { 195 | for (k in 1:(nquantm[j] - 1)) { 196 | # bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) bx2 = 197 | # modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) NOTE have to make 198 | # correspondng chanegs in propensity section below! 199 | bx = NULL 200 | bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) 201 | if (sum(bx1^2) > 0) 202 | bx = cbind(bx, bx1) 203 | bx2 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) 204 | if (sum(bx2^2) > 0) 205 | bx = cbind(bx, bx2) 206 | 207 | if (!propensity) { 208 | 209 | scor = -0.5 * BIG 210 | if (!is.null(bx)) { 211 | res = myridge(bx, r, int = TRUE)$res 212 | res0 = myridge(bx[tx == 0, ], r[tx == 0], int = TRUE)$res 213 | res1 = myridge(bx[tx == 1, ], r[tx == 1], int = TRUE)$res 214 | scor = sum(res^2) - sum(res0^2) - sum(res1^2) 215 | } 216 | } 217 | 218 | if (propensity) { 219 | if (!is.null(bx)) { 220 | scor = 0 221 | 222 | res0 = res1 = rep(0, n) 223 | for (s in 1:nstratum) { 224 | 225 | res = myridge(bx[stratum == s, ], r[stratum == s])$res 226 | 227 | if (sum(tx == 0 & stratum == s) >= minnum) { 228 | res0[stratum == s & tx == 0] = myridge(bx[tx == 0 & stratum == 229 | s, ], r[tx == 0 & stratum == s])$res 230 | } 231 | if (sum(tx == 1 & stratum == s) >= minnum) { 232 | res1[stratum == s & tx == 1] = myridge(bx[tx == 1 & stratum == 233 | s, ], r[tx == 1 & stratum == s])$res 234 | } 235 | scor = scor + (sum(res^2) - sum(res0^2) - sum(res1^2)) 236 | 237 | } 238 | res0 = res0[tx == 0] 239 | res1 = res1[tx == 1] 240 | } 241 | } # end of propensity loop 242 | 243 | 244 | if (scor > maxscor) { 245 | maxscor = scor 246 | 247 | iihat = ii 248 | jhat = j 249 | khat = k 250 | res0hat = res0 251 | res1hat = res1 252 | } 253 | } 254 | } 255 | } #end of for loop 256 | 257 | maxscorall[ii] = maxscor 258 | new1 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 1) 259 | new2 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 2) 260 | if (!is.matrix(new1)) 261 | new1 = matrix(new1, ncol = 1) 262 | if (!is.matrix(new2)) 263 | new2 = matrix(new2, ncol = 1) 264 | 265 | 266 | modmatrix[, nterms + 1] = new1 267 | modmatrix[, nterms + 2] = new2 268 | 269 | 270 | active[nterms + 1] = TRUE 271 | active[nterms + 2] = TRUE 272 | 273 | 274 | allvars[[nterms + 1]] = allvars[[nterms + 2]] = c(allvars[[iihat]], jhat) 275 | nterms = nterms + 2 276 | 277 | 278 | r[tx == 0] = r[tx == 0] * (1 - eps) + eps * res0hat 279 | r[tx == 1] = r[tx == 1] * (1 - eps) + eps * res1hat 280 | 281 | parent = c(parent, iihat) 282 | childvar = c(childvar, jhat) 283 | 284 | childquant = c(childquant, khat) 285 | } #end of while loop 286 | 287 | 288 | active = colSums(modmatrix^2) > 0 289 | deltahat = khat = NA 290 | out = list(parent = parent, childvar = childvar, childquant = childquant, quant = quant, 291 | active = active, eps = eps, allvars = allvars) 292 | 293 | tim1 = proc.time() 294 | # cat('forward done', fill = TRUE) 295 | 296 | 297 | # backward deletion 298 | rsstesthat = setesthat = NULL 299 | 300 | if (backstep) { 301 | 302 | if(is.null(x.val) | is.null(tx.val) | is.null(y.val)) { 303 | stop('If backstep = TRUE, then x.val, tx.val, y.val must be specified.') 304 | } 305 | 306 | BIG = 1e+10 307 | modmatrix = makebx.newmars(out, x, remove.zerocols = FALSE)[, -1] 308 | modmatrix.val = 
makebx.newmars(out, x.val, remove.zerocols = FALSE)[, -1] 309 | ss = colSums(modmatrix^2) > 0 310 | 311 | active2 = rep(TRUE, ncol(modmatrix.val)) 312 | active2[!ss] = FALSE 313 | khat = deltahat = deltatesthat = rsstesthat = setesthat = rep(NA, sum(active2)) 314 | rtest = rep(NA, nrow(x.val)) 315 | 316 | go = sum(active2) > 0 317 | 318 | ii = 0 319 | while (go) { 320 | go = FALSE 321 | 322 | delta = deltatest = rsstest = rep(BIG, length(active2)) 323 | # train 324 | if (!propensity) { 325 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0, ], y[tx == 326 | 0], int = TRUE) 327 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1, ], y[tx == 328 | 1], int = TRUE) 329 | res0 = fit0$res 330 | res1 = fit1$res 331 | 332 | # test 333 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 0, 334 | , drop = FALSE]) %*% fit0$coef 335 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 1, 336 | , drop = FALSE]) %*% fit1$coef 337 | res0test = (y.val[tx.val == 0] - yhat0) 338 | res1test = (y.val[tx.val == 1] - yhat1) 339 | rss0testsq = sum(res0test^2) 340 | rss1testsq = sum(res1test^2) 341 | } 342 | 343 | if (propensity) 344 | { 345 | rss0sq = rss1sq = rss0testsq = rss1testsq = 0 346 | # initial fit with parallel terms training 347 | for (s in 1:nstratum) { 348 | res0 = res1 = 0 349 | if (sum(tx == 0 & stratum == s) >= minnum) { 350 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0 & stratum == 351 | s, ], y[tx == 0 & stratum == s], int = TRUE) 352 | res0 = fit0$res 353 | } 354 | if (sum(tx == 1 & stratum == s) >= minnum) { 355 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1 & stratum == 356 | s, ], y[tx == 1 & stratum == s], int = TRUE) 357 | res1 = fit1$res 358 | } 359 | rss0sq = rss0sq + sum(res0^2) 360 | rss1sq = rss1sq + sum(res1^2) 361 | 362 | 363 | # test 364 | 365 | res0test = res1test = 0 366 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 367 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 368 | 0 & stratum.val == s, ]) %*% fit0$coef 369 | res0test = (y.val[tx.val == 0 & stratum.val == s] - yhat0) 370 | } 371 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 372 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 373 | 1 & stratum.val == s, ]) %*% fit1$coef 374 | res1test = (y.val[tx.val == 1 & stratum.val == s] - yhat1) 375 | } 376 | rss0testsq = rss0testsq + sum(res0test^2) 377 | rss1testsq = rss1testsq + sum(res1test^2) 378 | } 379 | } #end of propensity 380 | 381 | 382 | for (k in which(active2)) { 383 | # try collapsing a training set term 384 | act = active2 385 | act[k] = FALSE 386 | 387 | if (!propensity) { 388 | 389 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 390 | r = redfit$res 391 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0, ], r[tx == 392 | 0], int = TRUE) 393 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1, ], r[tx == 394 | 1], int = TRUE) 395 | 396 | res00 = fit00$res 397 | res11 = fit11$res 398 | delta[k] = (sum(res00^2) + sum(res11^2) - sum(res0^2) - sum(res1^2))/(sum(res00^2) + 399 | sum(res11^2)) 400 | # try collapsing the same test set term 401 | rtest = y.val - cbind(1, modmatrix.val[, k, drop = FALSE]) %*% redfit$coef 402 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 0, 403 | ]) %*% fit00$coef 404 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 1, 405 | ]) %*% fit11$coef 406 | 407 | res00test = (rtest[tx.val == 0] - yhat00) 408 | res11test = (rtest[tx.val == 1] - yhat11) 409 | 
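# validation-set RSS with term k collapsed (used below for deltatest[k])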
rss00testsq = sum(res00test^2) 410 | rss11testsq = sum(res11test^2) 411 | } 412 | 413 | if (propensity) 414 | { 415 | rss00sq = rss11sq = rss00testsq = rss11testsq = 0 416 | for (s in 1:nstratum) { 417 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 418 | r = redfit$res 419 | res00 = res11 = res00test = res11test = 0 420 | if (sum(tx == 0 & stratum == s) >= minnum) { 421 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0 & stratum == 422 | s, ], r[tx == 0 & stratum == s], int = TRUE) 423 | res00 = fit00$res 424 | } 425 | if (sum(tx == 1 & stratum == s) >= minnum) { 426 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1 & stratum == 427 | s, ], r[tx == 1 & stratum == s], int = TRUE) 428 | res11 = fit11$res 429 | } 430 | 431 | rss00sq = rss00sq + sum(res00^2) 432 | rss11sq = rss11sq + sum(res11^2) 433 | 434 | # try deleting the same test set term 435 | 436 | 437 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 438 | 439 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 440 | 0 & stratum.val == s, ]) %*% fit00$coef 441 | res00test = (y.val[tx.val == 0 & stratum.val == s] - yhat00) 442 | } 443 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 444 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 445 | 1 & stratum.val == s, ]) %*% fit11$coef 446 | res11test = (y.val[tx.val == 1 & stratum.val == s] - yhat11) 447 | } 448 | 449 | rss00testsq = rss00testsq + sum(res00test^2) 450 | rss11testsq = rss11testsq + sum(res11test^2) 451 | 452 | } 453 | delta[k] = (rss00sq + rss11sq - rss0sq - rss1sq)/(rss00sq + rss11sq) 454 | } #end of propensity 455 | 456 | # deltatest[k] = (sum(res00test^2) + sum(res11test^2) - sum(res0test^2) 457 | # -sum(res1test^2))/(sum(res00test^2) + sum(res11test^2)) 458 | 459 | deltatest[k] = (rss00testsq + rss11testsq - rss0testsq - rss1testsq)/(rss00testsq + 460 | rss11testsq) 461 | } #end of propensity 462 | 463 | ii = ii + 1 464 | # cat(c('ii=', ii), fill = TRUE) 465 | khat[ii] = which.min(delta) 466 | # cat(delta,fill=TRUE) 467 | deltahat[ii] = delta[khat[ii]] 468 | deltatesthat[ii] = deltatest[khat[ii]] 469 | n1 = length(res0test) 470 | n2 = length(res1test) 471 | # rsstesthat[ii]=(sum(res0test^2) + sum(res1test^2))/(n1+n2) 472 | rsstesthat[ii] = (rss0testsq + rss1testsq)/(n1 + n2) 473 | setesthat[ii] = NA 474 | # setesthat[ii]=sqrt((n1*var(res0test^2)+n2*var(res1test^2))/(n1+n2)^2) # not 475 | # currently computed 476 | 477 | active2[khat] = FALSE 478 | num.colsleft = sum(colSums(modmatrix.val[, active2, drop = FALSE]^2) > 0) 479 | go = (sum(active2) > 0) & (num.colsleft > 0) 480 | # cat(c(sum(active2), num.colsleft),fill=TRUE) browser() cat(c('dropping ', khat), 481 | # fill = TRUE) 482 | } 483 | 484 | } 485 | out$khat = khat[!is.na(khat)] 486 | out$deltahat = deltahat[!is.na(deltahat)] 487 | out$rsstesthat = rsstesthat 488 | out$setesthat = setesthat 489 | out$tim1 = tim1 490 | out$tim2 = proc.time() 491 | out$x = x 492 | out$tx = tx 493 | out$y = y 494 | out$x.val = x.val 495 | out$tx.val = tx.val 496 | out$y.val = y.val 497 | out$maxterms = maxterms 498 | out$backstep = backstep 499 | out$propensity = propensity 500 | out$stratum = stratum 501 | out$stratum.val = stratum.val 502 | out$minnum = minnum 503 | class(out) = 'causalMARS' 504 | return(out) 505 | } 506 | 507 | -------------------------------------------------------------------------------- /R/cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal boosting model with cross 
validation 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param num.trees number of shallow causal trees to build 7 | #' @param maxleaves maximum number of leaves per causal tree 8 | #' @param eps learning rate 9 | #' @param splitSpread how far apart should the candidate splits be for the 10 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 11 | #' cutpoints as candidates for making split 12 | #' @param type.measure loss to use for cross validation: 13 | #' 'response' returns mean-square error for predicting response in each arm. 14 | #' 'effect' returns MSE for treatment effect using honest over-fit estimation. 15 | #' @param nfolds number of cross validation folds 16 | #' @param foldid vector of fold membership 17 | #' @param propensity logical: should propensity score stratification be used? 18 | #' @param stratum optional vector giving propensity score stratum for each 19 | #' observation (only used if \code{propensity = TRUE}) 20 | #' @param isConstVar logical: for the causal tree splitting criterion 21 | #' (T-statistc), should it be assumed that the noise variance is the same in 22 | #' treatment and control arms? 23 | #' 24 | #' @return an object of class \code{cv.causalBoosting} which is an object of 25 | #' class \code{causalBoosting} with these additional attributes: 26 | #' \itemize{ 27 | #' \item num.trees.min: number of trees with lowest CV error 28 | #' \item cvm: vector of mean CV error for each number of trees 29 | #' \item cvsd: vector of standard errors for mean CV errors 30 | #' } 31 | #' 32 | #' @examples 33 | #'# Randomized experiment example 34 | #' 35 | #'n = 100 # number of training-set patients to simulate 36 | #'p = 10 # number of features for each training-set patient 37 | #' 38 | #'# Simulate data 39 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 40 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 41 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 42 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 43 | #' 44 | #'# Estimate causal boosting model with cross-validation 45 | #'fit_cv = cv.causalBoosting(x, tx, y) 46 | #'fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 47 | #'pred_cv = predict(fit_cv, newx = x) 48 | #' 49 | #'# Visualize results 50 | #'plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 51 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 52 | #'abline(0, 1, lty = 2) 53 | #' 54 | #' @export 55 | 56 | 57 | cv.causalBoosting = function(x, tx, y, 58 | num.trees = 500, maxleaves = 4, eps = 0.01, splitSpread = 0.1, 59 | type.measure = c('effect', 'response'), nfolds = 5, foldid = NULL, 60 | propensity = FALSE, stratum = NULL, isConstVar = TRUE) { 61 | 62 | 63 | # Input sanitization 64 | 65 | x = as.matrix(x) 66 | 67 | if (nrow(x) != length(tx)) { 68 | stop('nrow(x) does not match length(tx)') 69 | 70 | } else if (nrow(x) != length(y)) { 71 | stop('nrow(x) does not match length(y)') 72 | 73 | } else if (!is.numeric(x)) { 74 | stop('x must be numeric matrix') 75 | 76 | } else if (!is.numeric(y)) { 77 | stop('y must be numeric (use 0/1 for binary response)') 78 | 79 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 80 | stop('tx must be vector of 0s and 1s') 81 | 82 | } 83 | 84 | 85 | type.measure = match.arg(type.measure) 86 | 87 | if 
(is.null(foldid)) foldid = sample(rep(1:nfolds, length = nrow(x))) 88 | nfolds = length(unique(foldid)) 89 | 90 | fit = list() 91 | pred.response = matrix(0, nrow(x), num.trees) 92 | pred.effect = matrix(0, nrow(x), num.trees) 93 | pred.refit = rep(NA, nrow(x)) 94 | 95 | for (k in 1:nfolds) { 96 | 97 | x.val = x[foldid == k, , drop = FALSE] 98 | tx.val = tx[foldid == k] 99 | y.val = y[foldid == k] 100 | stratum.val = stratum[foldid == k] 101 | 102 | fit[[k]] = causalBoosting(x = x[foldid != k, , drop = FALSE], 103 | tx = tx[foldid != k], y = y[foldid != k], 104 | num.trees = num.trees, maxleaves = maxleaves, eps = eps, 105 | splitSpread = splitSpread, x.est = x.val, tx.est = tx.val, y.est = y.val, 106 | propensity = propensity, stratum = stratum[foldid != k], 107 | stratum.est = stratum.val, isConstVar = isConstVar) 108 | 109 | tmp = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 1:num.trees, 110 | type = 'conditional.mean') 111 | 112 | tmpMat = rbind(tmp$G0, tmp$G1) 113 | n_out = sum(foldid == k) 114 | pred.response[foldid == k, ] = 115 | tmpMat[tx[foldid == k] * n_out + (1:n_out), ] 116 | 117 | pred.effect[foldid == k, ] = tmp$G1 - tmp$G0 118 | tmp.refit = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 119 | 1:num.trees, type = 'conditional.mean', honest = TRUE) 120 | pred.refit[foldid == k] = (tmp.refit$G1 - tmp.refit$G0)[, num.trees] 121 | } 122 | 123 | cvm.effect = colMeans((pred.effect - pred.refit)^2) 124 | cvsd.effect = apply((pred.effect - pred.refit)^2, 2, stats::sd) / 125 | sqrt(nrow(pred.effect)) 126 | 127 | cvm.response = apply(pred.response, 2, function(yhat) mean((yhat - y)^2)) 128 | cvsd.response = apply(pred.response, 2, 129 | function(yhat) stats::sd((yhat - y)^2)) / sqrt(nrow(pred.response)) 130 | 131 | fit = causalBoosting(x = x, tx = tx, y = y, num.trees = num.trees, 132 | maxleaves = maxleaves, eps = eps, splitSpread = splitSpread, 133 | propensity = propensity, stratum = stratum, isConstVar = isConstVar) 134 | 135 | fit$num.trees.min.effect = which.min(cvm.effect) 136 | fit$num.trees.min.response = which.min(cvm.response) 137 | fit$num.trees.1se.effect = 138 | max(which(cvm.effect < min(cvm.effect + cvsd.effect))) 139 | 140 | fit$cvm.effect = cvm.effect 141 | fit$cvsd.effect = cvsd.effect 142 | fit$cvm.response = cvm.response 143 | fit$cvsd.response = cvsd.response 144 | 145 | class(fit) = 'cv.causalBoosting' 146 | fit 147 | } 148 | 149 | -------------------------------------------------------------------------------- /R/makebx.newmars.R: -------------------------------------------------------------------------------- 1 | 2 | makebx.newmars = function(fit, x, remove.zerocols = FALSE) { 3 | # creates model matrix from 'fit' 4 | quant = fit$quant 5 | parent = fit$parent 6 | childvar = fit$childvar 7 | childquant = fit$childquant 8 | active = fit$active 9 | nterms = length(parent) 10 | nterms2 = 1 + 2 * nterms 11 | 12 | modmatrix = matrix(0, nrow = nrow(x), ncol = nterms2) 13 | modmatrix[, 1] = 1 14 | count = 1 15 | 16 | for (ii in 1:nterms) { 17 | new1 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 18 | childvar[ii]], dir = 1) 19 | modmatrix[, count + 1] = new1 20 | new2 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 21 | childvar[ii]], dir = 2) 22 | 23 | modmatrix[, count + 2] = new2 24 | count = count + 2 25 | } 26 | if (remove.zerocols) 27 | modmatrix = modmatrix[, active] 28 | return(modmatrix) 29 | } 30 | 31 | 
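# A small illustration (not part of the fitting code) of the reflected hinge
# pair underlying each pair of columns that makebx.newmars() adds, using
# truncpow() from R/truncpow.R; the cut point 0.3 is arbitrary:
#
#   x1 = seq(-1, 1, by = 0.5)
#   truncpow(x1, cutp = 0.3, dir = 1)  # (x - 0.3) * (x > 0.3):  0.0 0.0 0.0 0.2 0.7
#   truncpow(x1, cutp = 0.3, dir = 2)  # (0.3 - x) * (x < 0.3):  1.3 0.8 0.3 0.0 0.0
#
# Each new model-matrix column is the parent term multiplied by one of these
# hinge functions, evaluated at a quantile cut point of the chosen predictor.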
-------------------------------------------------------------------------------- /R/myridge.R: -------------------------------------------------------------------------------- 1 | 2 | myridge = function(x, y, int = T, eps = 0.001) { 3 | xx = x 4 | if (!is.matrix(xx)) 5 | xx = matrix(xx, ncol = 1) 6 | lam = rep(eps, ncol(xx)) 7 | 8 | if (int) { 9 | xx = cbind(1, xx) 10 | lam = c(0, lam) 11 | } 12 | d = diag(lam) 13 | if (ncol(xx) == 1) 14 | d = lam 15 | coef = solve(t(xx) %*% xx + d) %*% t(xx) %*% y 16 | res = y - xx %*% coef 17 | list(res = res, coef = coef) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /R/pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Pollinate a fitted ranger random forest model 2 | #' 3 | #' @param object a fitted \code{ranger} object 4 | #' @param x matrix of covariates 5 | #' @param y vector of response values 6 | #' 7 | #' @return an object of class \code{pollinated.ranger} which is a \code{ranger} 8 | #' object that has been pollinated with the data in (x, y) 9 | 10 | pollinated.ranger = function(object, x, y) { 11 | 12 | forest = object$forest 13 | num.trees = forest$num.trees 14 | which.list = as.list(seq(num.trees)) 15 | split.values = forest$split.values 16 | split.varIDs = forest$split.varIDs 17 | 18 | for (i in 1:num.trees) { 19 | which = match(split.varIDs[[i]], 0, FALSE) 20 | split.values[[i]][which > 0] = seq(sum(which)) 21 | which.list[[i]] = which 22 | } 23 | 24 | forest$split.values = split.values 25 | object$forest = forest 26 | preds = stats::predict(object, x, predict.all = TRUE)$predictions 27 | 28 | ### Get list of means indexed by the unique terminal node values 29 | pmeans = apply(preds, 2, function(f, y) tapply(y, f, mean), y) 30 | 31 | ### Now populate these terminal nodes with these values 32 | for (i in 1:num.trees) { 33 | which = which.list[[i]] 34 | repvec = rep(NA, sum(which)) 35 | pmean = pmeans[[i]] 36 | ids = as.integer(names(pmean)) 37 | repvec[ids] = pmean 38 | split.values[[i]][which > 0] = repvec 39 | } 40 | 41 | forest$split.values = split.values 42 | object$forest = forest 43 | object$mean = mean(y) 44 | class(object) = c('pollinated.ranger', 'ranger') 45 | object 46 | } 47 | 48 | -------------------------------------------------------------------------------- /R/predict.PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted PTO forest model 2 | #' 3 | #' @param object a fitted \code{PTOforest} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param ... ignored 7 | #' 8 | #' @return a vector of predictions corresponding to the rows of \code{newx} 9 | #' 10 | #' @export 11 | 12 | predict.PTOforest = function(object, newx, ...) 
{ 13 | 14 | colnames(newx) = colnames(object$x) 15 | 16 | if (object$postprocess) { 17 | stats::predict(object$postfit, data = newx)$predictions 18 | 19 | } else { 20 | stats::predict(object$PTOfit1, newx = newx) - 21 | stats::predict(object$PTOfit0, newx = newx) 22 | 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/predict.bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a bag of fitted causal MARS models 2 | #' 3 | #' @param object a fitted \code{bagged.causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param type type of prediction required: 7 | #' 'average' returns a vector of the averages of the bootstrap estimates. 8 | #' 'all' returns a matrix of all of the bootstrap estimates. 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.bagged.causalMARS = function(object, newx, type = c('average', 'all'), 17 | ...) { 18 | type = match.arg(type) 19 | newx = scale(newx, center = TRUE, scale = FALSE) 20 | pred = sapply(object, FUN = stats::predict, newx = newx) 21 | switch(type, 22 | all = pred, 23 | average = rowMeans(pred)) 24 | } 25 | 26 | -------------------------------------------------------------------------------- /R/predict.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal boosting model 2 | #' 3 | #' @param object a fitted \code{causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param num.trees number(s) of shallow causal trees to use for prediction 13 | #' @param honest logical: should honest re-estimates of leaf means be used for 14 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 15 | #' when the causal boosting model was fit 16 | #' @param naVal value with which to replace \code{NA} predictions 17 | #' @param ... ignored 18 | #' 19 | #' @return a vector or matrix of predictions corresponding to the rows of 20 | #' \code{newx} 21 | #' 22 | #' @export 23 | 24 | predict.causalBoosting = function(object, newx, newtx = NULL, 25 | type = c('treatment.effect', 'conditional.mean', 'response'), 26 | num.trees = 1:object$num.trees, honest = FALSE, naVal = 0, ...) 
{ 27 | 28 | type = match.arg(type) 29 | if (type == 'response' & is.null(newtx)) { 30 | stop('response predictions require that newtx be specified') 31 | } 32 | 33 | CBM = object$CBM 34 | 35 | n = nrow(newx) 36 | eps = CBM$eps 37 | G0 = matrix(CBM$intercept[1], n, max(num.trees) + 1) 38 | G1 = matrix(CBM$intercept[2], n, max(num.trees) + 1) 39 | for (k in 1:max(num.trees)) { 40 | pred = stats::predict(CBM$trees[[k]], newx, type = 'conditional.mean', 41 | honest = honest) 42 | G1[, k + 1] = G1[, k] + eps * pred[, 2] 43 | G0[, k + 1] = G0[, k] + eps * pred[, 1] 44 | } 45 | G1[is.na(G1)] = naVal 46 | G0[is.na(G0)] = naVal 47 | 48 | switch(type, 49 | treatment.effect = (G1 - G0)[, num.trees + 1], 50 | conditional.mean = list(G1 = G1[, num.trees + 1], G0 = G0[, num.trees + 1]), 51 | response = (newtx * G1 + (1 - newtx) * G0)[, num.trees + 1]) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /R/predict.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal MARS model 2 | #' 3 | #' @param object a fitted \code{causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param active indices of columns with nonzero norm (defaults to model 7 | #' selected via backward stepwise phase, or the full model if 8 | #' \code{backstep = FALSE}) 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.causalMARS = function(object, newx, active, ...) { 17 | 18 | if (missing(active)) { 19 | if (object$backstep) { 20 | ntermshat = length(object$khat) - which.min(object$rsstesthat) + 1 21 | active = (rev(object$khat))[1:ntermshat] 22 | } else { 23 | active = 1:(object$maxterms - 1) 24 | } 25 | } 26 | 27 | object$x = scale(object$x, center = TRUE, scale = FALSE) 28 | newx = scale(newx, center = TRUE, scale = FALSE) 29 | 30 | del = rep(mean(object$y[object$tx == 1]) - mean(object$y[object$tx == 0]), 31 | nrow(newx)) 32 | stratum = object$stratum 33 | stratum.val = object$stratum.val 34 | minnum = object$minnum 35 | nstrata = length(unique(stratum)) 36 | stratawt = (table(c(stratum, 1:nstrata)) - 1) 37 | stratawt = stratawt / sum(stratawt) 38 | 39 | if (length(active) > 0) { 40 | if (!object$propensity) { 41 | # make training matrices 42 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 43 | beta0 = myridge(bx0[object$tx == 0, ], object$y[object$tx == 0])$coef 44 | beta1 = myridge(bx0[object$tx == 1, ], object$y[object$tx == 1])$coef 45 | # make test set matrices 46 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 47 | del = cbind(1, bx) %*% (beta1 - beta0) 48 | } 49 | if (object$propensity) { 50 | # make training matrices 51 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 52 | beta0 = beta1 = matrix(NA, ncol(bx0) + 1, nstrata) 53 | for (ss in 1:nstrata) { 54 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 55 | beta0[, ss] = myridge(bx0[object$tx == 0 & stratum == ss, ], 56 | object$y[object$tx == 0 & stratum == ss])$coef 57 | beta1[, ss] = myridge(bx0[object$tx == 1 & stratum == ss, ], 58 | object$y[object$tx == 1 & stratum == ss])$coef 59 | } 60 | } 61 | # make test set matrices 62 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 63 | del = rep(0, nrow(bx)) 64 | totwt = 0 65 | for (ss in 
1:nstrata) { 66 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 67 | del = del + stratawt[ss] * cbind(1, bx) %*% 68 | (beta1[, ss] - beta0[, ss]) 69 | totwt = totwt + stratawt[ss] 70 | } 71 | } 72 | del = del / totwt 73 | } 74 | } 75 | return(c(del)) 76 | } 77 | 78 | -------------------------------------------------------------------------------- /R/predict.causalTree.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal tree model 2 | #' 3 | #' @param object a fitted \code{causalTree} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param honest logical: should honest re-estimates of leaf means be used for 13 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 14 | #' when the causal boosting model was fit 15 | #' @param naVal value with which to replace \code{NA} predictions 16 | #' @param ... ignored 17 | #' 18 | #' @return a vector or matrix of predictions corresponding to the rows of 19 | #' \code{newx} 20 | #' 21 | #' @export 22 | 23 | predict.causalTree = function(object, newx, newtx = NULL, 24 | type = c('treatment.effect', 'conditional.mean', 'response'), honest = FALSE, 25 | naVal = 0, ...) { 26 | 27 | leaf = rep(1, nrow(newx)) 28 | terminal = rep(FALSE, nrow(newx)) 29 | while (sum(!is.na(object$var[leaf])) > 0) { 30 | var = cbind((1:nrow(newx))[!terminal], object$var[leaf[!terminal]]) 31 | leaf[!terminal] = object$left[leaf[!terminal]] + (newx[var] >= object$val[leaf[!terminal]]) 32 | terminal = is.na(object$var[leaf]) 33 | } 34 | pred = list() 35 | pred$leaf = leaf 36 | if (!honest) { 37 | pred$pred0 = object$pred0[leaf] 38 | pred$pred1 = object$pred1[leaf] 39 | } else { 40 | pred$pred0 = object$pred0e[leaf] 41 | pred$pred1 = object$pred1e[leaf] 42 | } 43 | type = match.arg(type) 44 | predMatrix = cbind(pred$pred0, pred$pred1) 45 | predMatrix[is.na(predMatrix)] = naVal 46 | out = switch(type, 47 | treatment.effect = predMatrix[, 2] - predMatrix[, 1], 48 | conditional.mean = predMatrix, 49 | response = ifelse(!is.null(newtx), 50 | predMatrix[cbind(1:nrow(newx), newtx + 1)], rep(NA, nrow(newx)))) 51 | out 52 | } 53 | 54 | -------------------------------------------------------------------------------- /R/predict.cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted cross-validated causal boosting model 2 | #' 3 | #' @param object a fitted \code{cv.causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'individual'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 
12 | #' @param num.trees number of shallow causal trees to use for prediction 13 | #' @param naVal value with which to replace \code{NA} predictions 14 | #' @param ... ignored 15 | #' 16 | #' @return a vector or matrix of predictions corresponding to the rows of 17 | #' \code{newx} 18 | #' 19 | #' @export 20 | 21 | predict.cv.causalBoosting = function(object, newx, newtx = NULL, 22 | type = c('treatment.effect', 'conditional.mean', 'response'), 23 | num.trees = object$num.trees.min.effect, naVal = 0, ...) { 24 | 25 | predict.causalBoosting(object = object, newx = newx, newtx = newtx, 26 | type = type, num.trees = num.trees, naVal = naVal) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /R/predict.pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a pollinated ranger random forest model 2 | #' 3 | #' @param object a fitted \code{pollinated.ranger} object 4 | #' @param newx matrix of new covariates for which predictions are desired 5 | #' @param predict.all logical: should predictions from all trees be returned? 6 | #' Otherwise the average across trees is returned 7 | #' @param na.treatment how to treat NA predictions from individual trees: 8 | #' 'omit' only uses trees for which the prediction is not NA. 9 | #' 'replace' replaces NA predictions with the overall mean response. 10 | #' 'NA' returns NA if any tree prediction is NA. 11 | #' @param ... additional arguments passed on to \code{predict.ranger} 12 | #' 13 | #' 14 | #' @return a vector of predicted responses (or a matrix of per-tree predictions 15 | #' if \code{predict.all = TRUE}) corresponding to the rows of newx 16 | 17 | predict.pollinated.ranger = function(object, newx, predict.all = FALSE, 18 | na.treatment = c('omit', 'replace', 'NA'), ...)
{ 19 | 20 | na.treatment = match.arg(na.treatment) 21 | 22 | class(object) = 'ranger' 23 | preds = stats::predict(object, newx, predict.all = TRUE, 24 | ...)$predictions 25 | class(object) = 'pollinated.ranger' 26 | 27 | if (na.treatment == 'replace') { 28 | wh <- is.na(preds) 29 | preds[wh] = object$mean 30 | } 31 | 32 | if (!predict.all) { 33 | preds = apply(preds, 1, mean, na.rm = (na.treatment == 'omit')) 34 | } 35 | 36 | preds 37 | } 38 | 39 | -------------------------------------------------------------------------------- /R/stratify.R: -------------------------------------------------------------------------------- 1 | #' Get propensity strata from propensity scores 2 | #' 3 | #' @param pscore vector of propensity scores 4 | #' @param tx vector of treatment indicators 5 | #' @param min.per.arm minimum number of observations for each arm within each 6 | #' stratum 7 | #' 8 | #' @return a list with two components: \code{stratum}, a vector of integers of the 9 | #' same length as pscore giving the propensity stratum of each observation, and \code{cutoffs}, the propensity score cutoffs between strata 10 | #' 11 | #' @export 12 | 13 | stratify = function(pscore, tx, min.per.arm = 30) { 14 | 15 | stratum = ceiling(10 * pscore) 16 | cutoffs = sort(unique(stratum/10)) 17 | stratum = as.numeric(as.factor(stratum)) 18 | 19 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 20 | while(min(num.treated) < min.per.arm & length(unique(stratum)) > 1) { 21 | stratum1 = which.min(num.treated) 22 | cutoffs = cutoffs[-which.min(num.treated)] 23 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 24 | stratum2 = neighbors[which.min(num.treated[neighbors])] 25 | stratum[stratum == stratum1] = stratum2 26 | stratum = as.numeric(as.factor(stratum)) 27 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 28 | } 29 | 30 | stratum = 1 + max(stratum) - stratum 31 | cutoffs = rev(cutoffs) 32 | 33 | num.control = stats::aggregate(1 - tx, list(stratum = stratum), sum)$x 34 | while(min(num.control) < min.per.arm & length(unique(stratum)) > 1) { 35 | stratum1 = which.min(num.control) 36 | cutoffs = cutoffs[-which.min(num.control)] 37 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 38 | stratum2 = neighbors[which.min(num.control[neighbors])] 39 | stratum[stratum == stratum1] = stratum2 40 | stratum = as.numeric(as.factor(stratum)) 41 | num.control = stats::aggregate(1 - tx, list(stratum = stratum), sum)$x # recompute control-arm counts on merged strata 42 | } 43 | 44 | cutoffs[1] = 1 45 | cutoffs = rev(cutoffs) 46 | 47 | list(stratum = 1 + max(stratum) - stratum, cutoffs = cutoffs) 48 | } 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /R/truncpow.R: -------------------------------------------------------------------------------- 1 | 2 | truncpow = function(x, cutp, dir = 1) { 3 | if (dir == 1) 4 | out = (x - cutp) * (x > cutp) 5 | if (dir == 2) 6 | out = (cutp - x) * (x < cutp) 7 | return(out) 8 | } 9 | 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # causalLearning 2 | Methods for heterogeneous treatment effect estimation 3 | 4 | This R package includes functions for fitting the pollinated 5 | transformed outcome forest, causal boosting and causal MARS from 6 | [Powers et al. (2017)](https://arxiv.org/abs/1707.00102). 7 | 8 | The package is currently in beta, not yet ready for public consumption.
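A minimal usage sketch, mirroring the examples shipped in the package documentation (the simulated data and object names below are illustrative only):

```r
library(causalLearning)

# Simulate a small randomized experiment
n = 100   # number of patients
p = 10    # number of covariates
x = matrix(rnorm(n * p), nrow = n, ncol = p)
tx_effect = x[, 1] + (x[, 2] > 0)            # heterogeneous treatment effect
tx = rbinom(n, size = 1, p = 0.5)            # random treatment assignment
y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001)

# Causal boosting, tuned by cross-validation
fit_cv = cv.causalBoosting(x, tx, y)
pred_cv = predict(fit_cv, newx = x)

# Bagged causal MARS
fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10)
pred_bcm = predict(fit_bcm, newx = x)

# Pollinated transformed outcome forest
fit_pto = PTOforest(x, tx, y)
pred_pto = predict(fit_pto, newx = x)
```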
9 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: causalLearning 2 | Title: Methods for heterogeneous treatment effect estimation 3 | Version: 1.0.0 4 | Authors@R: c( 5 | person("Scott", "Powers", email = "saberpowers@gmail.com", 6 | role = c("aut", "cre")), 7 | person("Junyang", "Qian", email = "junyangq@stanford.edu", role = "aut"), 8 | person("Trevor", "Hastie", email = "hastie@stanford.edu", role = "aut"), 9 | person("Robert", "Tibshirani", email = "tibs@stanford.edu", role = "aut")) 10 | Description: The main functions are cv.causalBoosting and bagged.causalMARS, 11 | which build upon the simpler causalBoosting and causalMARS functions. All of 12 | these functions have their own predict methods. 13 | Depends: R (>= 3.3.0) 14 | Imports: ranger 15 | License: GPL-2 16 | LazyData: true 17 | RoxygenNote: 6.0.1 18 | NeedsCompilation: yes 19 | Packaged: 2018-09-07 15:58:32 UTC; sspowers 20 | Author: Scott Powers [aut, cre], 21 | Junyang Qian [aut], 22 | Trevor Hastie [aut], 23 | Robert Tibshirani [aut] 24 | Maintainer: Scott Powers 25 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(predict,PTOforest) 4 | S3method(predict,bagged.causalMARS) 5 | S3method(predict,causalBoosting) 6 | S3method(predict,causalMARS) 7 | S3method(predict,causalTree) 8 | S3method(predict,cv.causalBoosting) 9 | export(PTOforest) 10 | export(bagged.causalMARS) 11 | export(causalBoosting) 12 | export(causalMARS) 13 | export(cv.causalBoosting) 14 | export(stratify) 15 | useDynLib(causalLearning) 16 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Fit a pollinated transformed outcome (PTO) forest model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param pscore vector of propensity scores 7 | #' @param num.trees number of trees for transformed outcome forest 8 | #' @param mtry number of variables to possibly split at in each node 9 | #' @param min.node.size minimum node size for transformed outcome forest 10 | #' @param postprocess logical: should optional post-processing random forest be 11 | #' fit at end? 12 | #' @param verbose logical: should progress be printed to console? 
13 | #' 14 | #' @return an object of class \code{PTOforest} with attributes: 15 | #' \itemize{ 16 | #' \item x: matrix of covariates supplied by function call 17 | #' \item pscore: vector of propensity score supplied by function call 18 | #' \item postprocess: logical supplied by function call 19 | #' \item TOfit: fitted random forest on transformed outcomes 20 | #' \item PTOfit1: TOfit pollinated with treatment-arm outcomes 21 | #' \item PTOfit0: TOfit pollinated with control-arm outcomes 22 | #' \item postfit: post-processing random forest summarizing results 23 | #' } 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate PTO forest model 38 | #'fit_pto = PTOforest(x, tx, y) 39 | #'pred_pto = predict(fit_pto, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_pto, main = 'PTO forest', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | 49 | PTOforest = function(x, tx, y, pscore = rep(.5, nrow(x)), 50 | num.trees = 500, mtry = ncol(x), min.node.size = max(25, nrow(x) / 40), 51 | postprocess = TRUE, verbose = FALSE) { 52 | 53 | 54 | # Input sanitization 55 | 56 | x = as.matrix(x) 57 | 58 | if (nrow(x) != length(tx)) { 59 | stop('nrow(x) does not match length(tx)') 60 | 61 | } else if (nrow(x) != length(y)) { 62 | stop('nrow(x) does not match length(y)') 63 | 64 | } else if (!is.numeric(x)) { 65 | stop('x must be numeric matrix') 66 | 67 | } else if (!is.numeric(y)) { 68 | stop('y must be numeric (use 0/1 for binary response)') 69 | 70 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 71 | stop('tx must be vector of 0s and 1s') 72 | 73 | } 74 | 75 | 76 | colnames(x) = paste('x', 1:ncol(x), sep = '') 77 | fit = list(x = x, pscore = pscore, postprocess = postprocess) 78 | 79 | z = tx * y / pscore - (1 - tx) * y / (1 - pscore) 80 | 81 | 82 | if (verbose) cat('fitting IPW treatment forest\n') 83 | 84 | data = data.frame(y = z, x = x) 85 | colnames(data) = c('y', colnames(x)) 86 | fit$TOfit = ranger::ranger(data = data, dependent.variable.name = 'y', 87 | num.trees = num.trees, min.node.size = min.node.size, mtry = mtry, 88 | write.forest = TRUE) 89 | 90 | 91 | # Now pollinate the tree separately with treated and untreated 92 | if (verbose) { 93 | cat('pollinating IPW treatment forest separately with treated and', 94 | 'untreated y\n') 95 | } 96 | 97 | fit$PTOfit1 = pollinated.ranger(fit$TOfit, x = x[tx == 1, ], y = y[tx == 1]) 98 | fit$PTOfit0 = pollinated.ranger(fit$TOfit, x = x[tx == 0, ], y = y[tx == 0]) 99 | 100 | 101 | if (postprocess) { 102 | # and one more summarization rf 103 | if (verbose) cat('fitting TX summary forest\n') 104 | 105 | delta = stats::predict(fit$PTOfit1, x) - stats::predict(fit$PTOfit0, x) 106 | data = data.frame(y = delta, x = x) 107 | colnames(x) = paste('x', 1:ncol(x), sep = '') 108 | colnames(data) = c('y', colnames(x)) 109 | fit$postfit = ranger::ranger(data = data, dependent.variable.name = 'y', 110 | num.trees = num.trees, mtry = 
ncol(x), write.forest = TRUE) 111 | } 112 | 113 | class(fit) = 'PTOforest' 114 | fit 115 | } 116 | 117 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a bag of causal MARS models 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param nbag number of models to bag 7 | #' @param maxterms maximum number of terms to include in the regression basis 8 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 9 | #' @param nquant number of quantiles used in splitting 10 | #' @param degree max number of different predictors that can interact in model 11 | #' @param eps shrinkage factor for new term added 12 | #' @param backstep logical: should out-of-bag samples be used to prune each 13 | #' model? otherwise full regression basis is used for each model 14 | #' @param propensity logical: should propensity score stratification be used? 15 | #' @param stratum optional vector giving propensity score stratum for each 16 | #' observation (only used if \code{propensity = TRUE}) 17 | #' @param minnum minimum number of observations in each arm of each propensity 18 | #' score stratum needed to estimate regression coefficients for basis 19 | #' (only used if \code{propensity = TRUE}) 20 | #' @param verbose logical: should progress be printed to console? 21 | #' 22 | #' @return an object of class \code{bagged.causalMARS}, which is itself a list 23 | #' of \code{causalMARS} objects 24 | #' 25 | #' @examples 26 | #'# Randomized experiment example 27 | #' 28 | #'n = 100 # number of training-set patients to simulate 29 | #'p = 10 # number of features for each training-set patient 30 | #' 31 | #'# Simulate data 32 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 33 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 34 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 35 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 36 | #' 37 | #'# Estimate bagged causal MARS model 38 | #'fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10) 39 | #'pred_bcm = predict(fit_bcm, newx = x) 40 | #' 41 | #'# Visualize results 42 | #'plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 43 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 44 | #'abline(0, 1, lty = 2) 45 | #' 46 | #' @export 47 | 48 | bagged.causalMARS = function(x, tx, y, nbag = 20, maxterms = 11, nquant = 5, 49 | degree = ncol(x), eps = 1, backstep = FALSE, 50 | propensity = FALSE, stratum = rep(1, nrow(x)), minnum = 5, verbose = FALSE) { 51 | 52 | 53 | # Input sanitization 54 | 55 | x = as.matrix(x) 56 | 57 | if (nrow(x) != length(tx)) { 58 | stop('nrow(x) does not match length(tx)') 59 | 60 | } else if (nrow(x) != length(y)) { 61 | stop('nrow(x) does not match length(y)') 62 | 63 | } else if (!is.numeric(x)) { 64 | stop('x must be numeric matrix') 65 | 66 | } else if (!is.numeric(y)) { 67 | stop('y must be numeric (use 0/1 for binary response)') 68 | 69 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 70 | stop('tx must be vector of 0s and 1s') 71 | 72 | } 73 | 74 | 75 | x = scale(x, center = TRUE, scale = FALSE) 76 | 77 | fit = list() 78 | 79 | for (b in 1:nbag) { 80 | 81 | if (verbose) cat(c('BAG=', b, '/', nbag), fill = 
TRUE) 82 | 83 | bag = sample(1:nrow(x), size = nrow(x), replace = TRUE) 84 | oob = rep(TRUE, nrow(x)) 85 | oob[bag] = FALSE 86 | fit[[b]] = causalMARS(x = x[bag, ], tx = tx[bag], y = y[bag], 87 | maxterms = maxterms, nquant = nquant, degree = degree, eps = eps, 88 | backstep = backstep, x.val = x[oob, ], tx.val = tx[oob], y.val = y[oob], 89 | propensity = propensity, stratum = stratum[bag], 90 | stratum.val = stratum[oob], minnum = minnum) 91 | } 92 | class(fit) = 'bagged.causalMARS' 93 | fit 94 | } 95 | 96 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal boosting model 2 | #' 3 | #' @useDynLib causalLearning 4 | #' 5 | #' @param x matrix of covariates 6 | #' @param tx vector of treatment indicators (0 or 1) 7 | #' @param y vector of response values 8 | #' @param num.trees number of shallow causal trees to build 9 | #' @param maxleaves maximum number of leaves per causal tree 10 | #' @param eps learning rate 11 | #' @param splitSpread how far apart should the candidate splits be for the 12 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 13 | #' cutpoints as candidates for making split 14 | #' @param x.est optional matrix of estimation-set covariates used for honest 15 | #' re-estimation (ignored if \code{tx.est = NULL} or \code{y.est = NULL}) 16 | #' @param tx.est optional vector of estimation-set treatment indicators 17 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 18 | #' @param y.est optional vector of estimation-set response values 19 | #' (ignored if \code{x.est = NULL} or \code{y.est = NULL}) 20 | #' @param propensity logical: should propensity score stratification be used? 21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.est optional vector giving propensity score stratum for each 24 | #' estimation-set observation (ignored if \code{x.est = NULL} or 25 | #' \code{tx.est = NULL} or \code{y.est = NULL}) 26 | #' @param isConstVar logical: for the causal tree splitting criterion 27 | #' (T-statistc), should it be assumed that the noise variance is the same in 28 | #' treatment and control arms? 29 | #' 30 | #' @return an object of class \code{causalBoosting} with attributes: 31 | #' \itemize{ 32 | #' \item CBM: a list storing the intercept, the causal trees and \code{eps} 33 | #' \item tauhat: matrix of treatment effects for each patient for each step 34 | #' \item G1: estimated-treatment conditional mean for each patient 35 | #' \item G0: estimated-control conditional mean for each patient 36 | #' \item err.y: training error at each step, in predicting response 37 | #' \item num.trees: number of trees specified by function call 38 | #' } 39 | #' 40 | #' @details 41 | #' This function exists primarily to be called by cv.causalBoosting because 42 | #' the num.trees parameter generally needs to be tuned via cross-validation. 
43 | #' 44 | #' @examples 45 | #'# Randomized experiment example 46 | #' 47 | #'n = 100 # number of training-set patients to simulate 48 | #'p = 10 # number of features for each training-set patient 49 | #' 50 | #'# Simulate data 51 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 52 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 53 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 54 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 55 | #' 56 | #'# Estimate causal boosting model 57 | #'fit_cb = causalBoosting(x, tx, y, num.trees = 500) 58 | #'pred_cb = predict(fit_cb, newx = x, num.trees = 500) 59 | #' 60 | #'# Visualize results 61 | #'plot(tx_effect, pred_cb, main = 'Causal boosting', 62 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 63 | #'abline(0, 1, lty = 2) 64 | #' 65 | #' @export 66 | 67 | causalBoosting = function(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 68 | splitSpread = 0.1, x.est = NULL, tx.est = NULL, y.est = NULL, 69 | propensity = FALSE, stratum = NULL, stratum.est = NULL, 70 | isConstVar = TRUE) { 71 | 72 | 73 | # Input sanitization 74 | 75 | x = as.matrix(x) 76 | 77 | if (nrow(x) != length(tx)) { 78 | stop('nrow(x) does not match length(tx)') 79 | 80 | } else if (nrow(x) != length(y)) { 81 | stop('nrow(x) does not match length(y)') 82 | 83 | } else if (!is.numeric(x)) { 84 | stop('x must be numeric matrix') 85 | 86 | } else if (!is.numeric(y)) { 87 | stop('y must be numeric (use 0/1 for binary response)') 88 | 89 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 90 | stop('tx must be vector of 0s and 1s') 91 | 92 | } 93 | 94 | 95 | # s indices are 0-based 96 | maxNodes = 2 * maxleaves - 1 97 | 98 | # if (usePropensity ^ !is.null(s)) { warnings('Non-consistent options: whether to 99 | # use propensity score will be based on value of s.') } 100 | 101 | if (is.null(stratum)) { 102 | if (propensity) stop('stratum must be specified if propensity = TRUE') 103 | stratum = -1 104 | } 105 | if (is.null(x.est) || is.null(y.est) || is.null(tx.est)) { 106 | x.est = y.est = tx.est = stratum.est = -1 107 | n.est = 1 108 | } else { 109 | n.est = nrow(x.est) 110 | if (is.null(stratum.est)) { 111 | stratum.est = -1 112 | } 113 | } 114 | 115 | vtxeff = 0 116 | 117 | fit = .C("causalBoosting", as.double(x), as.double(y), as.integer(tx), 118 | as.double(x.est), as.double(y.est), as.integer(tx.est), 119 | as.integer(num.trees), as.integer(maxleaves), as.double(eps), 120 | as.integer(propensity), as.integer(stratum), as.integer(stratum.est), 121 | as.integer(isConstVar), as.integer(nrow(x)), as.integer(ncol(x)), 122 | as.integer(n.est), as.double(vtxeff), as.double(splitSpread), 123 | var = integer(num.trees * maxNodes), val = double(num.trees * maxNodes), 124 | left = integer(num.trees * maxNodes), 125 | right = integer(num.trees * maxNodes), 126 | y0bar = double(1), y1bar = double(1), pred0 = double(num.trees * maxNodes), 127 | pred1 = double(num.trees * maxNodes), cost = double(num.trees * maxNodes), 128 | pred0e = double(num.trees * maxNodes), 129 | pred1e = double(num.trees * maxNodes), G0 = double(nrow(x)), 130 | G1 = double(nrow(x)), err.y = double(num.trees), err = double(num.trees), 131 | tauhat = double(num.trees * nrow(x)), PACKAGE = 'causalLearning') 132 | 133 | CBM = list() 134 | CBM$intercept = c(fit$y0bar, fit$y1bar) 135 | CBM$trees = list() 136 | CBM$eps = eps 137 | 138 | for (k in 1:num.trees) { 139 | start = (k - 1) * 
maxNodes + 1 140 | end = k * maxNodes 141 | tree = list(var = fit$var[start:end] + 1, val = fit$val[start:end], 142 | left = fit$left[start:end] + 1, right = fit$right[start:end] + 1, 143 | pred0 = fit$pred0[start:end], pred1 = fit$pred1[start:end], 144 | cost = fit$cost[start:end], pred0e = fit$pred0e[start:end], 145 | pred1e = fit$pred1e[start:end]) 146 | class(tree) = "causalTree" 147 | CBM$trees[[k]] = tree 148 | } 149 | result = list(CBM = CBM, tauhat = matrix(fit$tauhat, nrow = nrow(x)), 150 | G1 = fit$G1, G0 = fit$G0, err.y = fit$err.y, num.trees = num.trees) 151 | 152 | class(result) = "causalBoosting" 153 | 154 | result 155 | } 156 | 157 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Fit a causal MARS model 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param maxterms maximum number of terms to include in the regression basis 7 | #' (e.g. \code{maxterms = 11} means intercept + 5 pairs added) 8 | #' @param nquant number of quantiles used in splitting 9 | #' @param degree max number of different predictors that can interact in model 10 | #' @param eps shrinkage factor for new term added 11 | #' @param backstep logical: after building out regression basis, should 12 | #' backward stepwise selection be used to create a sequence of models, with 13 | #' the criterion evaluated on a validation set to choose among the sequence? 14 | #' @param x.val optional matrix of validation-set covariates 15 | #' (only used if \code{backstep = TRUE}) 16 | #' @param tx.val optional vector of validation-set treatment indicators 17 | #' (only used if \code{backstep = TRUE}) 18 | #' @param y.val optional vector of validation-set response values 19 | #' (only used if \code{backstep = TRUE}) 20 | #' @param propensity logical: should propensity score stratification be used? 
21 | #' @param stratum optional vector giving propensity score stratum for each 22 | #' observation (only used if \code{propensity = TRUE}) 23 | #' @param stratum.val optional vector giving propensity score stratum for each 24 | #' validation-set observation 25 | #' (only used if \code{propensity = backstep = TRUE}) 26 | #' @param minnum minimum number of observations in each arm of each propensity 27 | #' score stratum needed to estimate regression coefficients for basis 28 | #' (only used if \code{propensity = TRUE}) 29 | #' 30 | #' @return an object of class \code{causalMARS} with attributes: 31 | #' \itemize{ 32 | #' \item parent: indices of nodes that are parents at each stage 33 | #' \item childvar: index of predictor chosen at each forward step 34 | #' \item childquant: quantile of cutoff chosen at each forward step 35 | #' \item quant: quantiles of the columns of x 36 | #' \item active: indices of columns with nonzero norm 37 | #' \item allvars: list of variables appearing in each term 38 | #' \item khat: the sequence of terms deleted at each step 39 | #' \item deltahat: relative change in rss 40 | #' \item rsstesthat: validation-set rss achieved by each model in sequence 41 | #' \item setesthat: standard error for rsstesthat 42 | #' \item tim1: time elapsed during forward stepwise phase 43 | #' \item tim2: total time elapsed 44 | #' \item x 45 | #' \item tx 46 | #' \item y 47 | #' \item maxterms 48 | #' \item eps 49 | #' \item backstep 50 | #' \item propensity 51 | #' \item x.val 52 | #' \item tx.val 53 | #' \item y.val 54 | #' \item stratum 55 | #' \item stratum.val 56 | #' \item minnum 57 | #' } 58 | #' 59 | #' @details 60 | #' parallel arms mars with backward stepwise BOTH randomized case and 61 | #' propensity stratum. data structures: model terms (nodes) are numbered 62 | #' 1, 2, ... with 1 representing the intercept. forward stepwise: 63 | #' modmatrix contains basis functions as model is built up -- two columns are 64 | #' added at each step. Does not include a column of ones for tidiness, 65 | #' we always add two terms, even when term added in linear (so that reflected 66 | #' version is just zero). 67 | #' backward stepwise: khat is the sequence of terms deleted at each step, 68 | #' based on deltahat = relative change in rss. rsstesthat is rss over test 69 | #' (validation) set achieved by each reduced model in sequence- used later for 70 | #' selecting a member of the sequence. 
active2 contains indices of columns with 71 | #' nonzero norm 72 | #' 73 | #' @examples 74 | #'# Randomized experiment example 75 | #' 76 | #'n = 100 # number of training-set patients to simulate 77 | #'p = 10 # number of features for each training-set patient 78 | #' 79 | #'# Simulate data 80 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 81 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 82 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 83 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 84 | #' 85 | #'# Estimate causal MARS model 86 | #'fit_cm = causalMARS(x, tx, y) 87 | #'pred_cm = predict(fit_cm, newx = x) 88 | #' 89 | #'# Visualize results 90 | #'plot(tx_effect, pred_cm, main = 'Causal MARS', 91 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 92 | #'abline(0, 1, lty = 2) 93 | #' 94 | #' @export 95 | 96 | causalMARS = function(x, tx, y, 97 | maxterms = 11, nquant = 5, degree = ncol(x), eps = 1, 98 | backstep = FALSE, x.val = NULL, tx.val = NULL, y.val = NULL, 99 | propensity = FALSE, stratum = rep(1, nrow(x)), stratum.val = NULL, 100 | minnum = 5) { 101 | 102 | 103 | # Input sanitization 104 | 105 | x = as.matrix(x) 106 | 107 | if (nrow(x) != length(tx)) { 108 | stop('nrow(x) does not match length(tx)') 109 | 110 | } else if (nrow(x) != length(y)) { 111 | stop('nrow(x) does not match length(y)') 112 | 113 | } else if (!is.numeric(x)) { 114 | stop('x must be numeric matrix') 115 | 116 | } else if (!is.numeric(y)) { 117 | stop('y must be numeric (use 0/1 for binary response)') 118 | 119 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 120 | stop('tx must be vector of 0s and 1s') 121 | 122 | } 123 | 124 | 125 | BIG = 1e+10 126 | n = nrow(x) 127 | p = ncol(x) 128 | x = scale(x, TRUE, FALSE) #NOTE 129 | if(!is.null(x.val)) { 130 | x.val = scale(x.val, TRUE, FALSE) 131 | } 132 | 133 | # compute quantiles for splitting 134 | discrete = rep(FALSE, p) 135 | for (j in 1:p) { 136 | if (length(table(x[, j])) == 2) 137 | discrete[j] = TRUE 138 | } 139 | 140 | probs = seq(0, 1, length = nquant)[-nquant] 141 | quant = apply(x, 2, stats::quantile, probs) 142 | 143 | nquantm = rep(nquant, p) 144 | if (sum(discrete) > 0) { 145 | for (j in which(discrete)) { 146 | nquantm[j] = 2 147 | quant[, j] = NA 148 | quant[1, j] = 0 149 | } 150 | } 151 | 152 | if (propensity) { 153 | stratum.sizes = table(stratum)/nrow(x) 154 | nstratum = length(stratum.sizes) 155 | if (sum(as.numeric(names(stratum.sizes)) != (1:length(stratum.sizes))) != 0) 156 | stop("Strata should be numbered 1:k") 157 | stratum.val.sizes = table(stratum.val)/nrow(x.val) 158 | nstratum.val = length(stratum.val.sizes) 159 | if (sum(as.numeric(names(stratum.val.sizes)) != (1:length(stratum.val.sizes))) != 160 | 0) 161 | stop("Stratatest should be numbered 1:k") 162 | } 163 | 164 | modmatrix = matrix(0, nrow = n, ncol = maxterms) 165 | active = rep(FALSE, maxterms) 166 | active[1] = TRUE 167 | modmatrix[, 1] = 1 168 | 169 | r = y - y 170 | a0 = c(mean(y[tx == 0]), mean(y[tx == 1])) 171 | 172 | r[tx == 0] = y[tx == 0] - mean(y[tx == 0]) 173 | r[tx == 1] = y[tx == 1] - mean(y[tx == 1]) 174 | 175 | parent = childvar = childquant = NULL 176 | allvars = vector("list", maxterms) 177 | maxscorall = rep(NA, maxterms) 178 | nterms = 1 179 | 180 | 181 | # forward stepwise 182 | while (nterms < maxterms) { 183 | maxscor = -1 * BIG 184 | act = active[1:nterms] 185 | num = unlist(lapply(allvars, 
length))[1:nterms] 186 | act = act & (num < degree) 187 | 188 | for (ii in (1:nterms)[act]) { 189 | jlist = rep(TRUE, p) 190 | if (ii > 1) 191 | jlist[allvars[[ii]]] = FALSE 192 | 193 | 194 | for (j in which(jlist)) { 195 | for (k in 1:(nquantm[j] - 1)) { 196 | # bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) bx2 = 197 | # modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) NOTE have to make 198 | # correspondng chanegs in propensity section below! 199 | bx = NULL 200 | bx1 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 1) 201 | if (sum(bx1^2) > 0) 202 | bx = cbind(bx, bx1) 203 | bx2 = modmatrix[, ii] * truncpow(x[, j], quant[k, j], dir = 2) 204 | if (sum(bx2^2) > 0) 205 | bx = cbind(bx, bx2) 206 | 207 | if (!propensity) { 208 | 209 | scor = -0.5 * BIG 210 | if (!is.null(bx)) { 211 | res = myridge(bx, r, int = TRUE)$res 212 | res0 = myridge(bx[tx == 0, ], r[tx == 0], int = TRUE)$res 213 | res1 = myridge(bx[tx == 1, ], r[tx == 1], int = TRUE)$res 214 | scor = sum(res^2) - sum(res0^2) - sum(res1^2) 215 | } 216 | } 217 | 218 | if (propensity) { 219 | if (!is.null(bx)) { 220 | scor = 0 221 | 222 | res0 = res1 = rep(0, n) 223 | for (s in 1:nstratum) { 224 | 225 | res = myridge(bx[stratum == s, ], r[stratum == s])$res 226 | 227 | if (sum(tx == 0 & stratum == s) >= minnum) { 228 | res0[stratum == s & tx == 0] = myridge(bx[tx == 0 & stratum == 229 | s, ], r[tx == 0 & stratum == s])$res 230 | } 231 | if (sum(tx == 1 & stratum == s) >= minnum) { 232 | res1[stratum == s & tx == 1] = myridge(bx[tx == 1 & stratum == 233 | s, ], r[tx == 1 & stratum == s])$res 234 | } 235 | scor = scor + (sum(res^2) - sum(res0^2) - sum(res1^2)) 236 | 237 | } 238 | res0 = res0[tx == 0] 239 | res1 = res1[tx == 1] 240 | } 241 | } # end of propensity loop 242 | 243 | 244 | if (scor > maxscor) { 245 | maxscor = scor 246 | 247 | iihat = ii 248 | jhat = j 249 | khat = k 250 | res0hat = res0 251 | res1hat = res1 252 | } 253 | } 254 | } 255 | } #end of for loop 256 | 257 | maxscorall[ii] = maxscor 258 | new1 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 1) 259 | new2 = modmatrix[, iihat] * truncpow(x[, jhat], quant[khat, jhat], dir = 2) 260 | if (!is.matrix(new1)) 261 | new1 = matrix(new1, ncol = 1) 262 | if (!is.matrix(new2)) 263 | new2 = matrix(new2, ncol = 1) 264 | 265 | 266 | modmatrix[, nterms + 1] = new1 267 | modmatrix[, nterms + 2] = new2 268 | 269 | 270 | active[nterms + 1] = TRUE 271 | active[nterms + 2] = TRUE 272 | 273 | 274 | allvars[[nterms + 1]] = allvars[[nterms + 2]] = c(allvars[[iihat]], jhat) 275 | nterms = nterms + 2 276 | 277 | 278 | r[tx == 0] = r[tx == 0] * (1 - eps) + eps * res0hat 279 | r[tx == 1] = r[tx == 1] * (1 - eps) + eps * res1hat 280 | 281 | parent = c(parent, iihat) 282 | childvar = c(childvar, jhat) 283 | 284 | childquant = c(childquant, khat) 285 | } #end of while loop 286 | 287 | 288 | active = colSums(modmatrix^2) > 0 289 | deltahat = khat = NA 290 | out = list(parent = parent, childvar = childvar, childquant = childquant, quant = quant, 291 | active = active, eps = eps, allvars = allvars) 292 | 293 | tim1 = proc.time() 294 | # cat('forward done', fill = TRUE) 295 | 296 | 297 | # backward deletion 298 | rsstesthat = setesthat = NULL 299 | 300 | if (backstep) { 301 | 302 | if(is.null(x.val) | is.null(tx.val) | is.null(y.val)) { 303 | stop('If backstep = TRUE, then x.val, tx.val, y.val must be specified.') 304 | } 305 | 306 | BIG = 1e+10 307 | modmatrix = makebx.newmars(out, x, remove.zerocols = FALSE)[, -1] 308 | modmatrix.val = 
makebx.newmars(out, x.val, remove.zerocols = FALSE)[, -1] 309 | ss = colSums(modmatrix^2) > 0 310 | 311 | active2 = rep(TRUE, ncol(modmatrix.val)) 312 | active2[!ss] = FALSE 313 | khat = deltahat = deltatesthat = rsstesthat = setesthat = rep(NA, sum(active2)) 314 | rtest = rep(NA, nrow(x.val)) 315 | 316 | go = sum(active2) > 0 317 | 318 | ii = 0 319 | while (go) { 320 | go = FALSE 321 | 322 | delta = deltatest = rsstest = rep(BIG, length(active2)) 323 | # train 324 | if (!propensity) { 325 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0, ], y[tx == 326 | 0], int = TRUE) 327 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1, ], y[tx == 328 | 1], int = TRUE) 329 | res0 = fit0$res 330 | res1 = fit1$res 331 | 332 | # test 333 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 0, 334 | , drop = FALSE]) %*% fit0$coef 335 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 1, 336 | , drop = FALSE]) %*% fit1$coef 337 | res0test = (y.val[tx.val == 0] - yhat0) 338 | res1test = (y.val[tx.val == 1] - yhat1) 339 | rss0testsq = sum(res0test^2) 340 | rss1testsq = sum(res1test^2) 341 | } 342 | 343 | if (propensity) 344 | { 345 | rss0sq = rss1sq = rss0testsq = rss1testsq = 0 346 | # initial fit with parallel terms training 347 | for (s in 1:nstratum) { 348 | res0 = res1 = 0 349 | if (sum(tx == 0 & stratum == s) >= minnum) { 350 | fit0 = myridge(modmatrix[, active2, drop = FALSE][tx == 0 & stratum == 351 | s, ], y[tx == 0 & stratum == s], int = TRUE) 352 | res0 = fit0$res 353 | } 354 | if (sum(tx == 1 & stratum == s) >= minnum) { 355 | fit1 = myridge(modmatrix[, active2, drop = FALSE][tx == 1 & stratum == 356 | s, ], y[tx == 1 & stratum == s], int = TRUE) 357 | res1 = fit1$res 358 | } 359 | rss0sq = rss0sq + sum(res0^2) 360 | rss1sq = rss1sq + sum(res1^2) 361 | 362 | 363 | # test 364 | 365 | res0test = res1test = 0 366 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 367 | yhat0 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 368 | 0 & stratum.val == s, ]) %*% fit0$coef 369 | res0test = (y.val[tx.val == 0 & stratum.val == s] - yhat0) 370 | } 371 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 372 | yhat1 = cbind(1, modmatrix.val[, active2, drop = FALSE][tx.val == 373 | 1 & stratum.val == s, ]) %*% fit1$coef 374 | res1test = (y.val[tx.val == 1 & stratum.val == s] - yhat1) 375 | } 376 | rss0testsq = rss0testsq + sum(res0test^2) 377 | rss1testsq = rss1testsq + sum(res1test^2) 378 | } 379 | } #end of propensity 380 | 381 | 382 | for (k in which(active2)) { 383 | # try collapsing a training set term 384 | act = active2 385 | act[k] = FALSE 386 | 387 | if (!propensity) { 388 | 389 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 390 | r = redfit$res 391 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0, ], r[tx == 392 | 0], int = TRUE) 393 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1, ], r[tx == 394 | 1], int = TRUE) 395 | 396 | res00 = fit00$res 397 | res11 = fit11$res 398 | delta[k] = (sum(res00^2) + sum(res11^2) - sum(res0^2) - sum(res1^2))/(sum(res00^2) + 399 | sum(res11^2)) 400 | # try collapsing the same test set term 401 | rtest = y.val - cbind(1, modmatrix.val[, k, drop = FALSE]) %*% redfit$coef 402 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 0, 403 | ]) %*% fit00$coef 404 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 1, 405 | ]) %*% fit11$coef 406 | 407 | res00test = (rtest[tx.val == 0] - yhat00) 408 | res11test = (rtest[tx.val == 1] - yhat11) 409 | 
rss00testsq = sum(res00test^2) 410 | rss11testsq = sum(res11test^2) 411 | } 412 | 413 | if (propensity) 414 | { 415 | rss00sq = rss11sq = rss00testsq = rss11testsq = 0 416 | for (s in 1:nstratum) { 417 | redfit = myridge(modmatrix[, k, drop = FALSE], y, int = TRUE) 418 | r = redfit$res 419 | res00 = res11 = res00test = res11test = 0 420 | if (sum(tx == 0 & stratum == s) >= minnum) { 421 | fit00 = myridge(modmatrix[, act, drop = FALSE][tx == 0 & stratum == 422 | s, ], r[tx == 0 & stratum == s], int = TRUE) 423 | res00 = fit00$res 424 | } 425 | if (sum(tx == 1 & stratum == s) >= minnum) { 426 | fit11 = myridge(modmatrix[, act, drop = FALSE][tx == 1 & stratum == 427 | s, ], r[tx == 1 & stratum == s], int = TRUE) 428 | res11 = fit11$res 429 | } 430 | 431 | rss00sq = rss00sq + sum(res00^2) 432 | rss11sq = rss11sq + sum(res11^2) 433 | 434 | # try deleting the same test set term 435 | 436 | 437 | if (sum(tx.val == 0 & stratum.val == s) >= minnum) { 438 | 439 | yhat00 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 440 | 0 & stratum.val == s, ]) %*% fit00$coef 441 | res00test = (y.val[tx.val == 0 & stratum.val == s] - yhat00) 442 | } 443 | if (sum(tx.val == 1 & stratum.val == s) >= minnum) { 444 | yhat11 = cbind(1, modmatrix.val[, act, drop = FALSE][tx.val == 445 | 1 & stratum.val == s, ]) %*% fit11$coef 446 | res11test = (y.val[tx.val == 1 & stratum.val == s] - yhat11) 447 | } 448 | 449 | rss00testsq = rss00testsq + sum(res00test^2) 450 | rss11testsq = rss11testsq + sum(res11test^2) 451 | 452 | } 453 | delta[k] = (rss00sq + rss11sq - rss0sq - rss1sq)/(rss00sq + rss11sq) 454 | } #end of propensity 455 | 456 | # deltatest[k] = (sum(res00test^2) + sum(res11test^2) - sum(res0test^2) 457 | # -sum(res1test^2))/(sum(res00test^2) + sum(res11test^2)) 458 | 459 | deltatest[k] = (rss00testsq + rss11testsq - rss0testsq - rss1testsq)/(rss00testsq + 460 | rss11testsq) 461 | } #end of propensity 462 | 463 | ii = ii + 1 464 | # cat(c('ii=', ii), fill = TRUE) 465 | khat[ii] = which.min(delta) 466 | # cat(delta,fill=TRUE) 467 | deltahat[ii] = delta[khat[ii]] 468 | deltatesthat[ii] = deltatest[khat[ii]] 469 | n1 = length(res0test) 470 | n2 = length(res1test) 471 | # rsstesthat[ii]=(sum(res0test^2) + sum(res1test^2))/(n1+n2) 472 | rsstesthat[ii] = (rss0testsq + rss1testsq)/(n1 + n2) 473 | setesthat[ii] = NA 474 | # setesthat[ii]=sqrt((n1*var(res0test^2)+n2*var(res1test^2))/(n1+n2)^2) # not 475 | # currently computed 476 | 477 | active2[khat] = FALSE 478 | num.colsleft = sum(colSums(modmatrix.val[, active2, drop = FALSE]^2) > 0) 479 | go = (sum(active2) > 0) & (num.colsleft > 0) 480 | # cat(c(sum(active2), num.colsleft),fill=TRUE) browser() cat(c('dropping ', khat), 481 | # fill = TRUE) 482 | } 483 | 484 | } 485 | out$khat = khat[!is.na(khat)] 486 | out$deltahat = deltahat[!is.na(deltahat)] 487 | out$rsstesthat = rsstesthat 488 | out$setesthat = setesthat 489 | out$tim1 = tim1 490 | out$tim2 = proc.time() 491 | out$x = x 492 | out$tx = tx 493 | out$y = y 494 | out$x.val = x.val 495 | out$tx.val = tx.val 496 | out$y.val = y.val 497 | out$maxterms = maxterms 498 | out$backstep = backstep 499 | out$propensity = propensity 500 | out$stratum = stratum 501 | out$stratum.val = stratum.val 502 | out$minnum = minnum 503 | class(out) = 'causalMARS' 504 | return(out) 505 | } 506 | 507 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | 
#' Fit a causal boosting model with cross validation 2 | #' 3 | #' @param x matrix of covariates 4 | #' @param tx vector of treatment indicators (0 or 1) 5 | #' @param y vector of response values 6 | #' @param num.trees number of shallow causal trees to build 7 | #' @param maxleaves maximum number of leaves per causal tree 8 | #' @param eps learning rate 9 | #' @param splitSpread how far apart should the candidate splits be for the 10 | #' causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 11 | #' cutpoints as candidates for making split 12 | #' @param type.measure loss to use for cross validation: 13 | #' 'response' returns mean-square error for predicting response in each arm. 14 | #' 'effect' returns MSE for treatment effect using honest over-fit estimation. 15 | #' @param nfolds number of cross validation folds 16 | #' @param foldid vector of fold membership 17 | #' @param propensity logical: should propensity score stratification be used? 18 | #' @param stratum optional vector giving propensity score stratum for each 19 | #' observation (only used if \code{propensity = TRUE}) 20 | #' @param isConstVar logical: for the causal tree splitting criterion 21 | #' (T-statistc), should it be assumed that the noise variance is the same in 22 | #' treatment and control arms? 23 | #' 24 | #' @return an object of class \code{cv.causalBoosting} which is an object of 25 | #' class \code{causalBoosting} with these additional attributes: 26 | #' \itemize{ 27 | #' \item num.trees.min: number of trees with lowest CV error 28 | #' \item cvm: vector of mean CV error for each number of trees 29 | #' \item cvsd: vector of standard errors for mean CV errors 30 | #' } 31 | #' 32 | #' @examples 33 | #'# Randomized experiment example 34 | #' 35 | #'n = 100 # number of training-set patients to simulate 36 | #'p = 10 # number of features for each training-set patient 37 | #' 38 | #'# Simulate data 39 | #'x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 40 | #'tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 41 | #'tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 42 | #'y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 43 | #' 44 | #'# Estimate causal boosting model with cross-validation 45 | #'fit_cv = cv.causalBoosting(x, tx, y) 46 | #'fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 47 | #'pred_cv = predict(fit_cv, newx = x) 48 | #' 49 | #'# Visualize results 50 | #'plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 51 | #' xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 52 | #'abline(0, 1, lty = 2) 53 | #' 54 | #' @export 55 | 56 | 57 | cv.causalBoosting = function(x, tx, y, 58 | num.trees = 500, maxleaves = 4, eps = 0.01, splitSpread = 0.1, 59 | type.measure = c('effect', 'response'), nfolds = 5, foldid = NULL, 60 | propensity = FALSE, stratum = NULL, isConstVar = TRUE) { 61 | 62 | 63 | # Input sanitization 64 | 65 | x = as.matrix(x) 66 | 67 | if (nrow(x) != length(tx)) { 68 | stop('nrow(x) does not match length(tx)') 69 | 70 | } else if (nrow(x) != length(y)) { 71 | stop('nrow(x) does not match length(y)') 72 | 73 | } else if (!is.numeric(x)) { 74 | stop('x must be numeric matrix') 75 | 76 | } else if (!is.numeric(y)) { 77 | stop('y must be numeric (use 0/1 for binary response)') 78 | 79 | } else if (!is.numeric(tx) | length(setdiff(tx, 0:1)) > 0) { 80 | stop('tx must be vector of 0s and 1s') 81 | 82 | } 83 | 84 | 85 | type.measure = 
match.arg(type.measure) 86 | 87 | if (is.null(foldid)) foldid = sample(rep(1:nfolds, length = nrow(x))) 88 | nfolds = length(unique(foldid)) 89 | 90 | fit = list() 91 | pred.response = matrix(0, nrow(x), num.trees) 92 | pred.effect = matrix(0, nrow(x), num.trees) 93 | pred.refit = rep(NA, nrow(x)) 94 | 95 | for (k in 1:nfolds) { 96 | 97 | x.val = x[foldid == k, , drop = FALSE] 98 | tx.val = tx[foldid == k] 99 | y.val = y[foldid == k] 100 | stratum.val = stratum[foldid == k] 101 | 102 | fit[[k]] = causalBoosting(x = x[foldid != k, , drop = FALSE], 103 | tx = tx[foldid != k], y = y[foldid != k], 104 | num.trees = num.trees, maxleaves = maxleaves, eps = eps, 105 | splitSpread = splitSpread, x.est = x.val, tx.est = tx.val, y.est = y.val, 106 | propensity = propensity, stratum = stratum[foldid != k], 107 | stratum.est = stratum.val, isConstVar = isConstVar) 108 | 109 | tmp = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 1:num.trees, 110 | type = 'conditional.mean') 111 | 112 | tmpMat = rbind(tmp$G0, tmp$G1) 113 | n_out = sum(foldid == k) 114 | pred.response[foldid == k, ] = 115 | tmpMat[tx[foldid == k] * n_out + (1:n_out), ] 116 | 117 | pred.effect[foldid == k, ] = tmp$G1 - tmp$G0 118 | tmp.refit = stats::predict(fit[[k]], x[foldid == k, , drop = FALSE], 119 | 1:num.trees, type = 'conditional.mean', honest = TRUE) 120 | pred.refit[foldid == k] = (tmp.refit$G1 - tmp.refit$G0)[, num.trees] 121 | } 122 | 123 | cvm.effect = colMeans((pred.effect - pred.refit)^2) 124 | cvsd.effect = apply((pred.effect - pred.refit)^2, 2, stats::sd) / 125 | sqrt(nrow(pred.effect)) 126 | 127 | cvm.response = apply(pred.response, 2, function(yhat) mean((yhat - y)^2)) 128 | cvsd.response = apply(pred.response, 2, 129 | function(yhat) stats::sd((yhat - y)^2)) / sqrt(nrow(pred.response)) 130 | 131 | fit = causalBoosting(x = x, tx = tx, y = y, num.trees = num.trees, 132 | maxleaves = maxleaves, eps = eps, splitSpread = splitSpread, 133 | propensity = propensity, stratum = stratum, isConstVar = isConstVar) 134 | 135 | fit$num.trees.min.effect = which.min(cvm.effect) 136 | fit$num.trees.min.response = which.min(cvm.response) 137 | fit$num.trees.1se.effect = 138 | max(which(cvm.effect < min(cvm.effect + cvsd.effect))) 139 | 140 | fit$cvm.effect = cvm.effect 141 | fit$cvsd.effect = cvsd.effect 142 | fit$cvm.response = cvm.response 143 | fit$cvsd.response = cvsd.response 144 | 145 | class(fit) = 'cv.causalBoosting' 146 | fit 147 | } 148 | 149 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/makebx.newmars.R: -------------------------------------------------------------------------------- 1 | 2 | makebx.newmars = function(fit, x, remove.zerocols = FALSE) { 3 | # creates model matrix from 'fit' 4 | quant = fit$quant 5 | parent = fit$parent 6 | childvar = fit$childvar 7 | childquant = fit$childquant 8 | active = fit$active 9 | nterms = length(parent) 10 | nterms2 = 1 + 2 * nterms 11 | 12 | modmatrix = matrix(0, nrow = nrow(x), ncol = nterms2) 13 | modmatrix[, 1] = 1 14 | count = 1 15 | 16 | for (ii in 1:nterms) { 17 | new1 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 18 | childvar[ii]], dir = 1) 19 | modmatrix[, count + 1] = new1 20 | new2 = modmatrix[, parent[ii]] * truncpow(x[, childvar[ii]], quant[childquant[ii], 21 | childvar[ii]], dir = 2) 22 | 23 | modmatrix[, count + 2] = new2 24 | count = count + 2 25 | } 26 | if (remove.zerocols) 27 | modmatrix = modmatrix[, active] 28 | return(modmatrix) 29 | } 
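# Worked illustration (a minimal sketch with hypothetical fit values): for a fit
# with parent = c(1), childvar = c(3) and childquant = c(2), the single forward
# step fills columns 2 and 3 of the model matrix with the reflected hinge pair
#   modmatrix[, 2] = modmatrix[, 1] * truncpow(x[, 3], quant[2, 3], dir = 1)  # (x3 - cut)+
#   modmatrix[, 3] = modmatrix[, 1] * truncpow(x[, 3], quant[2, 3], dir = 2)  # (cut - x3)+
# so each forward-stepwise term contributes two basis columns, one per direction.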
30 | 31 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/myridge.R: -------------------------------------------------------------------------------- 1 | 2 | myridge = function(x, y, int = T, eps = 0.001) { 3 | xx = x 4 | if (!is.matrix(xx)) 5 | xx = matrix(xx, ncol = 1) 6 | lam = rep(eps, ncol(xx)) 7 | 8 | if (int) { 9 | xx = cbind(1, xx) 10 | lam = c(0, lam) 11 | } 12 | d = diag(lam) 13 | if (ncol(xx) == 1) 14 | d = lam 15 | coef = solve(t(xx) %*% xx + d) %*% t(xx) %*% y 16 | res = y - xx %*% coef 17 | list(res = res, coef = coef) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Pollinate a fitted ranger random forest model 2 | #' 3 | #' @param object a fitted \code{ranger} object 4 | #' @param x matrix of covariates 5 | #' @param y vector of response values 6 | #' 7 | #' @return an object of class \code{pollinated.ranger} which is a \code{ranger} 8 | #' object that has been pollinated with the data in (x, y) 9 | 10 | pollinated.ranger = function(object, x, y) { 11 | 12 | forest = object$forest 13 | num.trees = forest$num.trees 14 | which.list = as.list(seq(num.trees)) 15 | split.values = forest$split.values 16 | split.varIDs = forest$split.varIDs 17 | 18 | for (i in 1:num.trees) { 19 | which = match(split.varIDs[[i]], 0, FALSE) 20 | split.values[[i]][which > 0] = seq(sum(which)) 21 | which.list[[i]] = which 22 | } 23 | 24 | forest$split.values = split.values 25 | object$forest = forest 26 | preds = stats::predict(object, x, predict.all = TRUE)$predictions 27 | 28 | ### Get list of means indexed by the unique terminal node values 29 | pmeans = apply(preds, 2, function(f, y) tapply(y, f, mean), y) 30 | 31 | ### Now populate these terminal nodes with these values 32 | for (i in 1:num.trees) { 33 | which = which.list[[i]] 34 | repvec = rep(NA, sum(which)) 35 | pmean = pmeans[[i]] 36 | ids = as.integer(names(pmean)) 37 | repvec[ids] = pmean 38 | split.values[[i]][which > 0] = repvec 39 | } 40 | 41 | forest$split.values = split.values 42 | object$forest = forest 43 | object$mean = mean(y) 44 | class(object) = c('pollinated.ranger', 'ranger') 45 | object 46 | } 47 | 48 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.PTOforest.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted PTO forest model 2 | #' 3 | #' @param object a fitted \code{PTOforest} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param ... ignored 7 | #' 8 | #' @return a vector of predictions corresponding to the rows of \code{newx} 9 | #' 10 | #' @export 11 | 12 | predict.PTOforest = function(object, newx, ...) 
{ 13 | 14 | colnames(newx) = colnames(object$x) 15 | 16 | if (object$postprocess) { 17 | stats::predict(object$postfit, data = newx)$predictions 18 | 19 | } else { 20 | stats::predict(object$PTOfit1, newx = newx) - 21 | stats::predict(object$PTOfit0, newx = newx) 22 | 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.bagged.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a bag of fitted causal MARS models 2 | #' 3 | #' @param object a fitted \code{bagged.causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param type type of prediction required: 7 | #' 'average' returns a vector of the averages of the bootstrap estimates. 8 | #' 'all' returns a matrix of all of the bootstrap estimates. 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.bagged.causalMARS = function(object, newx, type = c('average', 'all'), 17 | ...) { 18 | type = match.arg(type) 19 | newx = scale(newx, center = TRUE, scale = FALSE) 20 | pred = sapply(object, FUN = stats::predict, newx = newx) 21 | switch(type, 22 | all = pred, 23 | average = rowMeans(pred)) 24 | } 25 | 26 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal boosting model 2 | #' 3 | #' @param object a fitted \code{causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param num.trees number(s) of shallow causal trees to use for prediction 13 | #' @param honest logical: should honest re-estimates of leaf means be used for 14 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 15 | #' when the causal boosting model was fit 16 | #' @param naVal value with which to replace \code{NA} predictions 17 | #' @param ... ignored 18 | #' 19 | #' @return a vector or matrix of predictions corresponding to the rows of 20 | #' \code{newx} 21 | #' 22 | #' @export 23 | 24 | predict.causalBoosting = function(object, newx, newtx = NULL, 25 | type = c('treatment.effect', 'conditional.mean', 'response'), 26 | num.trees = 1:object$num.trees, honest = FALSE, naVal = 0, ...) 
{ 27 | 28 | type = match.arg(type) 29 | if (type == 'response' & is.null(newtx)) { 30 | stop('response predictions require that newtx be specified') 31 | } 32 | 33 | CBM = object$CBM 34 | 35 | n = nrow(newx) 36 | eps = CBM$eps 37 | G0 = matrix(CBM$intercept[1], n, max(num.trees) + 1) 38 | G1 = matrix(CBM$intercept[2], n, max(num.trees) + 1) 39 | for (k in 1:max(num.trees)) { 40 | pred = stats::predict(CBM$trees[[k]], newx, type = 'conditional.mean', 41 | honest = honest) 42 | G1[, k + 1] = G1[, k] + eps * pred[, 2] 43 | G0[, k + 1] = G0[, k] + eps * pred[, 1] 44 | } 45 | G1[is.na(G1)] = naVal 46 | G0[is.na(G0)] = naVal 47 | 48 | switch(type, 49 | treatment.effect = (G1 - G0)[, num.trees + 1], 50 | conditional.mean = list(G1 = G1[, num.trees + 1], G0 = G0[, num.trees + 1]), 51 | response = (newtx * G1 + (1 - newtx) * G0)[, num.trees + 1]) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.causalMARS.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal MARS model 2 | #' 3 | #' @param object a fitted \code{causalMARS} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param active indices of columns with nonzero norm (defaults to model 7 | #' selected via backward stepwise phase, or the full model if 8 | #' \code{backstep = FALSE}) 9 | #' @param ... ignored 10 | #' 11 | #' @return a vector of estimated personalized treatment effects corresponding 12 | #' to the rows of \code{newx} 13 | #' 14 | #' @export 15 | 16 | predict.causalMARS = function(object, newx, active, ...) { 17 | 18 | if (missing(active)) { 19 | if (object$backstep) { 20 | ntermshat = length(object$khat) - which.min(object$rsstesthat) + 1 21 | active = (rev(object$khat))[1:ntermshat] 22 | } else { 23 | active = 1:(object$maxterms - 1) 24 | } 25 | } 26 | 27 | object$x = scale(object$x, center = TRUE, scale = FALSE) 28 | newx = scale(newx, center = TRUE, scale = FALSE) 29 | 30 | del = rep(mean(object$y[object$tx == 1]) - mean(object$y[object$tx == 0]), 31 | nrow(newx)) 32 | stratum = object$stratum 33 | stratum.val = object$stratum.val 34 | minnum = object$minnum 35 | nstrata = length(unique(stratum)) 36 | stratawt = (table(c(stratum, 1:nstrata)) - 1) 37 | stratawt = stratawt / sum(stratawt) 38 | 39 | if (length(active) > 0) { 40 | if (!object$propensity) { 41 | # make training matrices 42 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 43 | beta0 = myridge(bx0[object$tx == 0, ], object$y[object$tx == 0])$coef 44 | beta1 = myridge(bx0[object$tx == 1, ], object$y[object$tx == 1])$coef 45 | # make test set matrices 46 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 47 | del = cbind(1, bx) %*% (beta1 - beta0) 48 | } 49 | if (object$propensity) { 50 | # make training matrices 51 | bx0 = (makebx.newmars(object, object$x)[, -1])[, active, drop = FALSE] 52 | beta0 = beta1 = matrix(NA, ncol(bx0) + 1, nstrata) 53 | for (ss in 1:nstrata) { 54 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 55 | beta0[, ss] = myridge(bx0[object$tx == 0 & stratum == ss, ], 56 | object$y[object$tx == 0 & stratum == ss])$coef 57 | beta1[, ss] = myridge(bx0[object$tx == 1 & stratum == ss, ], 58 | object$y[object$tx == 1 & stratum == ss])$coef 59 | } 60 | } 61 | # make test set matrices 62 | bx <- (makebx.newmars(object, newx)[, -1])[, active, drop = FALSE] 63 | del = 
rep(0, nrow(bx)) 64 | totwt = 0 65 | for (ss in 1:nstrata) { 66 | if (sum(object$tx == 0 & stratum == ss) > minnum) { 67 | del = del + stratawt[ss] * cbind(1, bx) %*% 68 | (beta1[, ss] - beta0[, ss]) 69 | totwt = totwt + stratawt[ss] 70 | } 71 | } 72 | del = del / totwt 73 | } 74 | } 75 | return(c(del)) 76 | } 77 | 78 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.causalTree.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted causal tree model 2 | #' 3 | #' @param object a fitted \code{causalTree} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'response'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 12 | #' @param honest logical: should honest re-estimates of leaf means be used for 13 | #' prediction? This requires that \code{x.est, tx.est, y.est} were specified 14 | #' when the causal boosting model was fit 15 | #' @param naVal value with which to replace \code{NA} predictions 16 | #' @param ... ignored 17 | #' 18 | #' @return a vector or matrix of predictions corresponding to the rows of 19 | #' \code{newx} 20 | #' 21 | #' @export 22 | 23 | predict.causalTree = function(object, newx, newtx = NULL, 24 | type = c('treatment.effect', 'conditional.mean', 'response'), honest = FALSE, 25 | naVal = 0, ...) { 26 | 27 | leaf = rep(1, nrow(newx)) 28 | terminal = rep(FALSE, nrow(newx)) 29 | while (sum(!is.na(object$var[leaf])) > 0) { 30 | var = cbind((1:nrow(newx))[!terminal], object$var[leaf[!terminal]]) 31 | leaf[!terminal] = object$left[leaf[!terminal]] + (newx[var] >= object$val[leaf[!terminal]]) 32 | terminal = is.na(object$var[leaf]) 33 | } 34 | pred = list() 35 | pred$leaf = leaf 36 | if (!honest) { 37 | pred$pred0 = object$pred0[leaf] 38 | pred$pred1 = object$pred1[leaf] 39 | } else { 40 | pred$pred0 = object$pred0e[leaf] 41 | pred$pred1 = object$pred1e[leaf] 42 | } 43 | type = match.arg(type) 44 | predMatrix = cbind(pred$pred0, pred$pred1) 45 | predMatrix[is.na(predMatrix)] = naVal 46 | out = switch(type, 47 | treatment.effect = predMatrix[, 2] - predMatrix[, 1], 48 | conditional.mean = predMatrix, 49 | response = ifelse(!is.null(newtx), 50 | predMatrix[cbind(1:nrow(newx), newtx + 1)], rep(NA, nrow(newx)))) 51 | out 52 | } 53 | 54 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.cv.causalBoosting.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a fitted cross-validated causal boosting model 2 | #' 3 | #' @param object a fitted \code{cv.causalBoosting} object 4 | #' @param newx matrix of new covariates for which estimated treatment effects 5 | #' are desired 6 | #' @param newtx option vector of new treatment assignments 7 | #' (only used if \code{type = 'individual'}) 8 | #' @param type type of prediction required: 9 | #' 'treatment.effect' returns estimated treatment effect. 10 | #' 'conditional.mean' returns two predictions, one for each arm. 11 | #' 'response' returns prediction for arm corresponding to newtx. 
12 | #' @param num.trees number of shallow causal trees to use for prediction 13 | #' @param naVal value with which to replace \code{NA} predictions 14 | #' @param ... ignored 15 | #' 16 | #' @return a vector or matrix of predictions corresponding to the rows of 17 | #' \code{newx} 18 | #' 19 | #' @export 20 | 21 | predict.cv.causalBoosting = function(object, newx, newtx = NULL, 22 | type = c('treatment.effect', 'conditional.mean', 'response'), 23 | num.trees = object$num.trees.min.effect, naVal = 0, ...) { 24 | 25 | predict.causalBoosting(object = object, newx = newx, newtx = newtx, 26 | type = type, num.trees = num.trees, naVal = naVal) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/predict.pollinated.ranger.R: -------------------------------------------------------------------------------- 1 | #' Make predictions from a pollinated ranger random forest model 2 | #' 3 | #' @param object a fitted \code{pollinated.ranger} object 4 | #' @param newx matrix of new covariates for which predictions are desired 5 | #' @param predict.all logical: should predictions from all trees be returned? 6 | #' Otherwise the average across trees is returned 7 | #' @param na.treatment how to treat NA predictions from individual trees: 8 | #' 'omit' only uses trees for which the prediction is not NA. 9 | #' 'replace' replaces NA predictions with the overall mean response. 10 | #' 'NA' returns NA if any tree prediction is NA. 11 | #' @param ... additional arguments passed on to \code{predict.ranger} 12 | 13 | #' 14 | #' @return a vector of predicted treatment effects corresponding to the rows of 15 | #' newx 16 | 17 | predict.pollinated.ranger = function(object, newx, predict.all = FALSE, 18 | na.treatment = c('omit', 'replace', 'NA'), ...) 
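# Prediction temporarily re-classes the object to 'ranger' so that ranger's
# own predict method returns per-tree predictions; NA predictions are then
# handled according to na.treatment before (optionally) averaging across trees.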
{ 19 | 20 | na.treatment = match.arg(na.treatment) 21 | 22 | class(object) = 'ranger' 23 | preds = stats::predict(object, newx, predict.all = TRUE, 24 | ...)$predictions 25 | class(object) = 'pollinated.ranger' 26 | 27 | if (na.treatment == 'replace') { 28 | wh <- is.na(preds) 29 | preds[wh] = object$mean 30 | } 31 | 32 | if (!predict.all) { 33 | preds = apply(preds, 1, mean, na.rm = (na.treatment == 'omit')) 34 | } 35 | 36 | preds 37 | } 38 | 39 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/stratify.R: -------------------------------------------------------------------------------- 1 | #' Get propensity strata from propensity scores 2 | #' 3 | #' @param pscore vector of propensity scores 4 | #' @param tx vector of treatment indicators 5 | #' @param min.per.arm minimum number of observations for each arm within each 6 | #' stratum 7 | #' 8 | #' @return a vector of integers with length equal to the length of pscore, 9 | #' reporting the propensity stratum corresponding to each propensity score 10 | #' 11 | #' @export 12 | 13 | stratify = function(pscore, tx, min.per.arm = 30) { 14 | 15 | stratum = ceiling(10 * pscore) 16 | cutoffs = sort(unique(stratum/10)) 17 | stratum = as.numeric(as.factor(stratum)) 18 | 19 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 20 | while(min(num.treated) < min.per.arm & length(unique(stratum)) > 1) { 21 | stratum1 = which.min(num.treated) 22 | cutoffs = cutoffs[-which.min(num.treated)] 23 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 24 | stratum2 = neighbors[which.min(num.treated[neighbors])] 25 | stratum[stratum == stratum1] = stratum2 26 | stratum = as.numeric(as.factor(stratum)) 27 | num.treated = stats::aggregate(tx, list(stratum = stratum), sum)$x 28 | } 29 | 30 | stratum = 1 + max(stratum) - stratum 31 | cutoffs = rev(cutoffs) 32 | 33 | num.control = stats::aggregate(1 - tx, list(stratum = stratum), sum)$x 34 | while(min(num.control) < min.per.arm & length(unique(stratum)) > 1) { 35 | stratum1 = which.min(num.control) 36 | cutoffs = cutoffs[-which.min(num.control)] 37 | neighbors = intersect(stratum, stratum1 + c(-1, 1)) 38 | stratum2 = neighbors[which.min(num.control[neighbors])] 39 | stratum[stratum == stratum1] = stratum2 40 | stratum = as.numeric(as.factor(stratum)) 41 | num.control = stats::aggregate(tx, list(stratum = stratum), sum)$x 42 | } 43 | 44 | cutoffs[1] = 1 45 | cutoffs = rev(cutoffs) 46 | 47 | list(stratum = 1 + max(stratum) - stratum, cutoffs = cutoffs) 48 | } 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/R/truncpow.R: -------------------------------------------------------------------------------- 1 | 2 | truncpow = function(x, cutp, dir = 1) { 3 | if (dir == 1) 4 | out = (x - cutp) * (x > cutp) 5 | if (dir == 2) 6 | out = (cutp - x) * (x < cutp) 7 | return(out) 8 | } 9 | 10 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/README.md: -------------------------------------------------------------------------------- 1 | # causalLearning 2 | Methods for heterogeneous treatment effect estimation 3 | 4 | This R package includes functions for fitting the pollinated 5 | transformed outcome forest, causal boosting and causal MARS from 6 | [Powers et al. (2017)](https://arxiv.org/abs/1707.00102). 
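A minimal usage sketch (adapted from the examples in the package
documentation; the simulated data below is illustrative only):

```r
library(causalLearning)

# Simulate a small randomized experiment
n = 100
p = 10
x = matrix(rnorm(n * p), nrow = n, ncol = p)             # covariates
tx_effect = x[, 1] + (x[, 2] > 0)                        # true treatment effect
tx = rbinom(n, size = 1, p = 0.5)                        # random treatment assignment
y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001)  # response

# Causal boosting, with cross-validation to choose the number of trees
fit_cv = cv.causalBoosting(x, tx, y)
pred_cv = predict(fit_cv, newx = x)

# Bagged causal MARS
fit_bcm = bagged.causalMARS(x, tx, y, nbag = 10)
pred_bcm = predict(fit_bcm, newx = x)

# Pollinated transformed outcome forest
fit_pto = PTOforest(x, tx, y)
pred_pto = predict(fit_pto, newx = x)
```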
7 | 8 | The package is currently in beta, not yet ready for public consumption. 9 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/causalLearning_1.0.0.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/causalLearning_1.0.0.tar.gz -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/PTOforest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PTOforest.R 3 | \name{PTOforest} 4 | \alias{PTOforest} 5 | \title{Fit a pollinated transformed outcome (PTO) forest model} 6 | \usage{ 7 | PTOforest(x, tx, y, pscore = rep(0.5, nrow(x)), num.trees = 500, 8 | mtry = ncol(x), min.node.size = max(25, nrow(x)/40), postprocess = TRUE, 9 | verbose = FALSE) 10 | } 11 | \arguments{ 12 | \item{x}{matrix of covariates} 13 | 14 | \item{tx}{vector of treatment indicators (0 or 1)} 15 | 16 | \item{y}{vector of response values} 17 | 18 | \item{pscore}{vector of propensity scores} 19 | 20 | \item{num.trees}{number of trees for transformed outcome forest} 21 | 22 | \item{mtry}{number of variables to possibly split at in each node} 23 | 24 | \item{min.node.size}{minimum node size for transformed outcome forest} 25 | 26 | \item{postprocess}{logical: should optional post-processing random forest be 27 | fit at end?} 28 | 29 | \item{verbose}{logical: should progress be printed to console?} 30 | } 31 | \value{ 32 | an object of class \code{PTOforest} with attributes: 33 | \itemize{ 34 | \item x: matrix of covariates supplied by function call 35 | \item pscore: vector of propensity score supplied by function call 36 | \item postprocess: logical supplied by function call 37 | \item TOfit: fitted random forest on transformed outcomes 38 | \item PTOfit1: TOfit pollinated with treatment-arm outcomes 39 | \item PTOfit0: TOfit pollinated with control-arm outcomes 40 | \item postfit: post-processing random forest summarizing results 41 | } 42 | } 43 | \description{ 44 | Fit a pollinated transformed outcome (PTO) forest model 45 | } 46 | \examples{ 47 | # Randomized experiment example 48 | 49 | n = 100 # number of training-set patients to simulate 50 | p = 10 # number of features for each training-set patient 51 | 52 | # Simulate data 53 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 54 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 55 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 56 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 57 | 58 | # Estimate PTO forest model 59 | fit_pto = PTOforest(x, tx, y) 60 | pred_pto = predict(fit_pto, newx = x) 61 | 62 | # Visualize results 63 | plot(tx_effect, pred_pto, main = 'PTO forest', 64 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 65 | abline(0, 1, lty = 2) 66 | 67 | } 68 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/bagged.causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in 
R/bagged.causalMARS.R 3 | \name{bagged.causalMARS} 4 | \alias{bagged.causalMARS} 5 | \title{Fit a bag of causal MARS models} 6 | \usage{ 7 | bagged.causalMARS(x, tx, y, nbag = 20, maxterms = 11, nquant = 5, 8 | degree = ncol(x), eps = 1, backstep = FALSE, propensity = FALSE, 9 | stratum = rep(1, nrow(x)), minnum = 5, verbose = FALSE) 10 | } 11 | \arguments{ 12 | \item{x}{matrix of covariates} 13 | 14 | \item{tx}{vector of treatment indicators (0 or 1)} 15 | 16 | \item{y}{vector of response values} 17 | 18 | \item{nbag}{number of models to bag} 19 | 20 | \item{maxterms}{maximum number of terms to include in the regression basis 21 | (e.g. \code{maxterms = 11} means intercept + 5 pairs added)} 22 | 23 | \item{nquant}{number of quantiles used in splitting} 24 | 25 | \item{degree}{max number of different predictors that can interact in model} 26 | 27 | \item{eps}{shrinkage factor for new term added} 28 | 29 | \item{backstep}{logical: should out-of-bag samples be used to prune each 30 | model? otherwise full regression basis is used for each model} 31 | 32 | \item{propensity}{logical: should propensity score stratification be used?} 33 | 34 | \item{stratum}{optional vector giving propensity score stratum for each 35 | observation (only used if \code{propensity = TRUE})} 36 | 37 | \item{minnum}{minimum number of observations in each arm of each propensity 38 | score stratum needed to estimate regression coefficients for basis 39 | (only used if \code{propensity = TRUE})} 40 | 41 | \item{verbose}{logical: should progress be printed to console?} 42 | } 43 | \value{ 44 | an object of class \code{bagged.causalMARS}, which is itself a list 45 | of \code{causalMARS} objects 46 | } 47 | \description{ 48 | Fit a bag of causal MARS models 49 | } 50 | \examples{ 51 | # Randomized experiment example 52 | 53 | n = 100 # number of training-set patients to simulate 54 | p = 10 # number of features for each training-set patient 55 | 56 | # Simulate data 57 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 58 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 59 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 60 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 61 | 62 | # Estimate bagged causal MARS model 63 | fit_bcm = causalLearning::bagged.causalMARS(x, tx, y, nbag = 10) 64 | pred_bcm = predict(fit_bcm, newx = x) 65 | 66 | # Visualize results 67 | plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 68 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 69 | abline(0, 1, lty = 2) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/causalBoosting.R 3 | \name{causalBoosting} 4 | \alias{causalBoosting} 5 | \title{Fit a causal boosting model} 6 | \usage{ 7 | causalBoosting(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 8 | splitSpread = 0.1, x.est = NULL, tx.est = NULL, y.est = NULL, 9 | propensity = FALSE, stratum = NULL, stratum.est = NULL, 10 | isConstVar = TRUE) 11 | } 12 | \arguments{ 13 | \item{x}{matrix of covariates} 14 | 15 | \item{tx}{vector of treatment indicators (0 or 1)} 16 | 17 | \item{y}{vector of response values} 18 | 19 | \item{num.trees}{number of shallow causal trees to build} 20 | 21 | 
\item{maxleaves}{maximum number of leaves per causal tree} 22 | 23 | \item{eps}{learning rate} 24 | 25 | \item{splitSpread}{how far apart should the candidate splits be for the 26 | causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 27 | cutpoints as candidates for making split} 28 | 29 | \item{x.est}{optional matrix of estimation-set covariates used for honest 30 | re-estimation (ignored if \code{tx.est = NULL} or \code{y.est = NULL})} 31 | 32 | \item{tx.est}{optional vector of estimation-set treatment indicators 33 | (ignored if \code{x.est = NULL} or \code{y.est = NULL})} 34 | 35 | \item{y.est}{optional vector of estimation-set response values 36 | (ignored if \code{x.est = NULL} or \code{y.est = NULL})} 37 | 38 | \item{propensity}{logical: should propensity score stratification be used?} 39 | 40 | \item{stratum}{optional vector giving propensity score stratum for each 41 | observation (only used if \code{propensity = TRUE})} 42 | 43 | \item{stratum.est}{optional vector giving propensity score stratum for each 44 | estimation-set observation (ignored if \code{x.est = NULL} or 45 | \code{tx.est = NULL} or \code{y.est = NULL})} 46 | 47 | \item{isConstVar}{logical: for the causal tree splitting criterion 48 | (T-statistc), should it be assumed that the noise variance is the same in 49 | treatment and control arms?} 50 | } 51 | \value{ 52 | an object of class \code{causalBoosting} with attributes: 53 | \itemize{ 54 | \item CBM: a list storing the intercept, the causal trees and \code{eps} 55 | \item tauhat: matrix of treatment effects for each patient for each step 56 | \item G1: estimated-treatment conditional mean for each patient 57 | \item G0: estimated-control conditional mean for each patient 58 | \item err.y: training error at each step, in predicting response 59 | \item num.trees: number of trees specified by function call 60 | } 61 | } 62 | \description{ 63 | Fit a causal boosting model 64 | } 65 | \details{ 66 | This function exists primarily to be called by cv.causalBoosting because 67 | the num.trees parameter generally needs to be tuned via cross-validation. 
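A minimal sketch of that workflow, adapted from the examples elsewhere in
this package (\code{num.trees.min.effect} is the attribute used by the
package's own examples and by \code{predict.cv.causalBoosting} as its
default):
\preformatted{
# Tune the number of trees by cross-validation, then predict
fit_cv = cv.causalBoosting(x, tx, y)
fit_cv$num.trees.min.effect      # number of trees chosen by CV
pred_cv = predict(fit_cv, newx = x)

# Direct call with a fixed number of trees
fit_cb = causalBoosting(x, tx, y, num.trees = 500)
pred_cb = predict(fit_cb, newx = x, num.trees = 500)
}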
68 | } 69 | \examples{ 70 | # Randomized experiment example 71 | 72 | n = 100 # number of training-set patients to simulate 73 | p = 10 # number of features for each training-set patient 74 | 75 | # Simulate data 76 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 77 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 78 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 79 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 80 | 81 | # Estimate causal boosting model 82 | fit_cb = causalBoosting(x, tx, y, num.trees = 500) 83 | pred_cb = predict(fit_cb, newx = x, num.trees = 500) 84 | 85 | # Visualize results 86 | plot(tx_effect, pred_cb, main = 'Causal boosting', 87 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 88 | abline(0, 1, lty = 2) 89 | 90 | } 91 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/causalMARS.R 3 | \name{causalMARS} 4 | \alias{causalMARS} 5 | \title{Fit a causal MARS model} 6 | \usage{ 7 | causalMARS(x, tx, y, maxterms = 11, nquant = 5, degree = ncol(x), 8 | eps = 1, backstep = FALSE, x.val = NULL, tx.val = NULL, 9 | y.val = NULL, propensity = FALSE, stratum = rep(1, nrow(x)), 10 | stratum.val = NULL, minnum = 5) 11 | } 12 | \arguments{ 13 | \item{x}{matrix of covariates} 14 | 15 | \item{tx}{vector of treatment indicators (0 or 1)} 16 | 17 | \item{y}{vector of response values} 18 | 19 | \item{maxterms}{maximum number of terms to include in the regression basis 20 | (e.g. 
\code{maxterms = 11} means intercept + 5 pairs added)} 21 | 22 | \item{nquant}{number of quantiles used in splitting} 23 | 24 | \item{degree}{max number of different predictors that can interact in model} 25 | 26 | \item{eps}{shrinkage factor for new term added} 27 | 28 | \item{backstep}{logical: after building out regression basis, should 29 | backward stepwise selection be used to create a sequence of models, with 30 | the criterion evaluated on a validation set to choose among the sequence?} 31 | 32 | \item{x.val}{optional matrix of validation-set covariates 33 | (only used if \code{backstep = TRUE})} 34 | 35 | \item{tx.val}{optional vector of validation-set treatment indicators 36 | (only used if \code{backstep = TRUE})} 37 | 38 | \item{y.val}{optional vector of validation-set response values 39 | (only used if \code{backstep = TRUE})} 40 | 41 | \item{propensity}{logical: should propensity score stratification be used?} 42 | 43 | \item{stratum}{optional vector giving propensity score stratum for each 44 | observation (only used if \code{propensity = TRUE})} 45 | 46 | \item{stratum.val}{optional vector giving propensity score stratum for each 47 | validation-set observation 48 | (only used if \code{propensity = backstep = TRUE})} 49 | 50 | \item{minnum}{minimum number of observations in each arm of each propensity 51 | score stratum needed to estimate regression coefficients for basis 52 | (only used if \code{propensity = TRUE})} 53 | } 54 | \value{ 55 | an object of class \code{causalMARS} with attributes: 56 | \itemize{ 57 | \item parent: indices of nodes that are parents at each stage 58 | \item childvar: index of predictor chosen at each forward step 59 | \item childquant: quantile of cutoff chosen at each forward step 60 | \item quant: quantiles of the columns of x 61 | \item active: indices of columns with nonzero norm 62 | \item allvars: list of variables appearing in each term 63 | \item khat: the sequence of terms deleted at each step 64 | \item deltahat: relative change in rss 65 | \item rsstesthat: validation-set rss achieved by each model in sequence 66 | \item setesthat: standard error for rsstesthat 67 | \item tim1: time elapsed during forward stepwise phase 68 | \item tim2: total time elapsed 69 | \item x 70 | \item tx 71 | \item y 72 | \item maxterms 73 | \item eps 74 | \item backstep 75 | \item propensity 76 | \item x.val 77 | \item tx.val 78 | \item y.val 79 | \item stratum 80 | \item stratum.val 81 | \item minnum 82 | } 83 | } 84 | \description{ 85 | Fit a causal MARS model 86 | } 87 | \details{ 88 | parallel arms mars with backward stepwise BOTH randomized case and 89 | propensity stratum. data structures: model terms (nodes) are numbered 90 | 1, 2, ... with 1 representing the intercept. forward stepwise: 91 | modmatrix contains basis functions as model is built up -- two columns are 92 | added at each step. Does not include a column of ones for tidiness, 93 | we always add two terms, even when term added in linear (so that reflected 94 | version is just zero). 95 | backward stepwise: khat is the sequence of terms deleted at each step, 96 | based on deltahat = relative change in rss. rsstesthat is rss over test 97 | (validation) set achieved by each reduced model in sequence- used later for 98 | selecting a member of the sequence. 
active2 contains indices of columns with 99 | nonzero norm 100 | } 101 | \examples{ 102 | # Randomized experiment example 103 | 104 | n = 100 # number of training-set patients to simulate 105 | p = 10 # number of features for each training-set patient 106 | 107 | # Simulate data 108 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 109 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 110 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 111 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 112 | 113 | # Estimate causal MARS model 114 | fit_cm = causalLearning::causalMARS(x, tx, y) 115 | pred_cm = predict(fit_cm, newx = x) 116 | 117 | # Visualize results 118 | plot(tx_effect, pred_cm, main = 'Causal MARS', 119 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 120 | abline(0, 1, lty = 2) 121 | 122 | } 123 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/cv.causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cv.causalBoosting.R 3 | \name{cv.causalBoosting} 4 | \alias{cv.causalBoosting} 5 | \title{Fit a causal boosting model with cross validation} 6 | \usage{ 7 | cv.causalBoosting(x, tx, y, num.trees = 500, maxleaves = 4, eps = 0.01, 8 | splitSpread = 0.1, type.measure = c("effect", "response"), nfolds = 5, 9 | foldid = NULL, propensity = FALSE, stratum = NULL, isConstVar = TRUE) 10 | } 11 | \arguments{ 12 | \item{x}{matrix of covariates} 13 | 14 | \item{tx}{vector of treatment indicators (0 or 1)} 15 | 16 | \item{y}{vector of response values} 17 | 18 | \item{num.trees}{number of shallow causal trees to build} 19 | 20 | \item{maxleaves}{maximum number of leaves per causal tree} 21 | 22 | \item{eps}{learning rate} 23 | 24 | \item{splitSpread}{how far apart should the candidate splits be for the 25 | causal trees? (e.g. \code{splitSpread = 0.1}) means we consider 10 quantile 26 | cutpoints as candidates for making split} 27 | 28 | \item{type.measure}{loss to use for cross validation: 29 | 'response' returns mean-square error for predicting response in each arm. 
30 | 'effect' returns MSE for treatment effect using honest over-fit estimation.} 31 | 32 | \item{nfolds}{number of cross validation folds} 33 | 34 | \item{foldid}{vector of fold membership} 35 | 36 | \item{propensity}{logical: should propensity score stratification be used?} 37 | 38 | \item{stratum}{optional vector giving propensity score stratum for each 39 | observation (only used if \code{propensity = TRUE})} 40 | 41 | \item{isConstVar}{logical: for the causal tree splitting criterion 42 | (T-statistc), should it be assumed that the noise variance is the same in 43 | treatment and control arms?} 44 | } 45 | \value{ 46 | an object of class \code{cv.causalBoosting} which is an object of 47 | class \code{causalBoosting} with these additional attributes: 48 | \itemize{ 49 | \item num.trees.min: number of trees with lowest CV error 50 | \item cvm: vector of mean CV error for each number of trees 51 | \item cvsd: vector of standard errors for mean CV errors 52 | } 53 | } 54 | \description{ 55 | Fit a causal boosting model with cross validation 56 | } 57 | \examples{ 58 | # Randomized experiment example 59 | 60 | n = 100 # number of training-set patients to simulate 61 | p = 10 # number of features for each training-set patient 62 | 63 | # Simulate data 64 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 65 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 66 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 67 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 68 | 69 | # Estimate causal boosting model with cross-validation 70 | fit_cv = causalLearning::cv.causalBoosting(x, tx, y) 71 | fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 72 | pred_cv = predict(fit_cv, newx = x) 73 | 74 | # Visualize results 75 | plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 76 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 77 | abline(0, 1, lty = 2) 78 | 79 | } 80 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/pollinated.ranger.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pollinated.ranger.R 3 | \name{pollinated.ranger} 4 | \alias{pollinated.ranger} 5 | \title{Pollinate a fitted ranger random forest model} 6 | \usage{ 7 | pollinated.ranger(object, x, y) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted \code{ranger} object} 11 | 12 | \item{x}{matrix of covariates} 13 | 14 | \item{y}{vector of response values} 15 | } 16 | \value{ 17 | an object of class \code{pollinated.ranger} which is a \code{ranger} 18 | object that has been pollinated with the data in (x, y) 19 | } 20 | \description{ 21 | Pollinate a fitted ranger random forest model 22 | } 23 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.PTOforest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.PTOforest.R 3 | \name{predict.PTOforest} 4 | \alias{predict.PTOforest} 5 | \title{Make predictions from a fitted PTO forest model} 6 | \usage{ 7 | \method{predict}{PTOforest}(object, newx, ...) 
8 | } 9 | \arguments{ 10 | \item{object}{a fitted \code{PTOforest} object} 11 | 12 | \item{newx}{matrix of new covariates for which estimated treatment effects 13 | are desired} 14 | 15 | \item{...}{ignored} 16 | } 17 | \value{ 18 | a vector of predictions corresponding to the rows of \code{newx} 19 | } 20 | \description{ 21 | Make predictions from a fitted PTO forest model 22 | } 23 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.bagged.causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.bagged.causalMARS.R 3 | \name{predict.bagged.causalMARS} 4 | \alias{predict.bagged.causalMARS} 5 | \title{Make predictions from a bag of fitted causal MARS models} 6 | \usage{ 7 | \method{predict}{bagged.causalMARS}(object, newx, type = c("average", "all"), 8 | ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted \code{bagged.causalMARS} object} 12 | 13 | \item{newx}{matrix of new covariates for which estimated treatment effects 14 | are desired} 15 | 16 | \item{type}{type of prediction required: 17 | 'average' returns a vector of the averages of the bootstrap estimates. 18 | 'all' returns a matrix of all of the bootstrap estimates.} 19 | 20 | \item{...}{ignored} 21 | } 22 | \value{ 23 | a vector of estimated personalized treatment effects corresponding 24 | to the rows of \code{newx} 25 | } 26 | \description{ 27 | Make predictions from a bag of fitted causal MARS models 28 | } 29 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.causalBoosting.R 3 | \name{predict.causalBoosting} 4 | \alias{predict.causalBoosting} 5 | \title{Make predictions from a fitted causal boosting model} 6 | \usage{ 7 | \method{predict}{causalBoosting}(object, newx, newtx = NULL, 8 | type = c("treatment.effect", "conditional.mean", "response"), 9 | num.trees = 1:object$num.trees, honest = FALSE, naVal = 0, ...) 10 | } 11 | \arguments{ 12 | \item{object}{a fitted \code{causalBoosting} object} 13 | 14 | \item{newx}{matrix of new covariates for which estimated treatment effects 15 | are desired} 16 | 17 | \item{newtx}{option vector of new treatment assignments 18 | (only used if \code{type = 'response'})} 19 | 20 | \item{type}{type of prediction required: 21 | 'treatment.effect' returns estimated treatment effect. 22 | 'conditional.mean' returns two predictions, one for each arm. 23 | 'response' returns prediction for arm corresponding to newtx.} 24 | 25 | \item{num.trees}{number(s) of shallow causal trees to use for prediction} 26 | 27 | \item{honest}{logical: should honest re-estimates of leaf means be used for 28 | prediction? 
This requires that \code{x.est, tx.est, y.est} were specified 29 | when the causal boosting model was fit} 30 | 31 | \item{naVal}{value with which to replace \code{NA} predictions} 32 | 33 | \item{...}{ignored} 34 | } 35 | \value{ 36 | a vector or matrix of predictions corresponding to the rows of 37 | \code{newx} 38 | } 39 | \description{ 40 | Make predictions from a fitted causal boosting model 41 | } 42 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.causalMARS.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.causalMARS.R 3 | \name{predict.causalMARS} 4 | \alias{predict.causalMARS} 5 | \title{Make predictions from a fitted causal MARS model} 6 | \usage{ 7 | \method{predict}{causalMARS}(object, newx, active, ...) 8 | } 9 | \arguments{ 10 | \item{object}{a fitted \code{causalMARS} object} 11 | 12 | \item{newx}{matrix of new covariates for which estimated treatment effects 13 | are desired} 14 | 15 | \item{active}{indices of columns with nonzero norm (defaults to model 16 | selected via backward stepwise phase, or the full model if 17 | \code{backstep = FALSE})} 18 | 19 | \item{...}{ignored} 20 | } 21 | \value{ 22 | a vector of estimated personalized treatment effects corresponding 23 | to the rows of \code{newx} 24 | } 25 | \description{ 26 | Make predictions from a fitted causal MARS model 27 | } 28 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.causalTree.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.causalTree.R 3 | \name{predict.causalTree} 4 | \alias{predict.causalTree} 5 | \title{Make predictions from a fitted causal tree model} 6 | \usage{ 7 | \method{predict}{causalTree}(object, newx, newtx = NULL, 8 | type = c("treatment.effect", "conditional.mean", "response"), 9 | honest = FALSE, naVal = 0, ...) 10 | } 11 | \arguments{ 12 | \item{object}{a fitted \code{causalTree} object} 13 | 14 | \item{newx}{matrix of new covariates for which estimated treatment effects 15 | are desired} 16 | 17 | \item{newtx}{option vector of new treatment assignments 18 | (only used if \code{type = 'response'})} 19 | 20 | \item{type}{type of prediction required: 21 | 'treatment.effect' returns estimated treatment effect. 22 | 'conditional.mean' returns two predictions, one for each arm. 23 | 'response' returns prediction for arm corresponding to newtx.} 24 | 25 | \item{honest}{logical: should honest re-estimates of leaf means be used for 26 | prediction? 
This requires that \code{x.est, tx.est, y.est} were specified 27 | when the causal boosting model was fit} 28 | 29 | \item{naVal}{value with which to replace \code{NA} predictions} 30 | 31 | \item{...}{ignored} 32 | } 33 | \value{ 34 | a vector or matrix of predictions corresponding to the rows of 35 | \code{newx} 36 | } 37 | \description{ 38 | Make predictions from a fitted causal tree model 39 | } 40 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.cv.causalBoosting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.cv.causalBoosting.R 3 | \name{predict.cv.causalBoosting} 4 | \alias{predict.cv.causalBoosting} 5 | \title{Make predictions from a fitted cross-validated causal boosting model} 6 | \usage{ 7 | \method{predict}{cv.causalBoosting}(object, newx, newtx = NULL, 8 | type = c("treatment.effect", "conditional.mean", "response"), 9 | num.trees = object$num.trees.min.effect, naVal = 0, ...) 10 | } 11 | \arguments{ 12 | \item{object}{a fitted \code{cv.causalBoosting} object} 13 | 14 | \item{newx}{matrix of new covariates for which estimated treatment effects 15 | are desired} 16 | 17 | \item{newtx}{option vector of new treatment assignments 18 | (only used if \code{type = 'individual'})} 19 | 20 | \item{type}{type of prediction required: 21 | 'treatment.effect' returns estimated treatment effect. 22 | 'conditional.mean' returns two predictions, one for each arm. 23 | 'response' returns prediction for arm corresponding to newtx.} 24 | 25 | \item{num.trees}{number of shallow causal trees to use for prediction} 26 | 27 | \item{naVal}{value with which to replace \code{NA} predictions} 28 | 29 | \item{...}{ignored} 30 | } 31 | \value{ 32 | a vector or matrix of predictions corresponding to the rows of 33 | \code{newx} 34 | } 35 | \description{ 36 | Make predictions from a fitted cross-validated causal boosting model 37 | } 38 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/predict.pollinated.ranger.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predict.pollinated.ranger.R 3 | \name{predict.pollinated.ranger} 4 | \alias{predict.pollinated.ranger} 5 | \title{Make predictions from a pollinated ranger random forest model} 6 | \usage{ 7 | \method{predict}{pollinated.ranger}(object, newx, predict.all = FALSE, 8 | na.treatment = c("omit", "replace", "NA"), ...) 9 | } 10 | \arguments{ 11 | \item{object}{a fitted \code{pollinated.ranger} object} 12 | 13 | \item{newx}{matrix of new covariates for which predictions are desired} 14 | 15 | \item{predict.all}{logical: should predictions from all trees be returned? 16 | Otherwise the average across trees is returned} 17 | 18 | \item{na.treatment}{how to treat NA predictions from individual trees: 19 | 'omit' only uses trees for which the prediction is not NA. 20 | 'replace' replaces NA predictions with the overall mean response. 
21 | 'NA' returns NA if any tree prediction is NA.} 22 | 23 | \item{...}{additional arguments passed on to \code{predict.ranger}} 24 | } 25 | \value{ 26 | a vector of predicted treatment effects corresponding to the rows of 27 | newx 28 | } 29 | \description{ 30 | Make predictions from a pollinated ranger random forest model 31 | } 32 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/man/stratify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stratify.R 3 | \name{stratify} 4 | \alias{stratify} 5 | \title{Get propensity strata from propensity scores} 6 | \usage{ 7 | stratify(pscore, tx, min.per.arm = 30) 8 | } 9 | \arguments{ 10 | \item{pscore}{vector of propensity scores} 11 | 12 | \item{tx}{vector of treatment indicators} 13 | 14 | \item{min.per.arm}{minimum number of observations for each arm within each 15 | stratum} 16 | } 17 | \value{ 18 | a vector of integers with length equal to the length of pscore, 19 | reporting the propensity stratum corresponding to each propensity score 20 | } 21 | \description{ 22 | Get propensity strata from propensity scores 23 | } 24 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalBoosting.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalBoosting.o -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalLearning.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/src/causalLearning.so -------------------------------------------------------------------------------- /causalLearning.Rcheck/00_pkg_src/causalLearning/src/symbols.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/saberpowers/causalLearning/4205c82501b21da76914e92e9fae5b4b504ec9bd/causalLearning.Rcheck/00_pkg_src/causalLearning/src/symbols.rds -------------------------------------------------------------------------------- /causalLearning.Rcheck/00check.log: -------------------------------------------------------------------------------- 1 | * using log directory ‘/Users/sspowers/GitHub/causalLearning/causalLearning.Rcheck’ 2 | * using R version 3.3.2 (2016-10-31) 3 | * using platform: x86_64-apple-darwin13.4.0 (64-bit) 4 | * using session charset: UTF-8 5 | * checking for file ‘causalLearning/DESCRIPTION’ ... OK 6 | * this is package ‘causalLearning’ version ‘1.0.0’ 7 | * checking package namespace information ... OK 8 | * checking package dependencies ... OK 9 | * checking if this is a source package ... OK 10 | * checking if there is a namespace ... OK 11 | * checking for executable files ... OK 12 | * checking for hidden files and directories ... OK 13 | * checking for portable file names ... OK 14 | * checking for sufficient/correct file permissions ... OK 15 | * checking whether package ‘causalLearning’ can be installed ... OK 16 | * checking installed package size ... 
OK 17 | * checking package directory ... OK 18 | * checking DESCRIPTION meta-information ... OK 19 | * checking top-level files ... OK 20 | * checking for left-over files ... OK 21 | * checking index information ... OK 22 | * checking package subdirectories ... OK 23 | * checking R files for non-ASCII characters ... OK 24 | * checking R files for syntax errors ... OK 25 | * checking whether the package can be loaded ... OK 26 | * checking whether the package can be loaded with stated dependencies ... OK 27 | * checking whether the package can be unloaded cleanly ... OK 28 | * checking whether the namespace can be loaded with stated dependencies ... OK 29 | * checking whether the namespace can be unloaded cleanly ... OK 30 | * checking dependencies in R code ... OK 31 | * checking S3 generic/method consistency ... OK 32 | * checking replacement functions ... OK 33 | * checking foreign function calls ... OK 34 | * checking R code for possible problems ... OK 35 | * checking Rd files ... OK 36 | * checking Rd metadata ... OK 37 | * checking Rd cross-references ... OK 38 | * checking for missing documentation entries ... OK 39 | * checking for code/documentation mismatches ... OK 40 | * checking Rd \usage sections ... OK 41 | * checking Rd contents ... OK 42 | * checking for unstated dependencies in examples ... OK 43 | * checking line endings in C/C++/Fortran sources/headers ... OK 44 | * checking compiled code ... OK 45 | * checking examples ... OK 46 | * checking PDF version of manual ... OK 47 | * DONE 48 | Status: OK 49 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/00install.out: -------------------------------------------------------------------------------- 1 | * installing *source* package ‘causalLearning’ ... 2 | ** libs 3 | clang -I/Library/Frameworks/R.framework/Resources/include -DNDEBUG -I/usr/local/include -I/usr/local/include/freetype2 -I/opt/X11/include -fPIC -Wall -mtune=core2 -g -O2 -c causalBoosting.c -o causalBoosting.o 4 | clang -dynamiclib -Wl,-headerpad_max_install_names -undefined dynamic_lookup -single_module -multiply_defined suppress -L/Library/Frameworks/R.framework/Resources/lib -L/usr/local/lib -o causalLearning.so causalBoosting.o -F/Library/Frameworks/R.framework/.. 
-framework R -Wl,-framework -Wl,CoreFoundation 5 | installing to /Users/sspowers/GitHub/causalLearning/causalLearning.Rcheck/causalLearning/libs 6 | ** R 7 | ** preparing package for lazy loading 8 | ** help 9 | *** installing help indices 10 | ** building package indices 11 | ** testing if installed package can be loaded 12 | * DONE (causalLearning) 13 | -------------------------------------------------------------------------------- /causalLearning.Rcheck/causalLearning-Ex.R: -------------------------------------------------------------------------------- 1 | pkgname <- "causalLearning" 2 | source(file.path(R.home("share"), "R", "examples-header.R")) 3 | options(warn = 1) 4 | library('causalLearning') 5 | 6 | base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') 7 | cleanEx() 8 | nameEx("PTOforest") 9 | ### * PTOforest 10 | 11 | flush(stderr()); flush(stdout()) 12 | 13 | ### Name: PTOforest 14 | ### Title: Fit a pollinated transformed outcome (PTO) forest model 15 | ### Aliases: PTOforest 16 | 17 | ### ** Examples 18 | 19 | # Randomized experiment example 20 | 21 | n = 100 # number of training-set patients to simulate 22 | p = 10 # number of features for each training-set patient 23 | 24 | # Simulate data 25 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 26 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 27 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 28 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 29 | 30 | # Estimate PTO forest model 31 | fit_pto = PTOforest(x, tx, y) 32 | pred_pto = predict(fit_pto, newx = x) 33 | 34 | # Visualize results 35 | plot(tx_effect, pred_pto, main = 'PTO forest', 36 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 37 | abline(0, 1, lty = 2) 38 | 39 | 40 | 41 | 42 | cleanEx() 43 | nameEx("bagged.causalMARS") 44 | ### * bagged.causalMARS 45 | 46 | flush(stderr()); flush(stdout()) 47 | 48 | ### Name: bagged.causalMARS 49 | ### Title: Fit a bag of causal MARS models 50 | ### Aliases: bagged.causalMARS 51 | 52 | ### ** Examples 53 | 54 | # Randomized experiment example 55 | 56 | n = 100 # number of training-set patients to simulate 57 | p = 10 # number of features for each training-set patient 58 | 59 | # Simulate data 60 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 61 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 62 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 63 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 64 | 65 | # Estimate bagged causal MARS model 66 | fit_bcm = causalLearning::bagged.causalMARS(x, tx, y, nbag = 10) 67 | pred_bcm = predict(fit_bcm, newx = x) 68 | 69 | # Visualize results 70 | plot(tx_effect, pred_bcm, main = 'Bagged causal MARS', 71 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 72 | abline(0, 1, lty = 2) 73 | 74 | 75 | 76 | 77 | cleanEx() 78 | nameEx("causalBoosting") 79 | ### * causalBoosting 80 | 81 | flush(stderr()); flush(stdout()) 82 | 83 | ### Name: causalBoosting 84 | ### Title: Fit a causal boosting model 85 | ### Aliases: causalBoosting 86 | 87 | ### ** Examples 88 | 89 | # Randomized experiment example 90 | 91 | n = 100 # number of training-set patients to simulate 92 | p = 10 # number of features for each training-set patient 93 | 94 | # Simulate data 95 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 96 | tx_effect = x[, 1] + 
(x[, 2] > 0) # simple heterogeneous treatment effect 97 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 98 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 99 | 100 | # Estimate causal boosting model 101 | fit_cb = causalBoosting(x, tx, y, num.trees = 500) 102 | pred_cb = predict(fit_cb, newx = x, num.trees = 500) 103 | 104 | # Visualize results 105 | plot(tx_effect, pred_cb, main = 'Causal boosting', 106 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 107 | abline(0, 1, lty = 2) 108 | 109 | 110 | 111 | 112 | cleanEx() 113 | nameEx("causalMARS") 114 | ### * causalMARS 115 | 116 | flush(stderr()); flush(stdout()) 117 | 118 | ### Name: causalMARS 119 | ### Title: Fit a causal MARS model 120 | ### Aliases: causalMARS 121 | 122 | ### ** Examples 123 | 124 | # Randomized experiment example 125 | 126 | n = 100 # number of training-set patients to simulate 127 | p = 10 # number of features for each training-set patient 128 | 129 | # Simulate data 130 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 131 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 132 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 133 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 134 | 135 | # Estimate causal MARS model 136 | fit_cm = causalLearning::causalMARS(x, tx, y) 137 | pred_cm = predict(fit_cm, newx = x) 138 | 139 | # Visualize results 140 | plot(tx_effect, pred_cm, main = 'Causal MARS', 141 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 142 | abline(0, 1, lty = 2) 143 | 144 | 145 | 146 | 147 | cleanEx() 148 | nameEx("cv.causalBoosting") 149 | ### * cv.causalBoosting 150 | 151 | flush(stderr()); flush(stdout()) 152 | 153 | ### Name: cv.causalBoosting 154 | ### Title: Fit a causal boosting model with cross validation 155 | ### Aliases: cv.causalBoosting 156 | 157 | ### ** Examples 158 | 159 | # Randomized experiment example 160 | 161 | n = 100 # number of training-set patients to simulate 162 | p = 10 # number of features for each training-set patient 163 | 164 | # Simulate data 165 | x = matrix(rnorm(n * p), nrow = n, ncol = p) # simulate covariate matrix 166 | tx_effect = x[, 1] + (x[, 2] > 0) # simple heterogeneous treatment effect 167 | tx = rbinom(n, size = 1, p = 0.5) # random treatment assignment 168 | y = rowMeans(x) + tx * tx_effect + rnorm(n, sd = 0.001) # simulate response 169 | 170 | # Estimate causal boosting model with cross-validation 171 | fit_cv = causalLearning::cv.causalBoosting(x, tx, y) 172 | fit_cv$num.trees.min.effect # number of trees chosen by cross-validation 173 | pred_cv = predict(fit_cv, newx = x) 174 | 175 | # Visualize results 176 | plot(tx_effect, pred_cv, main = 'Causal boosting w/ CV', 177 | xlab = 'True treatment effect', ylab = 'Estimated treatment effect') 178 | abline(0, 1, lty = 2) 179 | 180 | 181 | 182 | 183 | ### *