├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── NAMESPACE
├── R
    ├── gather.R
    ├── print.matrix1.R
    ├── print.pstest.R
    ├── pstest.R
    ├── pstest.package.R
    └── summary.pstest.R
├── README.md
└── man
    ├── pstest-package.Rd
    └── pstest.Rd


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | pstest.Rproj
6 | "pstest (from Vandy).Rproj"
7 | inst/doc
8 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: pstest
 2 | Type: Package
 3 | Title: Specification Tests for Parametric Propensity Score Models
 4 | Version: 0.1.3.900
 5 | Author: Pedro H. C. Sant'Anna <pedro.h.santanna@vanderbilt.edu>, Xiaojun Song
 6 |     <sxj@gsm.pku.edu.cn>
 7 | Maintainer: Pedro H. C. Sant'Anna <pedro.h.santanna@vanderbilt.edu>
 8 | Description: The propensity score is one of the most widely used tools in
 9 |     studying the causal effect of a treatment, intervention, or policy. Given that
10 |     the propensity score is usually unknown, it has to be estimated, implying that
11 |     the reliability of many treatment effect estimators depends on the correct
12 |     specification of the (parametric) propensity score. This package implements the
13 |     data-driven nonparametric diagnostic tools for detecting propensity score
14 |     misspecification proposed by Sant'Anna and Song (2019) <doi:10.1016/j.jeconom.2019.02.002>.
15 | URL: https://github.com/pedrohcgs/pstest
16 | Depends:
17 |     R (>= 3.1)
18 | Imports:
19 |     stats,
20 |     parallel,
21 |     glmx,
22 |     MASS,
23 |     utils
24 | License: GPL-2
25 | Encoding: UTF-8
26 | LazyData: TRUE
27 | RoxygenNote: 6.1.1.9000
28 | Date: 2019-08-25 10:45
29 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | S3method(print,pstest)
 4 | S3method(summary,pstest)
 5 | export(pstest)
 6 | importFrom(MASS,ginv)
 7 | importFrom(glmx,hetglm.fit)
 8 | importFrom(parallel,makeCluster)
 9 | importFrom(parallel,nextRNGStream)
10 | importFrom(parallel,parLapply)
11 | importFrom(parallel,stopCluster)
12 | importFrom(stats,binomial)
13 | importFrom(stats,glm)
14 | importFrom(stats,rbinom)
15 | importFrom(stats,runif)
16 | importFrom(utils,write.table)
17 | 


--------------------------------------------------------------------------------
/R/gather.R:
--------------------------------------------------------------------------------
 1 | # Functions used to guarantee replicability of the bootstrap
 2 | # These are based on the harvestr package, but since harvestr is not on CRAN anymore, we just use these functions
 3 | # safe version of retrieving the .Random.seed -
 4 | get.seed.ps <- function(){
 5 |   if(exists(".Random.seed", envir=.GlobalEnv, mode="numeric")) {
 6 |     seed <- get(".Random.seed", envir=.GlobalEnv, inherits=FALSE)
 7 |     class(seed) <- c("rng-seed", "integer")
 8 |     seed
 9 |   } else {
10 |     NULL
11 |   }
12 | }
13 | 
14 | gather.ps <- function(x, seed=1234){
15 |   set.seed(seed, kind="L'Ecuyer-CMRG", normal.kind="Inversion")
16 |   r <- get.seed.ps()
17 |   seeds <- vector('list', x)
18 |   for(i in seq_len(x)) {
19 |     r <-
20 |       seeds[[i]] <-
21 |       structure( parallel::nextRNGStream(r)
22 |                  , RNGlevel='stream'
23 |                  , class=c("rng-seed", "integer")
24 |       )
25 |   }
26 |   structure(seeds, class=c('rng-seeds', 'list'))
27 | }
28 | 


--------------------------------------------------------------------------------
/R/print.matrix1.R:
--------------------------------------------------------------------------------
 1 | #' @title print.matrix1
 2 | #'
 3 | #' @description Helper function to print a matrix; used by the print methods
 4 | #'
 5 | #' @param m Some matrix
 6 | #'
 7 | #' @noRd
 8 | #' @importFrom utils write.table
 9 | #'
10 | 
11 | # Got this from Brant QTE package
12 | print.matrix1 <- function(m){
13 |   utils::write.table(format(m, justify="right", digits=2, nsmall=2),
14 |                      row.names=F, col.names=F, quote=F, sep="\t")
15 | }
16 | 


--------------------------------------------------------------------------------
/R/print.pstest.R:
--------------------------------------------------------------------------------
 1 | #' @title Print
 2 | #'
 3 | #' @description Prints a pstest Object
 4 | #'
 5 | #' @param x A pstest object
 6 | #' @param ... Other params (required as generic function, but not used)
 7 | #'
 8 | #' @export
 9 | #' @noRd
10 | # Define new print function
11 | print.pstest <- function(x, ...){
12 |     #-----------------------------------------------------------------------------
13 |     # Preliminaries
14 |     # Weight function used
15 |     if(x$argu$w == "ind")       ww <- "Indicator function: w(q,u) = 1(q<=u)"
16 |     if(x$argu$w == "exp")       ww <- "Exponential function: w(q,u) = exp(qu)"
17 |     if(x$argu$w == "logistic")  ww <- "Logistic function: w(q,u) = 1/[1+exp(-qu)]"
18 |     if(x$argu$w == "sin")       ww <- "Sine function: w(q,u) = sin(qu)"
19 |     if(x$argu$w == "sincos")    ww <- "Sine and cosine function: w(q,u) = sin(qu)+ cos(qu)"
20 | 
21 |     distt <- x$argu$dist
22 |     if(distt == "Mammen") distt <- "Mammen's"
23 | 
24 |     #Creat parameters for the Table
25 |     header <- c("", "Test statistic", "Bootstrapped P-value")
26 |     body <- cbind(c("Kolmogorov-Smirnov", "Cramer-von Mises"),
27 |                   c(round(x$kstest, digits = 4), round(x$cvmtest, digits = 4)),
28 |                   c(round(x$pvks, digits = 4), round(x$pvcvm, digits = 4)))
29 | 
30 |     colnames(body) <- header
31 |     #-----------------------------------------------------------------------------
32 |     #Output
33 |     cat(" Call:\n")
34 |     print(x$call)
35 |     cat("\n Sant'Anna and Song (2019) specification tests for the propensity score:\n")
36 |     print.matrix1(rbind(header, body))
37 |     cat(" -------------------------------------------------------------------------")
38 |     cat("\n Weight function:", "\t \t", ww)
39 |     cat("\n Number of boostrap draws:", "\t", x$argu$nboot)
40 |     cat("\n Boostrap draws from", distt, 'distribution')
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/R/pstest.R:
--------------------------------------------------------------------------------
  1 | #' pstest: Tests for the Propensity Score
  2 | #'
  3 | #' \emph{pstest} computes Kolmogorov-Smirnov and Cramer-von Mises type tests
  4 | #' for the null hypothesis that a parametric model for the propensity score is
  5 | #' is correctly specified. For details of the testing procedure, see
  6 | #' Sant'Anna and Song (2019),'Specification Tests for the Propensity Score'  <doi:10.1016/j.jeconom.2019.02.002>.
  7 | #'
  8 | #'@param d a vector containing the binary treatment indicator.
  9 | #'@param pscore a vector containing the estimated propensity scores.
 10 | #'@param xpscore a matrix (or data frame) containing the covariates (and their
 11 | #'               transformations) included in the propensity score
 12 | #'               estimation. It should also include the constant term.
 13 | #'@param model  a description of the functional form (link function) used
 14 | #'              to estimated propensity score. The alternatives are:
 15 | #'              'logit' (default), 'probit', and het.probit
 16 | #' @param pscore.model in case you you set model="het.probit", pscore.model is the entire hetglm object.
 17 | #'                      Default for pscore.model is NULL.
 18 | #'@param w a description of which weight function the projection is based on.
 19 | #'            The alternatives are 'ind' (default), which set \eqn{w(q,u)=1(q<=u)},
 20 | #'            'exp', which set \eqn{w(q,u)=exp(qu)}, 'logistic', which set
 21 | #'            \eqn{w(q,u)=1/[1+exp(1-qu)]}, 'sin', which set \eqn{w(q,u)=sin(qu)}, and
 22 | #'            'sincos', which set \eqn{w(q,u)=sin(qu)+cos(qu)}.
 23 | #'@param dist a description of which distribution to use during the bootstrap.
 24 | #'            The alternatives are 'Mammen' (default), and 'Rademacher'.
 25 | #'@param nboot number of bootstrap replicates to perform. Default is 1,000.
 26 | #'@param cores number of cores to use during the bootstrap. Default is 1.
 27 | #'              If cores is greater than 1, the bootstrap is conducted using
 28 | #'              parLapply, instead of lapply type call.
 29 | #'@param chunk a value that determine the size of each 'tile'. Such argument is used
 30 | #'              to split the original data into chunks, saving memory.
 31 | #'              Default value is 1,000. If the \emph{pstest} function throw a
 32 | #'              memory error, you should choose a smaller value for \emph{chunk}.
 33 | #'
 34 | #'
 35 | #'@return a list containing the Kolmogorov-Smirnov and Cramer-von Mises test
 36 | #'        statistics for the null hypothesis of correctly specified propensity
 37 | #'        score model (kstest and cvmtest, respectively), and their associate
 38 | #'        bootstrapped p-values, pvks and pvcvm, respectively. All inputs are also
 39 | #'        returned.
 40 | #'
 41 | #'@references
 42 | #'       Sant'Anna, Pedro H. C, and Song, Xiaojun (2019), \emph{Specification Tests for the Propensity Score},
 43 | #'       Journal of Econometrics, vol. 210 (2), p. 379-404,  <doi:10.1016/j.jeconom.2019.02.002>.
 44 | #'
 45 | #'@examples
 46 | #' # Example based on simulation data
 47 | #' # Simulate vector of covariates
 48 | #' set.seed(1234)
 49 | #' x1 <- runif(100)
 50 | #' x2 <- rt(100, 5)
 51 | #' x3 <- rpois(100, 3)
 52 | #' # generate treatment status score based on Probit Specification
 53 | #' treat <- (x1 + x2 + x3 >= rnorm(100, 4, 5))
 54 | #' # estimate correctly specified propensity score based on Probit
 55 | #' pscore <- stats::glm(treat ~ x1 + x2 + x3, family = binomial(link = "probit"),
 56 | #'               x = TRUE)
 57 | #' # Test the correct specification of estimated propensity score, using
 58 | #' # the weight function 'ind', and bootstrap based on 'Mammen'.
 59 | #' pstest(d = pscore$y, pscore = pscore$fit, xpscore = pscore$x,
 60 | #'        model = "probit", w = "ind", dist = "Mammen")
 61 | #' # Alternatively, one can use the 'sin' weight function
 62 | #' pstest(d = pscore$y, pscore = pscore$fit, xpscore = pscore$x,
 63 | #'        model = "probit", w = "sin", dist = "Mammen")
 64 | #'
 65 | #'@export
 66 | #'
 67 | #'@importFrom stats binomial rbinom runif glm
 68 | #'@importFrom parallel makeCluster parLapply stopCluster nextRNGStream
 69 | #'@importFrom glmx hetglm.fit
 70 | #'@importFrom MASS ginv
 71 | #-------------------------------------------------------------------------------
 72 | pstest = function(d, pscore, xpscore, pscore.model = NULL,
 73 |                   model = "logit",
 74 |                   w = "ind",
 75 |                   dist = "Mammen",
 76 |                   nboot = 1000, cores = 1, chunk = 1000) {
 77 |     #-----------------------------------------------------------------------------
 78 |     # Define some underlying variables
 79 |     n <- length(d)
 80 |     xx <- as.matrix(xpscore)
 81 |     pscore.fit <- pscore
 82 |     uhat <- d - pscore.fit
 83 |     #-----------------------------------------------------------------------------
 84 |     # Run some tests
 85 |     if( !is.element(model,c("logit", "probit", "het.probit"))) {
 86 |       stop("model must be either 'logit', 'probit' or 'het.probit' ")
 87 |     }
 88 |     if( !is.element(dist,c("Mammen", "Rademacher"))) {
 89 |       stop("dist must be either 'Mammen', or 'Rademacher' ")
 90 |     }
 91 |     if( !is.element(w,c("ind", "exp", "logistic", "sin", "sincos"))) {
 92 |       stop("w must be either 'ind', 'exp', 'logistic', 'sin', or 'sincos' ")
 93 |     }
 94 |     #-----------------------------------------------------------------------------
 95 |     # #Define the score variables for the projection
 96 |     if (model == "logit") {
 97 |         g <- pscore.fit * (1 - pscore.fit) * xx
 98 |     }
 99 |     if (model == "probit") {
100 |         beta.x <- stats::qnorm(pscore.fit)
101 |         g <- stats::dnorm(beta.x) * xx
102 |         rm(beta.x)
103 |     }
104 |     if (model == "het.probit") {
105 |       if(is.null(pscore.model)){
106 |         stop(" You must provide the entire hetglm model if you are using het. probit")
107 |       }
108 |       if(!class(pscore.model)=="hetglm"){
109 |         stop(" pscore.model must be estimated using the hetglm function. See glmx package")
110 |       }
111 |       if(is.null(pscore.model$x$scale)){
112 |         stop(" You must include the option x=T in your glmx model")
113 |       }
114 | 
115 |       pp <- pscore.model
116 |       index.mean <- as.numeric(pp$x$mean %*% pp$coefficients$mean)
117 |       index.scale <- as.numeric(pp$x$scale %*% (pp$coefficients$scale))
118 | 
119 |       #beta.x <- stats::qnorm(pscore.fit)
120 |       index <- index.mean * exp(-index.scale)
121 |       g <- cbind(stats::dnorm(index) * exp(-index.scale) *pp$x$mean,
122 |                  - stats::dnorm(index)*index.mean*exp(-index.scale)*pp$x$scale)
123 | 
124 | 
125 |       xx <- as.matrix(cbind(pp$x$mean,pp$x$scale))
126 |       #rm(pp,xx.scale,index.mean,index.scale,index )
127 |     }
128 | 
129 |     gg <- crossprod(g)
130 |     #-----------------------------------------------------------------------------
131 |     # Define variables to be used in the loop
132 |     # Number of covariates
133 |     k.dim = dim(xx)[2]
134 | 
135 |     # unique pscores
136 |     #un.pscores <- unique(pscore.fit)
137 |     un.pscores <- (pscore.fit)
138 |     n.unique <- length(un.pscores)
139 | 
140 |     # Initialize `beta` matrix (K coefficients for each of n.unique responses)
141 |     beta <- matrix(0, k.dim, n.unique)
142 | 
143 |     # Initialize `Rw` row vector (n.unique dimension)
144 |     Rw <- matrix(0, 1, n.unique)
145 | 
146 |     # We split n columns into l tiles, each with chunk columns
147 |     l <- floor(n.unique/chunk) + 1
148 | 
149 |     # Initialize the bootststrap test statistics vectors
150 |     ksb1 <- matrix(0, nboot, 1)
151 |     cvmb1 <- matrix(0, nboot, 1)
152 |     #-----------------------------------------------------------------------------
153 |     # Let's define some parameters for the bootstrap
154 |     # Better to define these outside the loop that will follow.
155 | 
156 |     if (dist == "Mammen"){
157 |       # Use the Mammen(1993) binary V's
158 |       k1 <- 0.5 * (1 - 5^0.5)
159 |       k2 <- 0.5 * (1 + 5^0.5)
160 |       pkappa <- 0.5 * (1 + 5^0.5)/(5^0.5)
161 |     }
162 | 
163 |     if (dist == "Rademacher"){
164 |       # Use the Rademacher V's
165 |       k1 <- 1
166 |       k2 <- -1
167 |       pkappa <- 0.5
168 |     }
169 | 
170 |     # function for the bootstrap
171 |     bootapply <- function(nn, n, pkappa, k1, k2, uhat, w1.temp, Seed) {
172 |         # to make each run fully reproducible, we set the seed
173 |         seed.run <- Seed[nn, ]
174 |         set.seed(seed.run, "L'Ecuyer-CMRG")
175 |         v <- stats::rbinom(n, 1, pkappa)
176 |         v <- ifelse(v == 1, k1, k2)
177 |         # Bootstrapped emprirical process
178 |         Rwb <- colSums(uhat * v * w1.temp)/n
179 |         # KS test
180 |         ksb <- sqrt(n) * max(abs(Rwb))
181 |         # Cramer-von Mises test
182 |         cvmb <- sum(Rwb^2)
183 |         # Return both tests
184 |         return(cbind(ksb, cvmb))
185 |     }
186 |     #-----------------------------------------------------------------------------
187 |     # Define seeds: Guarantee reproducibility
188 |     ss <- floor(stats::runif(1) * 10000)
189 |     seed.temp <- gather.ps(nboot, seed = ss)
190 | 
191 |     Seed <- matrix(nrow = nboot, ncol = 6)
192 |     for (i in 1:nboot) {
193 |       Seed[i, ] <- seed.temp[[i]][2:7]
194 |     }
195 |     #-----------------------------------------------------------------------------
196 |     # If we are going to use paralell coding, initialize the cores
197 |     if (cores > 1) {
198 |         cl <- parallel::makeCluster(cores)
199 |     }
200 |     #-----------------------------------------------------------------------------
201 |     # Start the loop to compute the tests (this is more memory efficient)
202 |     # we do a loop for each weight function, to avoid loss in speed
203 |     # indicator weight
204 |     if (w == "ind"){
205 |         for (i in 1:l) {
206 |           start <- min(chunk * (i - 1) + 1, n.unique)
207 |           end <- min(chunk * i, n.unique)
208 |           w.temp <- outer(pscore.fit, un.pscores[start:end], "<=")
209 |           Gw <- crossprod(g, w.temp)
210 | 
211 |           beta[, start:end] <- MASS::ginv(crossprod(g)) %*% Gw
212 |           #beta[, start:end] <- solve(gg, Gw)
213 |           w1.temp <- (w.temp - g %*% beta[, start:end])
214 |           Rw[start:end] <- colSums(uhat * w1.temp)/n
215 |           # Now the bootstrapped test in the chunk
216 |           if (cores == 1) {
217 |               boot.chunk <- lapply(1:nboot, bootapply, n, pkappa, k1, k2,
218 |                                    uhat, w1.temp, Seed)
219 |           }
220 |           if (cores > 1) {
221 |               boot.chunk <- parallel::parLapply(cl, 1:nboot, bootapply, n,
222 |                                                 pkappa, k1, k2, uhat, w1.temp, Seed)
223 |           }
224 |           # Put the Bootstrap resuls in a matrix
225 |           boot.chunk <- t(matrix(unlist(boot.chunk), 2, nboot))
226 |           # Compute the KSb and CvMb over chunks
227 |           if (1000 * (i - 1) + 1 <= n.unique) {
228 |             ksb1 <- pmax(ksb1, boot.chunk[, 1])
229 |             cvmb1 <- cvmb1 + boot.chunk[, 2]
230 |           }
231 |         }
232 |     }
233 |     #-----------------------------------------------------------------------------
234 |     # exponential weight
235 |     if (w == "exp"){
236 |       for (i in 1:l) {
237 |         start <- min(chunk * (i - 1) + 1, n.unique)
238 |         end <- min(chunk * i, n.unique)
239 |         w.temp <- tcrossprod(pscore.fit, un.pscores[start:end])
240 |         w.temp <- exp(w.temp)
241 |         Gw <- crossprod(g, w.temp)
242 |         beta[, start:end] <- solve(gg, Gw)
243 |         w1.temp <- (w.temp - g %*% beta[, start:end])
244 |         Rw[start:end] <- colSums(uhat * w1.temp)/n
245 |         # Now the bootstrapped test in the chunk
246 |         if (cores == 1) {
247 |           boot.chunk <- lapply(1:nboot, bootapply, n, pkappa, k1, k2,
248 |                                uhat, w1.temp, Seed)
249 |         }
250 |         if (cores > 1) {
251 |           boot.chunk <- parallel::parLapply(cl, 1:nboot, bootapply, n,
252 |                                             pkappa, k1, k2, uhat, w1.temp, Seed)
253 |         }
254 |         # Put the Bootstrap resuls in a matrix
255 |         boot.chunk <- t(matrix(unlist(boot.chunk), 2, nboot))
256 |         # Compute the KSb and CvMb over chunks
257 |         if (1000 * (i - 1) + 1 <= n.unique) {
258 |           ksb1 <- pmax(ksb1, boot.chunk[, 1])
259 |           cvmb1 <- cvmb1 + boot.chunk[, 2]
260 |         }
261 |       }
262 |     }
263 |     #-----------------------------------------------------------------------------
264 |     # Logistic weight
265 |     if (w == "logistic"){
266 |       for (i in 1:l) {
267 |         start <- min(chunk * (i - 1) + 1, n.unique)
268 |         end <- min(chunk * i, n.unique)
269 |         w.temp <- tcrossprod(pscore.fit, un.pscores[start:end])
270 |         w.temp <- 1/(1+exp(1-w.temp))
271 |         Gw <- crossprod(g, w.temp)
272 |         beta[, start:end] <- solve(gg, Gw)
273 |         w1.temp <- (w.temp - g %*% beta[, start:end])
274 |         Rw[start:end] <- colSums(uhat * w1.temp)/n
275 |         # Now the bootstrapped test in the chunk
276 |         if (cores == 1) {
277 |           boot.chunk <- lapply(1:nboot, bootapply, n, pkappa, k1, k2,
278 |                                uhat, w1.temp, Seed)
279 |         }
280 |         if (cores > 1) {
281 |           boot.chunk <- parallel::parLapply(cl, 1:nboot, bootapply, n,
282 |                                             pkappa, k1, k2, uhat, w1.temp, Seed)
283 |         }
284 |         # Put the Bootstrap resuls in a matrix
285 |         boot.chunk <- t(matrix(unlist(boot.chunk), 2, nboot))
286 |         # Compute the KSb and CvMb over chunks
287 |         if (1000 * (i - 1) + 1 <= n.unique) {
288 |           ksb1 <- pmax(ksb1, boot.chunk[, 1])
289 |           cvmb1 <- cvmb1 + boot.chunk[, 2]
290 |         }
291 |       }
292 |     }
293 |     #-----------------------------------------------------------------------------
294 |     # Sine weight
295 |     if (w == "sin"){
296 |       for (i in 1:l) {
297 |         start <- min(chunk * (i - 1) + 1, n.unique)
298 |         end <- min(chunk * i, n.unique)
299 |         w.temp <- tcrossprod(pscore.fit, un.pscores[start:end])
300 |         w.temp <- sin(w.temp)
301 |         Gw <- crossprod(g, w.temp)
302 |         beta[, start:end] <- solve(gg, Gw)
303 |         w1.temp <- (w.temp - g %*% beta[, start:end])
304 |         Rw[start:end] <- colSums(uhat * w1.temp)/n
305 |         # Now the bootstrapped test in the chunk
306 |         if (cores == 1) {
307 |           boot.chunk <- lapply(1:nboot, bootapply, n, pkappa, k1, k2,
308 |                                uhat, w1.temp, Seed)
309 |         }
310 |         if (cores > 1) {
311 |           boot.chunk <- parallel::parLapply(cl, 1:nboot, bootapply, n,
312 |                                             pkappa, k1, k2, uhat, w1.temp, Seed)
313 |         }
314 |         # Put the Bootstrap resuls in a matrix
315 |         boot.chunk <- t(matrix(unlist(boot.chunk), 2, nboot))
316 |         # Compute the KSb and CvMb over chunks
317 |         if (1000 * (i - 1) + 1 <= n.unique) {
318 |           ksb1 <- pmax(ksb1, boot.chunk[, 1])
319 |           cvmb1 <- cvmb1 + boot.chunk[, 2]
320 |         }
321 |       }
322 |     }
323 |     #-----------------------------------------------------------------------------
324 |     # Sine and cosine weight
325 |     if (w == "sincos"){
326 |       for (i in 1:l) {
327 |         start <- min(chunk * (i - 1) + 1, n.unique)
328 |         end <- min(chunk * i, n.unique)
329 |         w.temp <- tcrossprod(pscore.fit, un.pscores[start:end])
330 |         w.temp <- sin(w.temp)+cos(w.temp)
331 |         Gw <- crossprod(g, w.temp)
332 |         beta[, start:end] <- solve(gg, Gw)
333 |         w1.temp <- (w.temp - g %*% beta[, start:end])
334 |         Rw[start:end] <- colSums(uhat * w1.temp)/n
335 |         # Now the bootstrapped test in the chunk
336 |         if (cores == 1) {
337 |           boot.chunk <- lapply(1:nboot, bootapply, n, pkappa, k1, k2,
338 |                                uhat, w1.temp, Seed)
339 |         }
340 |         if (cores > 1) {
341 |           boot.chunk <- parallel::parLapply(cl, 1:nboot, bootapply, n,
342 |                                             pkappa, k1, k2, uhat, w1.temp, Seed)
343 |         }
344 |         # Put the Bootstrap resuls in a matrix
345 |         boot.chunk <- t(matrix(unlist(boot.chunk), 2, nboot))
346 |         # Compute the KSb and CvMb over chunks
347 |         if (1000 * (i - 1) + 1 <= n.unique) {
348 |           ksb1 <- pmax(ksb1, boot.chunk[, 1])
349 |           cvmb1 <- cvmb1 + boot.chunk[, 2]
350 |         }
351 |       }
352 |     }
353 |     #-----------------------------------------------------------------------------
354 |     # close the clusters, if we used paralell
355 |     if (cores > 1) {
356 |         parallel::stopCluster(cl)
357 |     }
358 |     #-----------------------------------------------------------------------------
359 |     # Compute our test statistics
360 |     cvmtest1 <- sum(Rw^2)
361 |     kstest1 <- sqrt(n) * max(abs(Rw))
362 |     #-----------------------------------------------------------------------------
363 |     # Put the bootstrap tests in a matrix
364 |     boottest <- matrix(0, nboot, 2)
365 |     boottest[, 1] <- ksb1
366 |     boottest[, 2] <- cvmb1
367 |     #-----------------------------------------------------------------------------
368 |     # Name the Columns
369 |     colnames(boottest) <- c("ksb", "cvmb")
370 |     #-----------------------------------------------------------------------------
371 |     # compute the Bootstrap P-value
372 |     pvksb <- sum((boottest[, 1] > kstest1))/nboot
373 |     pvcvmb <- sum((boottest[, 2] > cvmtest1))/nboot
374 |     #---------------------------------------------------------------------
375 |     # record the call
376 |     call.param <- match.call()
377 |     # Record all arguments used in the function
378 |     argu <- mget(names(formals()),sys.frame(sys.nframe()))
379 |     argu <- list(model = argu$model, w = argu$w, dist = argu$dist, nboot = argu$nboot )
380 |     # Return these variables
381 |     ret <- list(kstest = kstest1, cvmtest = cvmtest1, pvks = pvksb, pvcvm = pvcvmb,
382 |                 call.param = call.param, argu = argu)
383 |     # Define a new class
384 |     class(ret) <- "pstest"
385 |     # return the list
386 |     return(ret)
387 | }
388 | 


--------------------------------------------------------------------------------
/R/pstest.package.R:
--------------------------------------------------------------------------------
 1 | #' pstest: An R Package for assessing the goodness-of-fit of parametric propensity score models.
 2 | #'
 3 | #'@description
 4 | #' The propensity score is one of the most widely used tools in studying the causal effect
 5 | #' of a treatment, intervention, or policy. Given that the propensity score is usually unknown,
 6 | #' it has to be estimated, implying that the reliability of many treatment effect estimators depends
 7 | #' on the correct specification of the (parametric) propensity score. This package provides
 8 | #' data-driven nonparametric diagnostic tools for detecting propensity score misspecification.
 9 | #'
10 | #'
11 | #'@details
12 | #' This R package implements the class of specification test for the propensity score
13 | #' proposed in Sant'Anna and Song (2019), `Specification Tests for the Propensity Score', <doi:10.1016/j.jeconom.2019.02.002>.
14 | #'
15 | #' In short, this package implements Kolmogorov-Smirnov and Cramer-von Mises type tests
16 | #' for parametric propensity score models with either logistic ('logit'), or
17 | #' normal ('probit') link function. Critical values are computed with the assistance of a
18 | #' multiplier bootstrap.
19 | #'
20 | #' The tests are based on the integrated conditional moment approach, where the weight function
21 | #' used is based on an orthogonal projection onto the tangent space of nuisance parameters.
22 | #' As a result, the tests (a) enjoy improved power properties, (b) do not suffer from the
23 | #' 'curse of dimensionality' when the vector of covariates is of high-dimensionality,
24 | #' (c) are fully data-driven, (e) do not require tuning parameters such as bandwidths, and
25 | #' (e) are able to detect a broad class of local alternatives converging to the null at the
26 | #' parametric rate. These appealing features highlight that the tests can be of great use
27 | #' in practice.
28 | #'
29 | #' It is worth stressing that this package implements in a unified manner a large class of
30 | #' specification tests, depending on the chosen weight function \eqn{w(q,u)}:
31 | #' \itemize{
32 | #' \item{`ind' - the indicator weight function \eqn{w(q,u)=1(q \le u)}. This is the default.}
33 | #' \item{`exp' - the exponential weight function \eqn{w(q,u)=exp(qu)}.}
34 | #' \item{`logistic' - the logistic weight function \eqn{w(q,u)=1/[1+exp(1-qu)]}.}
35 | #' \item{`sin' - the sine weight function \eqn{w(q,u)=sin(qu)}.}
36 | #' \item{`sincos' - the sine and cosine weight function \eqn{w(q,u)=sin(qu)+cos(qu)}.}
37 | #' }
38 | #'
39 | #'Different weight functions \eqn{w(q,u)} have different power properties, and therefore,
40 | #'being able to choose different \eqn{w(q,u)} gives us flexibility to direct power in desired
41 | #'directions.
42 | #'
43 | #' @references
44 | #'       Sant'Anna, Pedro H. C, and Song, Xiaojun (2019), \emph{Specification Tests for the Propensity Score},
45 | #'       Journal of Econometrics 210 (2), pp. 379-404, <doi:10.1016/j.jeconom.2019.02.002>.
46 | #'
47 | #'@docType package
48 | #'@name pstest-package
49 | NULL
50 | 


--------------------------------------------------------------------------------
/R/summary.pstest.R:
--------------------------------------------------------------------------------
 1 | #' @title Summary
 2 | #'
 3 | #' @description Summary of a pstest object
 4 | #'
 5 | #' @param object A pstest object
 6 | #' @param ... Other params (required as generic function, but not used)
 7 | #'
 8 | #' @export
 9 | #' @noRd
10 | # Define new summary function
11 | summary.pstest <- function(object, ...){
12 |   pstest.obj <- object
13 |   print(pstest.obj)
14 | 
15 | }
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # pstest: An R Package to assess the goodness-of-fit of parametric propensity score models.
 2 | 
 3 | ## Overview 
 4 | The propensity score is one of the most widely used tools in studying the causal effect
 5 | of a treatment, intervention, or policy. Given that the propensity score is usually unknown,
 6 | it has to be estimated, implying that the reliability of many treatment effect estimators depends
 7 | on the correct specification of the (parametric) propensity score. This package provides
 8 | data-driven nonparametric diagnostic tools for detecting propensity score misspecification.
 9 | 
10 | In short, this package implements the class of specification test for the propensity score
11 | proposed in Sant'Anna and Song (2019), [Specification Tests for the Propensity Score](https://www.sciencedirect.com/science/article/pii/S0304407619300272). The package accommodates Logit and Probit propensity score specifications. Critical values are computed with the assistance of a multiplier bootstrap.
12 | 
13 | The tests are based on the integrated conditional moment approach, where the weight function
14 | used is based on an orthogonal projection onto the tangent space of nuisance parameters.
15 | As a result, the tests (a) enjoy improved power properties, (b) do not suffer from the
16 | 'curse of dimensionality' when the vector of covariates is of high-dimensionality,
17 | (c) are fully data-driven, (e) do not require tuning parameters such as bandwidths, and
18 | (e) are able to detect a broad class of local alternatives converging to the null at the
19 | parametric rate. These appealing features highlight that the tests can be of great use
20 | in practice.
21 | 
22 | It is worth stressing that the `pstest` package implements in a unified manner a large class of specification tests, depending on the chosen weight function $w(q,u)$. Current choices are:
23 | 
24 | * `ind` - the indicator weight function $w(q,u) = 1(q \leq u)$. This is the default.
25 | * `exp` - the exponential weight function $w(q,u) = exp(qu)$.
26 | * `logistic` - the logistic weight function $w(q,u) = \frac{1}{1+exp(1-qu)}$.
27 | * `sin` - the sine weight function $w(q,u) = sin(qu)$.
28 | * `sincos` - the sine and cosine weight function $w(q,u) = sin(qu) + cos(qu)$.
29 | 
30 | Different weight functions $w(q,u)$ have different power properties. Thus, being able to choose between different $w(q,u)$ gives one the flexibility to direct power in alternative directions.
31 | 
32 | For further details, please see the paper Sant'Anna and Song (2019), [Specification Tests for the Propensity Score](https://www.sciencedirect.com/science/article/pii/S0304407619300272), or the [working paper version](https://papers.ssrn.com/abstract=2872084).
33 | 
34 | ## Installing pstest
35 | This github website hosts the source code. The difference between what is here what is in CRAN is that here we always have the most updated version of the package.
36 | 
37 | To install the `pstest` package, you have two options: (a) install the CRAN version, or (b) instal the GitHub (most updated - RECOMMENDED) version. 
38 | 
39 | You can install the package from CRAN with `install.packages("pstest")`. 
40 | 
41 | Alternatively, you can install the most recent version of the `pstest` package from GitHub (this is what we recommend):
42 | 
43 | ```r
44 | #library(remotes)
45 | remotes::install_github("pedrohcgs/pstest")
46 | ```
47 | 
48 | ## Authors 
49 | 
50 | Pedro H. C. Sant'Anna, Vanderbilt University, Nashville, TN. E-mail: pedro.h.santanna [at] vanderbilt [dot] edu.
51 | 
52 | Xiaojun Song, Peking University, Beijing, China. E-mail: sxj [at] gsm [dot] pku [dot] edu [dot] cn.
53 | 
54 | In case you have questions, please do not hesitate to contact us.
55 | 
56 | 


--------------------------------------------------------------------------------
/man/pstest-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pstest.package.R
 3 | \docType{package}
 4 | \name{pstest-package}
 5 | \alias{pstest-package}
 6 | \title{pstest: An R Package for assessing the goodness-of-fit of parametric propensity score models.}
 7 | \description{
 8 | The propensity score is one of the most widely used tools in studying the causal effect
 9 | of a treatment, intervention, or policy. Given that the propensity score is usually unknown,
10 | it has to be estimated, implying that the reliability of many treatment effect estimators depends
11 | on the correct specification of the (parametric) propensity score. This package provides
12 | data-driven nonparametric diagnostic tools for detecting propensity score misspecification.
13 | }
14 | \details{
15 | This R package implements the class of specification test for the propensity score
16 | proposed in Sant'Anna and Song (2016), `Specification Tests for the Propensity Score',
17 | available at Pedro H.C. Sant'Anna webpage, \url{http://sites.google.com/site/pedrohcsantanna/}.
18 | 
19 | In short, this package implements Kolmogorov-Smirnov and Cramer-von Mises type tests
20 | for parametric propensity score models with either logistic ('logit'), or
21 | normal ('probit') link function. Critical values are computed with the assistance of a
22 | multiplier bootstrap.
23 | 
24 | The tests are based on the integrated conditional moment approach, where the weight function
25 | used is based on an orthogonal projection onto the tangent space of nuisance parameters.
26 | As a result, the tests (a) enjoy improved power properties, (b) do not suffer from the
27 | 'curse of dimensionality' when the vector of covariates is of high-dimensionality,
28 | (c) are fully data-driven, (e) do not require tuning parameters such as bandwidths, and
29 | (e) are able to detect a broad class of local alternatives converging to the null at the
30 | parametric rate. These appealing features highlight that the tests can be of great use
31 | in practice.
32 | 
33 | It is worth stressing that this package implements in a unified manner a large class of
34 | specification tests, depending on the chosen weight function \eqn{w(q,u)}:
35 | \itemize{
36 | \item{`ind' - the indicator weight function \eqn{w(q,u)=1(q \le u)}. This is the default.}
37 | \item{`exp' - the exponential weight function \eqn{w(q,u)=exp(qu)}.}
38 | \item{`logistic' - the logistic weight function \eqn{w(q,u)=1/[1+exp(1-qu)]}.}
39 | \item{`sin' - the sine weight function \eqn{w(q,u)=sin(qu)}.}
40 | \item{`sincos' - the sine and cosine weight function \eqn{w(q,u)=sin(qu)+cos(qu)}.}
41 | }
42 | 
43 | Different weight functions \eqn{w(q,u)} have different power properties, and therefore,
44 | being able to choose different \eqn{w(q,u)} gives us flexibility to direct power in desired
45 | directions.
46 | }
47 | \references{
48 | Sant'Anna, Pedro H. C, and Song, Xiaojun (2016), \emph{Specification Tests for the Propensity Score},
49 |       available at \url{http://sites.google.com/site/pedrohcsantanna/}.
50 | }
51 | 


--------------------------------------------------------------------------------
/man/pstest.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/pstest.R
 3 | \name{pstest}
 4 | \alias{pstest}
 5 | \title{pstest: Tests for the Propensity Score}
 6 | \usage{
 7 | pstest(d, pscore, xpscore, pscore.model = NULL, model = "logit",
 8 |   w = "ind", dist = "Mammen", nboot = 1000, cores = 1,
 9 |   chunk = 1000)
10 | }
11 | \arguments{
12 | \item{d}{a vector containing the binary treatment indicator.}
13 | 
14 | \item{pscore}{a vector containing the estimated propensity scores.}
15 | 
16 | \item{xpscore}{a matrix (or data frame) containing the covariates (and their
17 | transformations) included in the propensity score
18 | estimation. It should also include the constant term.}
19 | 
20 | \item{pscore.model}{in case you you set model="het.probit", pscore.model is the entire hetglm object.
21 | Default for pscore.model is NULL.}
22 | 
23 | \item{model}{a description of the functional form (link function) used
24 | to estimated propensity score. The alternatives are:
25 | 'logit' (default), 'probit', and het.probit}
26 | 
27 | \item{w}{a description of which weight function the projection is based on.
28 | The alternatives are 'ind' (default), which set \eqn{w(q,u)=1(q<=u)},
29 | 'exp', which set \eqn{w(q,u)=exp(qu)}, 'logistic', which set
30 | \eqn{w(q,u)=1/[1+exp(1-qu)]}, 'sin', which set \eqn{w(q,u)=sin(qu)}, and
31 | 'sincos', which set \eqn{w(q,u)=sin(qu)+cos(qu)}.}
32 | 
33 | \item{dist}{a description of which distribution to use during the bootstrap.
34 | The alternatives are 'Mammen' (default), and 'Rademacher'.}
35 | 
36 | \item{nboot}{number of bootstrap replicates to perform. Default is 1,000.}
37 | 
38 | \item{cores}{number of cores to use during the bootstrap. Default is 1.
39 | If cores is greater than 1, the bootstrap is conducted using
40 | parLapply, instead of lapply type call.}
41 | 
42 | \item{chunk}{a value that determine the size of each 'tile'. Such argument is used
43 | to split the original data into chunks, saving memory.
44 | Default value is 1,000. If the \emph{pstest} function throw a
45 | memory error, you should choose a smaller value for \emph{chunk}.}
46 | }
47 | \value{
48 | a list containing the Kolmogorov-Smirnov and Cramer-von Mises test
49 |        statistics for the null hypothesis of correctly specified propensity
50 |        score model (kstest and cvmtest, respectively), and their associate
51 |        bootstrapped p-values, pvks and pvcvm, respectively. All inputs are also
52 |        returned.
53 | }
54 | \description{
55 | \emph{pstest} computes Kolmogorov-Smirnov and Cramer-von Mises type tests
56 | for the null hypothesis that a parametric model for the propensity score is
57 | is correctly specified. For details of the testing procedure, see
58 | Sant'Anna and Song (2016),'Specification Tests for the Propensity Score'.
59 | }
60 | \examples{
61 | # Example based on simulation data
62 | # Simulate vector of covariates
63 | set.seed(1234)
64 | x1 <- runif(100)
65 | x2 <- rt(100, 5)
66 | x3 <- rpois(100, 3)
67 | # generate treatment status score based on Probit Specification
68 | treat <- (x1 + x2 + x3 >= rnorm(100, 4, 5))
69 | # estimate correctly specified propensity score based on Probit
70 | pscore <- stats::glm(treat ~ x1 + x2 + x3, family = binomial(link = "probit"),
71 |               x = TRUE)
72 | # Test the correct specification of estimated propensity score, using
73 | # the weight function 'ind', and bootstrap based on 'Mammen'.
74 | pstest(d = pscore$y, pscore = pscore$fit, xpscore = pscore$x,
75 |        model = "probit", w = "ind", dist = "Mammen")
76 | # Alternatively, one can use the 'sin' weight function
77 | pstest(d = pscore$y, pscore = pscore$fit, xpscore = pscore$x,
78 |        model = "probit", w = "sin", dist = "Mammen")
79 | 
80 | }
81 | \references{
82 | Sant'Anna, Pedro H. C, and Song, Xiaojun (2019), \emph{Specification Tests for the Propensity Score},
83 |       Journal of Econometrics, vol. 210 (2), p. 379-404.
84 | }
85 | 


--------------------------------------------------------------------------------