├── src
│   ├── Makevars
│   ├── subroutines.h
│   ├── rand.h
│   ├── vector.h
│   ├── subroutines.c
│   ├── init.c
│   ├── models.h
│   ├── vector.c
│   ├── rand.c
│   └── NIstandard.c
├── .Rbuildignore
├── data
│   └── seguro.RData
├── .gitignore
├── R
│   ├── weighted.var.R
│   ├── onAttach.R
│   ├── MARnocov.R
│   ├── CADEbasic.R
│   ├── ATOPbasic.R
│   ├── print.randomize.R
│   ├── Seguro.R
│   ├── boundsCI.R
│   ├── print.summary.NoncompMAR.R
│   ├── summary.NoncompMAR.R
│   ├── CACEcov.R
│   ├── PAPE.R
│   ├── PAPD.R
│   ├── AUPEC.R
│   ├── Noncomp.mom.R
│   ├── ATEnocov.R
│   ├── NIbprobit.R
│   ├── ATOPnoassumption.R
│   ├── CACEcluster.R
│   ├── CACEnocov.R
│   ├── ATOPsens.R
│   ├── ATOPobs.R
│   ├── CADEreg.R
│   ├── NoncompMAR.R
│   ├── ATEcluster.R
│   ├── randomize.R
│   ├── CADErand.R
│   ├── NInocov.R
│   └── ATEbounds.R
├── .travis.yml
├── experiment.Rproj
├── README.md
├── man
│   ├── seguro.Rd
│   ├── AUPEC.Rd
│   ├── PAPE.Rd
│   ├── PAPD.Rd
│   ├── ATEnocov.Rd
│   ├── CADEreg.Rd
│   ├── ATOPnoassumption.Rd
│   ├── ATOPsens.Rd
│   ├── CADErand.Rd
│   ├── ATOPobs.Rd
│   ├── CACEcluster.Rd
│   ├── randomize.Rd
│   ├── ATEbounds.Rd
│   ├── ATEcluster.Rd
│   └── NoncompLI.Rd
├── DESCRIPTION
└── NAMESPACE
/src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | 3 | --------------------------------------------------------------------------------
/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^.*\.md$ 3 | ^\.Rproj\.user$ 4 | ^\.travis\.yml$ 5 | --------------------------------------------------------------------------------
/data/seguro.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kosukeimai/experiment/HEAD/data/seguro.RData --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | .DS_Store --------------------------------------------------------------------------------
/R/weighted.var.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### weighted variance formula 3 | ### 4 | 5 | weighted.var <- function(x, w) 6 | return(sum(w * (x - weighted.mean(x,w))^2)/((length(x)-1)*mean(w))) 7 | 8 | --------------------------------------------------------------------------------
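A quick check of the weighted.var() formula above: because mean(w) in the denominator cancels any common scaling of the weights, constant weights collapse the expression to the ordinary sample variance. A minimal sketch in base R (toy values invented here):

    ## with equal weights, weighted.var() should agree with var()
    weighted.var <- function(x, w)
      sum(w * (x - weighted.mean(x, w))^2) / ((length(x) - 1) * mean(w))
    x <- c(1.2, 3.4, 2.2, 5.0)
    all.equal(weighted.var(x, rep(1, 4)), var(x))  # TRUE
    all.equal(weighted.var(x, rep(2, 4)), var(x))  # TRUE: rescaling w changes nothing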
"\nVersion: ", ver, "\n", sep="")) 6 | } 7 | 8 | -------------------------------------------------------------------------------- /src/rand.h: -------------------------------------------------------------------------------- 1 | double TruncNorm(double lb, double ub, double mu, double var, int invcdf); 2 | void rMVN(double *Sample, double *mean, double **inv_Var, int size); 3 | double dMVN(double *Y, double *MEAN, double **SIG_INV, int dim, int give_log); 4 | void rWish(double **Sample, double **S, int df, int size); 5 | double dnegbin(int Y, double mu, double theta, int give_log); 6 | double rnegbin(double mu, double theta); 7 | -------------------------------------------------------------------------------- /experiment.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /R/MARnocov.R: -------------------------------------------------------------------------------- 1 | ### Missing at random 2 | MARnocov <- function(Y, D, Z) { 3 | R <- (!is.na(Y))*1 4 | ITT <- mean(Y[R == 1 & D == 1 & Z == 1])*mean(D[Z == 1]) + 5 | mean(Y[R == 1 & D == 0 & Z == 1])*(1-mean(D[Z == 1])) - 6 | mean(Y[R == 1 & D == 1 & Z == 0])*mean(D[Z == 0]) - 7 | mean(Y[R == 1 & D == 0 & Z == 0]) * (1-mean(D[Z == 0])) 8 | CACE <- ITT/(mean(D[Z==1])-mean(D[Z==0])) 9 | return(list(ITT = ITT, CACE = CACE)) 10 | } 11 | -------------------------------------------------------------------------------- /R/CADEbasic.R: -------------------------------------------------------------------------------- 1 | ##### basic functions for calculating the CADE 2 | 3 | Difflist=function(a,b){ 4 | J=length(a) 5 | c=a 6 | for (j in 1:J){ 7 | c[[j]]=a[[j]]-b[[j]] 8 | } 9 | return(c) 10 | } 11 | 12 | Meanlist=function(a){ 13 | J=length(a) 14 | s=0 15 | for(j in 1:J){ 16 | s=s+mean(a[[j]]) 17 | } 18 | return(s/J) 19 | } 20 | 21 | Productlist=function(a,b){ 22 | J=length(a) 23 | c=a 24 | for (j in 1:J){ 25 | c[[j]]=a[[j]]*b[[j]] 26 | } 27 | return(c) 28 | } 29 | -------------------------------------------------------------------------------- /R/ATOPbasic.R: -------------------------------------------------------------------------------- 1 | #### functions for calculating bounds 2 | 3 | f1=function(x,y,u){u-(u-x)/y } 4 | f2=function(x,y,l){l+(x-l)/y } 5 | 6 | fLB= function (x1,y1,x0,y0,l,u){ 7 | ifelse(y1>0, max(l,f1(x1,y1,u)),l)-ifelse(y0>0,min(u,f2(x0,y0,l)),u) 8 | } 9 | 10 | fUB= function (x1,y1,x0,y0,l,u){ 11 | ifelse(y1>0, min(u,f2(x1,y1,l)),u)-ifelse(y0>0,max(l,f1(x0,y0,u)),l) 12 | } 13 | 14 | ### functions for calculating CI 15 | 16 | 17 | CalC= function (x,hat.LB,hat.UB,sigma.LB,sigma.UB,alpha){ 18 | pnorm(x+(hat.UB-hat.LB)/max(sigma.LB,sigma.UB))-pnorm(-x)-(1-alpha) 19 | } -------------------------------------------------------------------------------- /R/print.randomize.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.randomize <- function(x, digits = getOption("digits"), ...) 
/R/print.randomize.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.randomize <- function(x, digits = getOption("digits"), ...) { 3 | 4 | cat("\nCall:\n", deparse(x$call), "\n\n", sep = "") 5 | cat("\n Treatment Assignment:\n") 6 | print(table(x$treatment, exclude = NULL, ...), digits = digits) 7 | 8 | if (!is.null(x$block)) { 9 | cat("\n Treatment Assignment by Blocks:\n\n") 10 | print(ftable(x$block, x$treatment, exclude = NULL, ...), 11 | digits = digits) 12 | } 13 | 14 | cat("\nTotal number of observations:", 15 | length(na.omit(x$treatment)), "\n\n") 16 | invisible(x) 17 | } 18 | --------------------------------------------------------------------------------
/src/vector.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int *intArray(int num); 5 | void PintArray(int *ivector, int length); 6 | int **intMatrix(int row, int col); 7 | void PintMatrix(int **imatrix, int row, int col); 8 | 9 | double *doubleArray(int num); 10 | void PdoubleArray(double *dvector, int length); 11 | double **doubleMatrix(int row, int col); 12 | void PdoubleMatrix(double **dmatrix, int row, int col); 13 | 14 | double ***doubleMatrix3D(int x, int y, int z); 15 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z); 16 | 17 | long *longArray(int num); 18 | 19 | void FreeMatrix(double **Matrix, int row); 20 | void FreeintMatrix(int **Matrix, int row); 21 | void Free3DMatrix(double ***Matrix, int index, int row); --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # experiment: R package for designing and analyzing randomized experiments [![Build Status](https://travis-ci.org/kosukeimai/experiment.svg?branch=master)](https://travis-ci.org/kosukeimai/experiment) [![CRAN Version](http://www.r-pkg.org/badges/version/experiment)](https://CRAN.R-project.org/package=experiment) ![CRAN downloads](http://cranlogs.r-pkg.org/badges/grand-total/experiment) 2 | 3 | The R package experiment provides various statistical methods for designing and analyzing randomized experiments. One functionality of the package is the implementation of randomized-block and matched-pair designs based on possibly multivariate pre-treatment covariates. The package also provides the tools to analyze various randomized experiments including cluster randomized experiments, two-stage randomized experiments, randomized experiments with noncompliance, and randomized experiments with missing data. 4 | --------------------------------------------------------------------------------
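A minimal quick start, lifted directly from the man-page examples reproduced later in this dump (the seguro data and ATOPnoassumption() are documented below):

    library(experiment)
    data(seguro)
    attach(seguro)
    ATOPnoassumption(Ya, Yb, Ra, Rb, Ta, Tb, l = 0, u = 1, alpha = 0.05, rep = 100)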
/R/Seguro.R: -------------------------------------------------------------------------------- 1 | #' Data from the Mexican universal health insurance program, Seguro Popular. 2 | #' 3 | #' This data set contains the outcome, missing indicator and the treatment for the application 4 | #' in Kosuke Imai and Zhichao Jiang (2018). 5 | #' 6 | #' @format A data frame with 14,902 rows and 6 variables: 7 | #' \describe{ 8 | #' \item{Ya}{Satisfaction for the first unit in the matched pairs} 9 | #' \item{Yb}{Satisfaction for the second unit in the matched pairs} 10 | #' \item{Ra}{Missing indicator for the first unit in the matched pairs} 11 | #' \item{Rb}{Missing indicator for the second unit in the matched pairs} 12 | #' \item{Ta}{Treatment assignment for the first unit in the matched pairs} 13 | #' \item{Tb}{Treatment assignment for the second unit in the matched pairs} 14 | #' } 15 | #' 16 | #' @docType data 17 | #' @keywords datasets 18 | #' @name seguro 19 | #' @examples 20 | #' data(seguro) 21 | "seguro" --------------------------------------------------------------------------------
/man/seguro.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Seguro.R 3 | \docType{data} 4 | \name{seguro} 5 | \alias{seguro} 6 | \title{Data from the Mexican universal health insurance program, Seguro Popular.} 7 | \format{ 8 | A data frame with 14,902 rows and 6 variables: 9 | \describe{ 10 | \item{Ya}{Satisfaction for the first unit in the matched pairs} 11 | \item{Yb}{Satisfaction for the second unit in the matched pairs} 12 | \item{Ra}{Missing indicator for the first unit in the matched pairs} 13 | \item{Rb}{Missing indicator for the second unit in the matched pairs} 14 | \item{Ta}{Treatment assignment for the first unit in the matched pairs} 15 | \item{Tb}{Treatment assignment for the second unit in the matched pairs} 16 | } 17 | } 18 | \usage{ 19 | seguro 20 | } 21 | \description{ 22 | This data set contains the outcome, missing indicator and the treatment for the application 23 | in Kosuke Imai and Zhichao Jiang (2018). 24 | } 25 | \examples{ 26 | data(seguro) 27 | } 28 | \keyword{datasets} 29 | --------------------------------------------------------------------------------
/R/boundsCI.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### This function calculates the CI of bounds using the B-method 3 | ### and Bonferroni given the bootstrap draws 4 | ### 5 | 6 | boundsCI <- function(lb.rep, ub.rep, lb.est, ub.est, alpha) { 7 | 8 | reps <- length(lb.rep) 9 | 10 | ## bonferroni 11 | bon.lower <- quantile(lb.rep, alpha/2) 12 | bon.upper <- quantile(ub.rep, 1-alpha/2) 13 | 14 | ## b-method 15 | bmin.dif <- lb.rep-lb.est 16 | bmax.dif <- ub.est-ub.rep 17 | bmin.max.dif <- c(bmin.dif, bmax.dif) 18 | b.sup <- rep(NA, 2*reps) 19 | 20 | for (i in 1:(2*reps)) { 21 | b.sup[i]<-max(sum(bmin.dif<=bmin.max.dif[i])/reps, #emp.
dis func 22 | sum(bmax.dif<=bmin.max.dif[i])/reps) 23 | } 24 | beta <- quantile(b.sup, 1-alpha) 25 | b.lower <- lb.est-quantile(bmin.dif, beta) 26 | b.upper <- ub.est+quantile(bmax.dif, beta) 27 | names(b.lower) <- names(bon.lower) 28 | names(b.upper) <- names(bon.upper) 29 | 30 | ##output 31 | return(list(bonferroni = c(bon.lower, bon.upper), 32 | bmethod = c(b.lower, b.upper))) 33 | } 34 | --------------------------------------------------------------------------------
/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: experiment 2 | Version: 1.2.1 3 | Date: 2022-04-07 4 | Title: R Package for Designing and Analyzing Randomized Experiments 5 | Authors@R: c( 6 | person("Kosuke", "Imai", , "imai@harvard.edu", c("aut", "cre")), 7 | person("Zhichao", "Jiang", , "zhichaoj@princeton.edu", c("aut")) 8 | ) 9 | Maintainer: Kosuke Imai <imai@harvard.edu> 10 | Depends: boot, MASS, R (>= 2.4.0) 11 | Description: Provides various statistical methods for 12 | designing and analyzing randomized experiments. One functionality 13 | of the package is the implementation of randomized-block and 14 | matched-pair designs based on possibly multivariate pre-treatment 15 | covariates. The package also provides the tools to analyze various 16 | randomized experiments including cluster randomized experiments, 17 | two-stage randomized experiments, randomized experiments with 18 | noncompliance, and randomized experiments with missing data. 19 | License: GPL (>=2) 20 | LazyLoad: yes 21 | LazyData: yes 22 | URL: https://github.com/kosukeimai/experiment 23 | BugReports: https://github.com/kosukeimai/experiment/issues 24 | RoxygenNote: 7.1.2 25 | --------------------------------------------------------------------------------
/R/print.summary.NoncompMAR.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.summary.NoncompMAR <- function(x, digits = max(3, getOption("digits") 3 | - 3), param = TRUE, ...) { 4 | 5 | cat("\nCall:\n") 6 | cat(paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", 7 | sep = "") 8 | 9 | cat("\nQuantities of Interest:\n") 10 | printCoefmat(x$qoi.table, digits = digits, na.print = "NA", ...) 11 | 12 | if (!is.null(x$coefC.table) & param) { 13 | cat("\nCoefficients for the compliance model:\n") 14 | printCoefmat(x$coefC.table, digits = digits, na.print = "NA", ...) 15 | } 16 | 17 | if (!is.null(x$coefO.table) & param) { 18 | cat("\nCoefficients for the outcome model:\n") 19 | printCoefmat(x$coefO.table, digits = digits, na.print = "NA", ...) 20 | } 21 | 22 | if (!is.null(x$tauO.table) & param) { 23 | cat("\nThreshold parameters for the outcome model:\n") 24 | printCoefmat(x$tauO.table, digits = digits, na.print = "NA", ...) 25 | } 26 | 27 | cat("\nNumber of observations:", x$n.obs) 28 | cat("\nNumber of Gibbs draws:", x$n.draws) 29 | cat("\n\n") 30 | invisible(x) 31 | } 32 | --------------------------------------------------------------------------------
/man/AUPEC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AUPEC.R 3 | \name{AUPEC} 4 | \alias{AUPEC} 5 | \title{Estimation of the unnormalized Area Under Prescription Evaluation Curve (AUPEC) in Completely Randomized Experiments} 6 | \usage{ 7 | AUPEC(T, tau, Y) 8 | } 9 | \arguments{ 10 | \item{T}{The unit-level binary treatment receipt variable.} 11 | 12 | \item{tau}{The unit-level continuous score for treatment assignment. We assume those that have tau<0 should
We assume those that have tau<0 should 13 | not have treatment. Conditional Average Treatment Effect is one possible measure.} 14 | 15 | \item{Y}{The outcome variable of interest.} 16 | } 17 | \value{ 18 | A list that contains the following items: \item{aupec}{The estimated 19 | Area Under Prescription Evaluation Curve} \item{sd}{The estimated standard deviation 20 | of AUPEC.} 21 | } 22 | \description{ 23 | This function estimates AUPEC. The details of the methods for this design are given in Imai and Li (2019). 24 | } 25 | \references{ 26 | Imai and Li (2019). \dQuote{Experimental Evaluation of Individualized Treatment Rules}, 27 | } 28 | \author{ 29 | Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 30 | \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 31 | } 32 | \keyword{evaluation} 33 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,randomize) 4 | S3method(print,summary.NoncompMAR) 5 | S3method(summary,NoncompMAR) 6 | export(ATEbounds) 7 | export(ATEcluster) 8 | export(ATEnocov) 9 | export(ATOPnoassumption) 10 | export(ATOPobs) 11 | export(ATOPsens) 12 | export(AUPEC) 13 | export(CACEcluster) 14 | export(CADErand) 15 | export(CADEreg) 16 | export(PAPD) 17 | export(PAPE) 18 | export(randomize) 19 | importFrom(MASS,mvrnorm) 20 | importFrom(boot,boot) 21 | importFrom(stats,coef) 22 | importFrom(stats,complete.cases) 23 | importFrom(stats,cov) 24 | importFrom(stats,fitted) 25 | importFrom(stats,ftable) 26 | importFrom(stats,lm) 27 | importFrom(stats,mahalanobis) 28 | importFrom(stats,model.frame) 29 | importFrom(stats,model.matrix) 30 | importFrom(stats,model.response) 31 | importFrom(stats,na.fail) 32 | importFrom(stats,na.omit) 33 | importFrom(stats,pnorm) 34 | importFrom(stats,printCoefmat) 35 | importFrom(stats,qnorm) 36 | importFrom(stats,quantile) 37 | importFrom(stats,rbinom) 38 | importFrom(stats,rnorm) 39 | importFrom(stats,runif) 40 | importFrom(stats,sd) 41 | importFrom(stats,terms) 42 | importFrom(stats,uniroot) 43 | importFrom(stats,var) 44 | importFrom(stats,vcov) 45 | importFrom(stats,weighted.mean) 46 | importFrom(utils,packageDescription) 47 | useDynLib(experiment) 48 | -------------------------------------------------------------------------------- /man/PAPE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PAPE.R 3 | \name{PAPE} 4 | \alias{PAPE} 5 | \title{Estimation of the Population Average Prescription Effect in Completely Randomized Experiments} 6 | \usage{ 7 | PAPE(T, That, Y, plim = NA) 8 | } 9 | \arguments{ 10 | \item{T}{The unit-level binary treatment receipt variable.} 11 | 12 | \item{That}{The unit-level binary treatment that would have been assigned by the 13 | individualized treatment rule.} 14 | 15 | \item{Y}{The outcome variable of interest.} 16 | 17 | \item{plim}{The maximum percentage of population that can be treated under the 18 | budget constraint. Should be a decimal between 0 and 1. 
Default is NA which assumes 19 | no budget constraint.} 20 | } 21 | \value{ 22 | A list that contains the following items: \item{pape}{The estimated 23 | Population Average Prescription Effect.} \item{sd}{The estimated standard deviation 24 | of PAPE.} 25 | } 26 | \description{ 27 | This function estimates the Population Average Prescription Effect with and without a budget 28 | constraint. The details of the methods for this design are given in Imai and Li (2019). 29 | } 30 | \references{ 31 | Imai and Li (2019). \dQuote{Experimental Evaluation of Individualized Treatment Rules}, 32 | } 33 | \author{ 34 | Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 35 | \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 36 | } 37 | \keyword{evaluation} 38 | -------------------------------------------------------------------------------- /man/PAPD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PAPD.R 3 | \name{PAPD} 4 | \alias{PAPD} 5 | \title{Estimation of the Population Average Prescription Difference in Completely Randomized Experiments} 6 | \usage{ 7 | PAPD(T, Thatfp, Thatgp, Y, plim) 8 | } 9 | \arguments{ 10 | \item{T}{The unit-level binary treatment receipt variable.} 11 | 12 | \item{Thatfp}{The unit-level binary treatment that would have been assigned by the 13 | first individualized treatment rule.} 14 | 15 | \item{Thatgp}{The unit-level binary treatment that would have been assigned by the 16 | second individualized treatment rule.} 17 | 18 | \item{Y}{The outcome variable of interest.} 19 | 20 | \item{plim}{The maximum percentage of population that can be treated under the 21 | budget constraint. Should be a decimal between 0 and 1.} 22 | } 23 | \value{ 24 | A list that contains the following items: \item{papd}{The estimated 25 | Population Average Prescription Difference} \item{sd}{The estimated standard deviation 26 | of PAPD.} 27 | } 28 | \description{ 29 | This function estimates the Population Average Prescription Difference with a budget 30 | constraint. The details of the methods for this design are given in Imai and Li (2019). 31 | } 32 | \references{ 33 | Imai and Li (2019). \dQuote{Experimental Evaluation of Individualized Treatment Rules}, 34 | } 35 | \author{ 36 | Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 37 | \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 38 | } 39 | \keyword{evaluation} 40 | -------------------------------------------------------------------------------- /man/ATEnocov.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATEnocov.R 3 | \name{ATEnocov} 4 | \alias{ATEnocov} 5 | \title{Estimation of the Average Treatment Effect in Randomized Experiments} 6 | \usage{ 7 | ATEnocov(Y, Z, data = parent.frame(), match = NULL) 8 | } 9 | \arguments{ 10 | \item{Y}{The outcome variable of interest.} 11 | 12 | \item{Z}{The (randomized) treatment variable. This variable should be 13 | binary.} 14 | 15 | \item{data}{A data frame containing the relevant variables.} 16 | 17 | \item{match}{A variable indicating matched-pairs. The two units in the same 18 | matched-pair should have the same value.} 19 | } 20 | \value{ 21 | A list of class \code{ATEnocov} which contains the following items: 22 | \item{call}{ The matched call. } \item{Y}{ The outcome variable. 
} 23 | \item{Z}{ The treatment variable. } \item{match}{ The matched-pair 24 | indicator variable. } \item{ATEest}{ The estimated average treatment 25 | effect. } \item{ATE.var}{ The estimated variance of the average treatment 26 | effect estimator. } \item{diff}{ Within-pair differences if the 27 | matched-pair design is analyzed. } 28 | } 29 | \description{ 30 | This function computes the standard ``difference-in-means'' estimate of the 31 | average treatment effect in randomized experiments without using 32 | pre-treatment covariates. The treatment variable is assumed to be binary. 33 | Currently, two designs are allowed: the completely randomized design and the 34 | matched-pair design. 35 | } 36 | \references{ 37 | Imai, Kosuke (2008). \dQuote{Randomization-based Inference and 38 | Efficiency Analysis in Experiments under the Matched-Pair Design}, Statistics in Medicine. 39 | } 40 | \author{ 41 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 42 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 43 | } 44 | \keyword{design} 45 | --------------------------------------------------------------------------------
/R/summary.NoncompMAR.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | summary.NoncompMAR <- function(object, CI=c(2.5, 97.5),...){ 3 | 4 | qoi <- cbind(object$itt, object$cace, object$pc, object$base) 5 | qoi <- cbind(apply(qoi, 2, mean), apply(qoi, 2, sd), 6 | apply(qoi, 2, quantile, min(CI)/100), 7 | apply(qoi, 2, quantile, max(CI)/100)) 8 | colnames(qoi) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 9 | paste(max(CI), "%", sep="")) 10 | 11 | if (!is.null(object$coefficientsC)) { 12 | coefC <- object$coefficientsC 13 | coefC <- cbind(apply(coefC, 2, mean), apply(coefC, 2, sd), 14 | apply(coefC, 2, quantile, min(CI)/100), 15 | apply(coefC, 2, quantile, max(CI)/100)) 16 | colnames(coefC) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 17 | paste(max(CI), "%", sep="")) 18 | } 19 | else 20 | coefC <- NULL 21 | 22 | if (!is.null(object$coefficientsO)) { 23 | coefO <- object$coefficientsO 24 | coefO <- cbind(apply(coefO, 2, mean), apply(coefO, 2, sd), 25 | apply(coefO, 2, quantile, min(CI)/100), 26 | apply(coefO, 2, quantile, max(CI)/100)) 27 | colnames(coefO) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 28 | paste(max(CI), "%", sep="")) 29 | } 30 | else 31 | coefO <- NULL 32 | 33 | if (!is.null(object$coefficientsS)) { 34 | coefS <- object$coefficientsS 35 | coefS <- cbind(apply(coefS, 2, mean), apply(coefS, 2, sd), 36 | apply(coefS, 2, quantile, min(CI)/100), 37 | apply(coefS, 2, quantile, max(CI)/100)) 38 | colnames(coefS) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 39 | paste(max(CI), "%", sep="")) 40 | } 41 | else 42 | coefS <- NULL 43 | 44 | 45 | ans <- list(call = object$call, n.obs = length(object$Y), n.draws = 46 | object$n.draws, qoi.table = qoi, coefC.table = coefC, 47 | coefO.table = coefO, coefS.table = coefS) 48 | 49 | if (!is.null(object$thresholds)) { 50 | tauO <- object$thresholds 51 | tauO <- cbind(apply(tauO, 2, mean), apply(tauO, 2, sd), 52 | apply(tauO, 2, quantile, min(CI)/100), 53 | apply(tauO, 2, quantile, max(CI)/100)) 54 | colnames(tauO) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 55 | paste(max(CI), "%", sep="")) 56 | ans$tauO.table <- tauO 57 | } 58 | 59 | class(ans) <- "summary.NoncompMAR" 60 | return(ans) 61 | } 62 | --------------------------------------------------------------------------------
/R/CACEcov.R:
-------------------------------------------------------------------------------- 1 | ### 2 | ### Calculate the CACE with covariates and optional clustering using 3 | ### two-stage least squares 4 | ### 5 | 6 | CACEcov <- function(Y, D, Z, X, grp = NULL, data = parent.frame(), 7 | robust = FALSE, fast = TRUE){ 8 | 9 | call <- match.call() 10 | Y <- matrix(eval(call$Y, data), ncol = 1) 11 | N <- nrow(Y) 12 | D <- matrix(eval(call$D, data), ncol = 1) 13 | X <- model.matrix(X, data = data) 14 | Z <- cbind(X, matrix(eval(call$Z, data), nrow = N)) 15 | X <- cbind(X, D) 16 | grp <- eval(call$grp, data) 17 | if (!is.null(grp)) { 18 | sgrp <- sort(grp, index.return = TRUE) 19 | grp <- grp[sgrp$ix] 20 | X <- X[sgrp$ix,] 21 | Z <- Z[sgrp$ix,] 22 | D <- D[sgrp$ix,] 23 | Y <- Y[sgrp$ix,] 24 | } 25 | 26 | dhat <- fitted(tmp <- lm(D ~ -1 + Z)) 27 | beta <- coef(lm(Y ~ -1 + X[,-ncol(X)] + dhat)) 28 | ZZinv <- (vcov(tmp)/(summary(tmp)$sigma^2)) 29 | XZ <- t(X) %*% Z 30 | XPzXinv <- solve(XZ %*% ZZinv %*% t(XZ)) 31 | 32 | epsilon <- c(Y - X %*% beta) 33 | est <- beta[length(beta)] 34 | 35 | if (is.null(grp)) { 36 | if (robust) { 37 | if (fast) 38 | ZOmegaZ <- t(Z) %*% diag(epsilon^2) %*% Z 39 | else { 40 | ZOmegaZ <- matrix(0, ncol = ncol(Z), nrow = ncol(Z)) 41 | for (i in 1:nrow(Z)) 42 | ZOmegaZ <- ZOmegaZ + crossprod(matrix(Z[i,], nrow = 1)) * epsilon[i]^2 43 | } 44 | var <- XPzXinv %*% XZ %*% ZZinv 45 | var <- var %*% ZOmegaZ %*% t(var) 46 | } else { 47 | sig2 <- c(crossprod(epsilon))/ N 48 | var <- sig2 * XPzXinv 49 | } 50 | } else { 51 | n.grp <- length(unique(grp)) 52 | if (fast) { 53 | Omega <- matrix(0, ncol = N, nrow = N) 54 | counter <- 1 55 | for (i in 1:n.grp) { 56 | n.grp.obs <- sum(grp == unique(grp)[i]) 57 | Omega[counter:(counter+n.grp.obs-1),counter:(counter+n.grp.obs-1)] <- 58 | epsilon[grp == unique(grp)[i]] %*% t(epsilon[grp == unique(grp)[i]]) 59 | counter <- counter + n.grp.obs 60 | } 61 | ZOmegaZ <- t(Z) %*% Omega %*% Z 62 | } else { 63 | ZOmegaZ <- matrix(0, ncol = ncol(Z), nrow = ncol(Z)) 64 | for (i in 1:n.grp) { 65 | ZOmegaZ <- ZOmegaZ + t(Z[grp == unique(grp)[i],]) %*% 66 | (epsilon[grp == unique(grp)[i]] %*% t(epsilon[grp == unique(grp)[i]])) %*% 67 | Z[grp == unique(grp)[i],] 68 | } 69 | 70 | } 71 | var <- XPzXinv %*% XZ %*% ZZinv 72 | var <- var %*% ZOmegaZ %*% t(var) 73 | } 74 | names(est) <- "CACE" 75 | 76 | return(list(est = est, var = var[nrow(var),ncol(var)])) 77 | } 78 | --------------------------------------------------------------------------------
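CACEcov() above spells out two-stage least squares by hand: the first stage lm(D ~ -1 + Z) regresses the received treatment on the instrument and covariates, and its fitted values dhat replace D in the outcome regression, with homoskedastic, heteroskedasticity-robust (robust = TRUE), or cluster-robust (grp) variance options. It is not listed among the NAMESPACE exports shown earlier, so it has to be reached with the ':::' operator. A hedged usage sketch (data frame and variable names invented here):

    ## toy data: Z is a randomized encouragement, D the actual take-up,
    ## x1 a pre-treatment covariate (all simulated placeholders)
    set.seed(1)
    df <- data.frame(Z = rbinom(200, 1, 0.5), x1 = rnorm(200))
    df$D <- rbinom(200, 1, 0.2 + 0.6 * df$Z)   # Z shifts take-up
    df$Y <- rnorm(200, mean = df$D)            # D shifts the outcome
    experiment:::CACEcov(Y, D, Z, X = ~ x1, data = df, robust = TRUE)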
/man/CADEreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CADEreg.R 3 | \name{CADEreg} 4 | \alias{CADEreg} 5 | \title{Regression-based method for the complier average direct effect} 6 | \usage{ 7 | CADEreg(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor.} 11 | } 12 | \value{ 13 | A list of class \code{CADEreg} which contains the following items: 14 | \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 15 | \item{var1.clu}{ The cluster-robust variance of CADE(1). } \item{var0.clu}{ The cluster-robust variance of CADE(0). } 16 | \item{var1.clu.hc2}{ The cluster-robust HC2 variance of CADE(1). } 17 | \item{var0.clu.hc2}{ The cluster-robust HC2 variance of CADE(0). } 18 | \item{var1.hc2}{ The HC2 variance of CADE(1). } 19 | \item{var0.hc2}{ The HC2 variance of CADE(0). } 20 | \item{var1.ind}{ The individual-robust variance of CADE(1). } 21 | \item{var0.ind}{ The individual-robust variance of CADE(0). } 22 | \item{var1.reg}{ The proposed variance of CADE(1). } 23 | \item{var0.reg}{ The proposed variance of CADE(0). } 24 | } 25 | \description{ 26 | This function computes the point estimates of the complier average direct effect (CADE) and four 27 | different variance estimates: the HC2 variance, the cluster-robust variance, the cluster-robust HC2 28 | variance and the variance proposed in the reference. The estimators calculated using this function 29 | are cluster-weighted, i.e., the weights are equal for each cluster. To obtain the individual-weighted 30 | estimators, please multiply the received treatment and the outcome by \code{n_jJ/N}, where 31 | \code{n_j} is the number of individuals in cluster \code{j}, \code{J} is the number of clusters and 32 | \code{N} is the total number of individuals. 33 | } 34 | \details{ 35 | For the details of the method implemented by this function, see the 36 | references. 37 | } 38 | \references{ 39 | Kosuke Imai, Zhichao Jiang and Anup Malani (2021). 40 | \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Journal of the American Statistical Association}. 41 | } 42 | \author{ 43 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 44 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 45 | Zhichao Jiang, Department of Politics, Princeton University 46 | \email{zhichaoj@princeton.edu}. 47 | } 48 | \keyword{experiments} 49 | \keyword{randomized} 50 | \keyword{two-stage} 51 | --------------------------------------------------------------------------------
/man/ATOPnoassumption.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATOPnoassumption.R 3 | \name{ATOPnoassumption} 4 | \alias{ATOPnoassumption} 5 | \title{Bounding the ATOP when some of the Outcome Data are 6 | Missing Under the Matched-Pairs Design} 7 | \usage{ 8 | ATOPnoassumption(Ya, Yb, Ra, Rb, Ta, Tb, l, u, alpha, rep) 9 | } 10 | \arguments{ 11 | \item{Ya}{A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 12 | as \code{NA}.} 13 | 14 | \item{Yb}{A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 15 | as \code{NA}.} 16 | 17 | \item{Ra}{A vector of the missing data indicators of the first unit in the matched pairs.} 18 | 19 | \item{Rb}{A vector of the missing data indicators of the second unit in the matched pairs.} 20 | 21 | \item{Ta}{A vector of the treatment conditions of the first unit in the matched pairs.} 22 | 23 | \item{Tb}{A vector of the treatment conditions of the second unit in the matched pairs.} 24 | 25 | \item{l}{The lower limit of the outcome.} 26 | 27 | \item{u}{The upper limit of the outcome.} 28 | 29 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 30 | determine the (1-\code{alpha}) level of confidence intervals.
The default is 31 | \code{0.05}.} 32 | 33 | \item{rep}{The number of repetitions for bootstrapping.} 34 | } 35 | \value{ 36 | A list of class \code{ATOPnoassumption} which contains the following items: 37 | \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP. } 38 | \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 39 | \item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 40 | } 41 | \description{ 42 | This function computes the no-assumption bounds on the average treatment effect among always-observed pairs (ATOP) when 43 | some of the outcome data are missing. The confidence intervals for the 44 | ATOP are also computed. 45 | } 46 | \details{ 47 | For the details of the method implemented by this function, see the 48 | references. 49 | } 50 | \examples{ 51 | data(seguro) 52 | attach(seguro) 53 | ATOPnoassumption(Ya,Yb,Ra,Rb,Ta,Tb,l=0,u=1,alpha=0.05,rep=100) 54 | } 55 | \references{ 56 | Kosuke Imai and Zhichao Jiang (2018). 57 | \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 58 | Truncation-by-Death under the Matched-Pairs Design}, \emph{Technical Report}. Department of Politics, Princeton 59 | University. 60 | } 61 | \author{ 62 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 63 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 64 | Zhichao Jiang, Department of Politics, Princeton University 65 | \email{zhichaoj@princeton.edu}. 66 | } 67 | \keyword{design} 68 | \keyword{matched-pairs} 69 | --------------------------------------------------------------------------------
/R/PAPE.R: -------------------------------------------------------------------------------- 1 | #' Estimation of the Population Average Prescription Effect in Completely Randomized Experiments 2 | #' 3 | #' This function estimates the Population Average Prescription Effect with and without a budget 4 | #' constraint. The details of the methods for this design are given in Imai and Li (2019). 5 | #' 6 | #' 7 | #' 8 | #' @param T The unit-level binary treatment receipt variable. 9 | #' @param That The unit-level binary treatment that would have been assigned by the 10 | #' individualized treatment rule. 11 | #' @param Y The outcome variable of interest. 12 | #' @param plim The maximum percentage of population that can be treated under the 13 | #' budget constraint. Should be a decimal between 0 and 1. Default is NA which assumes 14 | #' no budget constraint. 15 | #' @return A list that contains the following items: \item{pape}{The estimated 16 | #' Population Average Prescription Effect.} \item{sd}{The estimated standard deviation 17 | #' of PAPE.} 18 | #' @author Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 19 | #' \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 20 | #' @references Imai and Li (2019).
\dQuote{Experimental Evaluation of Individualized Treatment Rules}, 21 | #' @keywords evaluation 22 | #' @export PAPE 23 | PAPE <- function (T, That, Y, plim = NA) { 24 | if (!(identical(as.numeric(T),as.numeric(as.logical(T))))) { 25 | stop("T should be binary.") 26 | } 27 | if (!(identical(as.numeric(That),as.numeric(as.logical(That))))) { 28 | stop("That should be binary.") 29 | } 30 | T=as.numeric(T) 31 | That=as.numeric(That) 32 | Y=as.numeric(Y) 33 | if (is.na(plim)) { 34 | n=length(Y) 35 | n1=sum(T) 36 | n0=n-n1 37 | n1h=sum(That) 38 | n0h=n-n1h 39 | probs=sum(That)/n 40 | SAPE=n/(n-1)*(1/n1*sum(T*That*Y)+1/n0*sum(Y*(1-T)*(1-That))-n1h/n1/n*sum(Y*T)-n0h/n0/n*sum(Y*(1-T))) 41 | Sf1=var(((That-probs)*Y)[T==1]) 42 | Sf0=var(((That-probs)*Y)[T==0]) 43 | SATE=1/n1*sum(T*Y)-1/n0*(sum((1-T)*Y)) 44 | covarterm=1/n^2*(SAPE^2+2*(n-1)*SAPE*SATE*(2*probs-1)-(1-probs)*probs*n*SATE^2) 45 | varexp=(n/(n-1))^2*(Sf1/n1+Sf0/n0+covarterm) 46 | return(list(pape=SAPE,sd=sqrt(varexp))) 47 | } else { 48 | if ((plim<0) | (plim>1)) { 49 | stop("Budget constraint should be between 0 and 1") 50 | } 51 | n=length(Y) 52 | n1=sum(T) 53 | n0=n-n1 54 | n1h=sum(That) 55 | n0h=n-n1h 56 | SAPEfp=1/n1*sum(T*That*Y)+1/n0*sum(Y*(1-T)*(1-That))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T)) 57 | Sfp1=var(((That-plim)*Y)[T==1]) 58 | Sfp0=var(((That-plim)*Y)[T==0]) 59 | kf1=mean(Y[T==1 & That==1])-mean(Y[T==0 & That==1]) 60 | kf0=mean(Y[T==1 & That==0])-mean(Y[T==0 & That==0]) 61 | varfp=Sfp1/n1+Sfp0/n0+floor(n*plim)*(n-floor(n*plim))/(n^2*(n-1))*((2*plim-1)*kf1^2-2*plim*kf1*kf0) 62 | return(list(pape=SAPEfp,sd=sqrt(varfp))) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /R/PAPD.R: -------------------------------------------------------------------------------- 1 | #' Estimation of the Population Average Prescription Difference in Completely Randomized Experiments 2 | #' 3 | #' This function estimates the Population Average Prescription Difference with a budget 4 | #' constraint. The details of the methods for this design are given in Imai and Li (2019). 5 | #' 6 | #' 7 | #' 8 | #' @param T The unit-level binary treatment receipt variable. 9 | #' @param Thatfp The unit-level binary treatment that would have been assigned by the 10 | #' first individualized treatment rule. 11 | #' @param Thatgp The unit-level binary treatment that would have been assigned by the 12 | #' second individualized treatment rule. 13 | #' @param Y The outcome variable of interest. 14 | #' @param plim The maximum percentage of population that can be treated under the 15 | #' budget constraint. Should be a decimal between 0 and 1. 16 | #' @return A list that contains the following items: \item{papd}{The estimated 17 | #' Population Average Prescription Difference} \item{sd}{The estimated standard deviation 18 | #' of PAPD.} 19 | #' @author Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 20 | #' \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 21 | #' @references Imai and Li (2019). 
\dQuote{Experimental Evaluation of Individualized Treatment Rules}, 22 | #' @keywords evaluation 23 | #' @export PAPD 24 | PAPD <- function (T, Thatfp,Thatgp , Y, plim) { 25 | if (!(identical(as.numeric(T),as.numeric(as.logical(T))))) { 26 | stop("T should be binary.") 27 | } 28 | if (!(identical(as.numeric(Thatfp),as.numeric(as.logical(Thatfp))))) { 29 | stop("Thatfp should be binary.") 30 | } 31 | if (!(identical(as.numeric(Thatgp),as.numeric(as.logical(Thatgp))))) { 32 | stop("Thatgp should be binary.") 33 | } 34 | if ((plim<0) | (plim>1)) { 35 | stop("Budget constraint should be between 0 and 1") 36 | } 37 | T=as.numeric(T) 38 | Thatfp=as.numeric(Thatfp) 39 | Thatgp=as.numeric(Thatgp) 40 | Y=as.numeric(Y) 41 | n=length(Y) 42 | n1=sum(T) 43 | n0=n-n1 44 | SAPEfp=1/n1*sum(T*Thatfp*Y)+1/n0*sum(Y*(1-T)*(1-Thatfp))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T)) 45 | SAPEgp=1/n1*sum(T*Thatgp*Y)+1/n0*sum(Y*(1-T)*(1-Thatgp))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T)) 46 | Sfp1=var(((Thatfp-plim)*Y)[T==1]) 47 | Sfp0=var(((Thatfp-plim)*Y)[T==0]) 48 | kf1=mean(Y[T==1 & Thatfp==1])-mean(Y[T==0 & Thatfp==1]) 49 | kf0=mean(Y[T==1 & Thatfp==0])-mean(Y[T==0 & Thatfp==0]) 50 | PAPD=SAPEfp-SAPEgp 51 | Sfgp1=var(((Thatfp-Thatgp)*Y)[T==1]) 52 | Sfgp0=var(((Thatfp-Thatgp)*Y)[T==0]) 53 | kg1=mean(Y[T==1 & Thatgp==1])-mean(Y[T==0 & Thatgp==1]) 54 | kg0=mean(Y[T==1 & Thatgp==0])-mean(Y[T==0 & Thatgp==0]) 55 | varfgp=Sfgp1/n1+Sfgp0/n0-floor(n*plim)*(n-floor(n*plim))/(n^2*(n-1))*(kf1^2+kg1^2)+ 56 | 2*floor(n*plim)*max(floor(n*plim),n-floor(n*plim))/(n^2*(n-1))*abs(kf1*kg1) 57 | if (varfgp>0) { 58 | return(list(papd=PAPD,sd=sqrt(varfgp))) 59 | } else { 60 | return(list(papd=PAPD,sd=0)) 61 | } 62 | } 63 | --------------------------------------------------------------------------------
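A usage sketch for the exported PAPE() and PAPD() defined above, on simulated data (all values invented here): T is the realized randomized assignment, while Thatf and Thatg are the assignments two competing individualized treatment rules would have made.

    library(experiment)
    set.seed(123)
    n <- 200
    T <- rbinom(n, 1, 0.5)
    Thatf <- rbinom(n, 1, 0.3)
    Thatg <- rbinom(n, 1, 0.3)
    Y <- rnorm(n)
    PAPE(T, Thatf, Y)                      # no budget constraint
    PAPE(T, Thatf, Y, plim = 0.3)          # at most 30% of units treated
    PAPD(T, Thatf, Thatg, Y, plim = 0.3)   # difference between the two rules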
/man/ATOPsens.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATOPsens.R 3 | \name{ATOPsens} 4 | \alias{ATOPsens} 5 | \title{Sensitivity analysis for the ATOP when some of the Outcome Data are 6 | Missing Under the Matched-Pairs Design} 7 | \usage{ 8 | ATOPsens(Ya, Yb, Ra, Rb, Ta, Tb, gamma, l, u, alpha, rep) 9 | } 10 | \arguments{ 11 | \item{Ya}{A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 12 | as \code{NA}.} 13 | 14 | \item{Yb}{A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 15 | as \code{NA}.} 16 | 17 | \item{Ra}{A vector of the missing data indicators of the first unit in the matched pairs.} 18 | 19 | \item{Rb}{A vector of the missing data indicators of the second unit in the matched pairs.} 20 | 21 | \item{Ta}{A vector of the treatment conditions of the first unit in the matched pairs.} 22 | 23 | \item{Tb}{A vector of the treatment conditions of the second unit in the matched pairs.} 24 | 25 | \item{gamma}{The sensitivity parameter which characterizes the degree of the within-pair similarity.} 26 | 27 | \item{l}{The lower limit of the outcome.} 28 | 29 | \item{u}{The upper limit of the outcome.} 30 | 31 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 32 | determine the (1-\code{alpha}) level of confidence intervals. The default is 33 | \code{0.05}.} 34 | 35 | \item{rep}{The number of repetitions for bootstrapping.} 36 | } 37 | \value{ 38 | A list of class \code{ATOPsens} which contains the following items: 39 | \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP. } 40 | \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 41 | \item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 42 | } 43 | \description{ 44 | This function computes the bounds on the average treatment effect among always-observed pairs (ATOP) 45 | with pre-specified sensitivity parameters when 46 | some of the outcome data are missing. The sensitivity parameter characterizes the degree of the within-pair similarity. 47 | The confidence intervals for the 48 | ATOP are also computed. 49 | } 50 | \details{ 51 | For the details of the method implemented by this function, see the 52 | references. 53 | } 54 | \examples{ 55 | data(seguro) 56 | attach(seguro) 57 | ATOPsens(Ya,Yb,Ra,Rb,Ta,Tb,gamma=0.95,l=0,u=1,alpha=0.05,rep=100) 58 | } 59 | \references{ 60 | Kosuke Imai and Zhichao Jiang (2018). 61 | \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 62 | Truncation-by-Death under the Matched-Pairs Design}, \emph{Statistics in Medicine}. 63 | } 64 | \author{ 65 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 66 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 67 | Zhichao Jiang, Department of Politics, Princeton University 68 | \email{zhichaoj@princeton.edu}. 69 | } 70 | \keyword{design} 71 | \keyword{matched-pairs} 72 | --------------------------------------------------------------------------------
/src/subroutines.c: -------------------------------------------------------------------------------- 1 | #define USE_FC_LEN_T 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "vector.h" 10 | #include "rand.h" 11 | 12 | /* The Sweep operator */ 13 | void SWP( 14 | double **X, /* The Matrix to work on */ 15 | int k, /* The row to sweep */ 16 | int size) /* The dim.
of X */ 17 | { 18 | int i,j; 19 | 20 | if (X[k][k] < 10e-20) 21 | error("SWP: singular matrix.\n"); 22 | else 23 | X[k][k]=-1/X[k][k]; 24 | for(i=0;i0))/n 29 | Z=rbinom(1e4,n,pf) 30 | 31 | ThatfA=numeric(n) 32 | kAf1=numeric(n) 33 | kAf0=numeric(n) 34 | kAf1A=numeric(n) 35 | kAf1B=numeric(n) 36 | kAf0A=numeric(n) 37 | kAf0B=numeric(n) 38 | covarsum=0 39 | for (i in 1:n) { 40 | cutofftemp=quantile(tau,1-i/n) 41 | Thatftemp=as.numeric(tau>cutofftemp) 42 | ThatftempA=as.numeric(tau>max(cutofftemp,0)) 43 | ThatfA=ThatfA+1/n*ThatftempA 44 | cutofftemp2=quantile(tau,(i-1)/n) 45 | Thatftemp2=as.numeric(tau>cutofftemp2) 46 | tempkAf1=mean(Y[T==1 & Thatftemp2==1])-mean(Y[T==0 & Thatftemp2==1]) 47 | tempkAf0=mean(Y[T==1 & Thatftemp==0])-mean(Y[T==0 & Thatftemp==0]) 48 | if (is.nan(tempkAf1)) { 49 | kAf1[n-i+1]=kAf1[n-i+2] 50 | } else { 51 | kAf1[n-i+1]=tempkAf1 52 | } 53 | if (is.nan(tempkAf0)) { 54 | kAf0[i]=kAf0[i-1] 55 | } else { 56 | kAf0[i]=tempkAf0 57 | } 58 | kAf1A[n-i+1]=(n-i+1)*kAf1[n-i+1] 59 | kAf1B[n-i+1]=(i-1)*kAf1[n-i+1] 60 | kAf0A[i]=i*kAf0[i] 61 | kAf0B[i]=(n-i)*kAf0[i] 62 | } 63 | sumtemp1=cumsum(kAf1A*kAf0B) 64 | sumtemp2=cumsum(kAf1A) 65 | sumtemp3=cumsum(kAf1A*kAf1B) 66 | tempM=outer(kAf1A,kAf1B) 67 | tempM[lower.tri(tempM,diag=TRUE)] <- 0 68 | tempMsum=cumsum(colSums(tempM)) 69 | covarsum1=mean(-1/(n^3*(n-1))*sumtemp1[Z]-Z*(n-Z)^2/(n^3*(n-1))*kAf1[Z]*kAf0[Z]-2/(n^4*(n-1))*tempMsum[Z]-Z^2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]^2 70 | -2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]*sumtemp2[Z]+1/n^4*sumtemp3[Z]) 71 | covarsum2=var(1/n*(sumtemp2[Z]/n+(n-Z)*Z/n*kAf1[Z])) 72 | ThatfA2=ThatfA-1/2 73 | SfA1=var((ThatfA2*Y)[T==1]) 74 | SfA0=var((ThatfA2*Y)[T==0]) 75 | varfA=SfA1/n1+SfA0/n0+covarsum1+covarsum2 76 | AUPEC=1/n1*sum(T*ThatfA*Y)+1/n0*sum(Y*(1-T)*(1-ThatfA))-0.5/n1*sum(T*Y)-0.5/n0*sum((1-T)*Y) 77 | return(list(aupec=AUPEC,sd=sqrt(varfA))) 78 | } 79 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include // for NULL 2 | #include 3 | 4 | /* FIXME: 5 | Check these declarations against the C/Fortran source code. 
6 | */ 7 | 8 | /* .C calls */ 9 | extern void LIbinary(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 10 | extern void LIcount(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 11 | extern void LIgaussian(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 12 | extern void LIordinal(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 13 | extern void LItwopart(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 14 | extern void MARprobit(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 15 | extern void NIbprobit(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 16 | 17 | static const R_CMethodDef CEntries[] = { 18 | {"LIbinary", (DL_FUNC) &LIbinary, 44}, 19 | {"LIcount", (DL_FUNC) &LIcount, 48}, 20 | {"LIgaussian", (DL_FUNC) &LIgaussian, 46}, 21 | {"LIordinal", (DL_FUNC) &LIordinal, 46}, 22 | {"LItwopart", (DL_FUNC) &LItwopart, 49}, 23 | {"MARprobit", (DL_FUNC) &MARprobit, 28}, 24 | {"NIbprobit", (DL_FUNC) &NIbprobit, 25}, 25 | {NULL, NULL, 0} 26 | }; 27 | 28 | void R_init_experiment(DllInfo *dll) 29 | { 30 | R_registerRoutines(dll, CEntries, NULL, NULL, NULL); 31 | R_useDynamicSymbols(dll, FALSE); 32 | } 33 | -------------------------------------------------------------------------------- /man/CADErand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CADErand.R 3 | \name{CADErand} 4 | \alias{CADErand} 5 | \title{Randomization-based method for the complier average direct effect and the complier average spillover effect} 6 | \usage{ 7 | CADErand(data, individual = 
1) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor.} 11 | 12 | \item{individual}{A binary variable with TRUE for individual-weighted estimators and FALSE for cluster-weighted estimators.} 13 | } 14 | \value{ 15 | A list of class \code{CADErand} which contains the following items: 16 | \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 17 | \item{CASE1}{ The point estimate of CASE(1). } \item{CASE0}{ The point estimate of CASE(0). } 18 | \item{var.CADE1}{ The variance estimate of CADE(1). } 19 | \item{var.CADE0}{ The variance estimate of CADE(0). } 20 | \item{var.CASE1}{ The variance estimate of CASE(1). } 21 | \item{var.CASE0}{ The variance estimate of CASE(0). } 22 | \item{DEY1}{ The point estimate of DEY(1). } \item{DEY0}{ The point estimate of DEY(0). } 23 | \item{DED1}{ The point estimate of DED(1). } \item{DED0}{ The point estimate of DED(0). } 24 | \item{var.DEY1}{ The variance estimate of DEY(1). } 25 | \item{var.DEY0}{ The variance estimate of DEY(0). } 26 | \item{var.DED1}{ The variance estimate of DED(1). } 27 | \item{var.DED0}{ The variance estimate of DED(0). } 28 | \item{SEY1}{ The point estimate of SEY(1). } \item{SEY0}{ The point estimate of SEY(0). } 29 | \item{SED1}{ The point estimate of SED(1). } \item{SED0}{ The point estimate of SED(0). } 30 | \item{var.SEY1}{ The variance estimate of SEY(1). } 31 | \item{var.SEY0}{ The variance estimate of SEY(0). } 32 | \item{var.SED1}{ The variance estimate of SED(1). } 33 | \item{var.SED0}{ The variance estimate of SED(0). } 34 | } 35 | \description{ 36 | This function computes the point estimates and variance estimates of the complier average direct effect (CADE) and the complier average spillover effect (CASE). 37 | The estimators calculated using this function are either individual-weighted or cluster-weighted. The point estimates and variances of ITT effects are also included. 38 | } 39 | \details{ 40 | For the details of the method implemented by this function, see the 41 | references. 42 | } 43 | \references{ 44 | Kosuke Imai, Zhichao Jiang and Anup Malani (2018). 45 | \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Technical Report}. Department of Politics, Princeton 46 | University. 47 | } 48 | \author{ 49 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 50 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 51 | Zhichao Jiang, Department of Politics, Princeton University 52 | \email{zhichaoj@princeton.edu}. 53 | } 54 | \keyword{experiments} 55 | \keyword{randomized} 56 | \keyword{two-stage} 57 | --------------------------------------------------------------------------------
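A sketch of the input layout CADErand() expects, following the man page above: columns Z (assignment), D (receipt), Y (outcome), A (assignment mechanism), and a factor id. All values are simulated placeholders, and whether this toy design satisfies every internal requirement of CADErand() (e.g., enough clusters under each assignment mechanism) is not guaranteed; the point is only to illustrate the expected columns.

    library(experiment)
    set.seed(1)
    J <- 20; m <- 10                           # 20 clusters of 10 units each
    dat <- data.frame(id = factor(rep(1:J, each = m)),
                      A  = rep(rbinom(J, 1, 0.5), each = m),
                      Z  = rbinom(J * m, 1, 0.5))
    dat$D <- rbinom(J * m, 1, 0.2 + 0.6 * dat$Z)
    dat$Y <- rnorm(J * m, mean = dat$D)
    CADErand(dat, individual = 1)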
/R/Noncomp.mom.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Wald-Type Method of Moments Estimator for Randomized Experiments 3 | ### with Noncompliance and Subsequent Missing Outcomes 4 | ### 5 | 6 | Noncomp.mom <- function(Y, D, Z, data = parent.frame()) { 7 | 8 | call <- match.call() 9 | Y <- eval(call$Y, envir = data) 10 | D <- eval(call$D, envir = data) 11 | Z <- eval(call$Z, envir = data) 12 | N <- length(Y) 13 | N1 <- length(Y[Z==1]) 14 | N0 <- length(Y[Z==0]) 15 | R <- (!is.na(Y))*1 16 | if (sum(R)>0) { 17 | if (sum(D == 1 & Z == 0)>0) { 18 | ## No always-takers 19 | ## Frangakis and Rubin (1999) Biometrika 20 | U <- sum(D*Z)/sum(Z) 21 | R01 <- sum(R*(1-D)*Z)/sum((1-D)*Z) 22 | R0 <- sum(R*(1-Z))/sum(1-Z) 23 | Y0 <- mean(Y[R==1 & Z==0]) 24 | Y01 <- sum(na.omit(Y*R*(1-D)*Z))/sum(R*(1-D)*Z) 25 | Y10 <- (Y0*R0-Y01*R01*(1-U))/(R0-R01*(1-U)) 26 | Y11 <- sum(na.omit(Y*R*D*Z))/sum(R*D*Z) 27 | q <- var(Y[R==1 & D==1 & Z==1])/mean(Z*D*R) 28 | v <- delta <- rep(0, 5) 29 | v[1] <- U*(1-U)/mean(Z==1) 30 | v[2] <- var(Y[R==1 & D==0 & Z == 1])/mean(Z*(1-D)*R==1) 31 | v[3] <- R01*(1-R01)/mean(Z*(1-D)==1) 32 | v[4] <- R0*(1-R0)/mean(Z==0) 33 | v[5] <- var(Y[R==1 & Z==0])/mean(R*(1-Z)==1) 34 | w <- 1/(R0-R01*(1-U)) 35 | delta[1] <- -R0*R01*(Y0-Y01)*(w^2) 36 | delta[2] <- -R01*(1-U)*w 37 | delta[3] <- R0*(Y0-Y01)*(1-U)*(w^2) 38 | delta[4] <- -R01*(Y0-Y01)*(1-U)*(w^2) 39 | delta[5] <- R0*w 40 | 41 | CACEest <- Y11-Y10 42 | IVvar <- (q + sum(v*(delta^2)))/N 43 | ITTest <- U*(Y11-Y10) 44 | ITTvar <- 45 | ((U^2)*q+v[1]*(Y11-Y10-U*delta[1])^2+(U^2)*sum(v[2:5]*(delta[2:5]^2)))/N 46 | } else { 47 | ## Always-Takers allowed 48 | ## variance has not been computed yet.
49 | Ca <- mean(D[Z == 0]) 50 | Cn <- mean(1-D[Z == 1]) 51 | Ra0 <- mean(R[D == 1 & Z == 0]) 52 | Rn1 <- mean(R[D == 0 & Z == 1]) 53 | R1 <- mean(R[Z == 1]) 54 | R0 <- mean(R[Z == 0]) 55 | Ya <- mean(Y[R == 1 & D == 1 & Z == 0]) 56 | Yn <- mean(Y[R == 1 & D == 0 & Z == 1]) 57 | Y1.obs <- mean(Y[R == 1 & Z == 1]) 58 | Y0.obs <- mean(Y[R == 1 & Z == 0]) 59 | Yc1 <- (R1*Y1.obs-Ya*Ca*Ra0-Yn*Cn*Rn1)/(R1-Ca*Ra0-Cn*Rn1) 60 | Yc0 <- (R0*Y0.obs-Ya*Ca*Ra0-Yn*Cn*Rn1)/(R0-Ca*Ra0-Cn*Rn1) 61 | CACEest <- Yc1 - Yc0 62 | ITTest <- CACEest*(1-Ca-Cn); IVvar <- ITTvar <- NA # variances not yet derived for this case 63 | } 64 | } else { 65 | ## No missing outcomes 66 | ## Imbens and Rubin (1997) Annals of Statistics 67 | Y1bar <- mean(Y[Z==1]) 68 | Y0bar <- mean(Y[Z==0]) 69 | D1bar <- mean(D[Z==1]) 70 | D0bar <- mean(D[Z==0]) 71 | ITTest <- Y1bar - Y0bar 72 | ITTestD <- D1bar - D0bar 73 | CACEest <- ITTest/ITTestD 74 | Y1var <- sum(Z*(Y-Y1bar)^2)/(N1^2) 75 | Y0var <- sum((1-Z)*(Y-Y0bar)^2)/(N0^2) 76 | D1var <- sum(Z*(D-D1bar)^2)/(N1^2) 77 | D0var <- sum((1-Z)*(D-D0bar)^2)/(N0^2) 78 | ITTvar <- Y1var + Y0var 79 | ITTvarD <- D1var + D0var 80 | ITTcov <- sum(Z*(Y-Y1bar)*(D-D1bar))/(N1^2) + 81 | sum((1-Z)*(Y-Y0bar)*(D-D0bar))/(N0^2) 82 | IVvar <- (ITTvar*(ITTestD^2)+ITTvarD*(ITTest^2)-2*ITTcov*ITTest*ITTestD)/(ITTestD^4) 83 | } 84 | return(list(CACEest = CACEest, CACEse = sqrt(IVvar), ITTest = ITTest, 85 | ITTse = sqrt(ITTvar))) 86 | } 87 | --------------------------------------------------------------------------------
/man/ATOPobs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATOPobs.R 3 | \name{ATOPobs} 4 | \alias{ATOPobs} 5 | \title{Sensitivity analysis for the ATOP when some of the Outcome Data are 6 | Missing Under the Matched-Pairs Design in Observational Studies} 7 | \usage{ 8 | ATOPobs(Ya, Yb, Ra, Rb, Ta, Tb, gamma, kappa1, kappa0, l, u, alpha, rep) 9 | } 10 | \arguments{ 11 | \item{Ya}{A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 12 | as \code{NA}.} 13 | 14 | \item{Yb}{A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 15 | as \code{NA}.} 16 | 17 | \item{Ra}{A vector of the missing data indicators of the first unit in the matched pairs.} 18 | 19 | \item{Rb}{A vector of the missing data indicators of the second unit in the matched pairs.} 20 | 21 | \item{Ta}{A vector of the treatment conditions of the first unit in the matched pairs.} 22 | 23 | \item{Tb}{A vector of the treatment conditions of the second unit in the matched pairs.} 24 | 25 | \item{gamma}{The sensitivity parameter which characterizes the degree of the within-pair similarity.} 26 | 27 | \item{kappa1}{The sensitivity parameter which characterizes the dependence between \code{R(1)} and \code{T}.} 28 | 29 | \item{kappa0}{The sensitivity parameter which characterizes the dependence between \code{R(0)} and \code{T}.} 30 | 31 | \item{l}{The lower limit of the outcome.} 32 | 33 | \item{u}{The upper limit of the outcome.} 34 | 35 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 36 | determine the (1-\code{alpha}) level of confidence intervals. The default is 37 | \code{0.05}.} 38 | 39 | \item{rep}{The number of repetitions for bootstrapping.} 40 | } 41 | \value{ 42 | A list of class \code{ATOPobs} which contains the following items: 43 | \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP.
} 44 | \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 45 | \item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 46 | } 47 | \description{ 48 | This function computes the bounds on the average treatment effect among always-observed pairs (ATOP) 49 | with pre-specified sensitivity parameters when 50 | some of the outcome data are missing. The sensitivity parameters characterize the degree of the within-pair similarity 51 | and the dependence between the potential missing indicators and the treatment. The confidence intervals for the 52 | ATOP are also computed. 53 | } 54 | \details{ 55 | For the details of the method implemented by this function, see the 56 | references. 57 | } 58 | \examples{ 59 | data(seguro) 60 | attach(seguro) 61 | ATOPobs(Ya,Yb,Ra,Rb,Ta,Tb,gamma=0.95,kappa1=1,kappa0=1,l=0,u=1,alpha=0.05,rep=100) # kappa values illustrative 62 | } 63 | \references{ 64 | Kosuke Imai and Zhichao Jiang (2018). 65 | \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 66 | Truncation-by-Death under the Matched-Pairs Design}, \emph{Statistics in Medicine}. 67 | } 68 | \author{ 69 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 70 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 71 | Zhichao Jiang, Department of Politics, Princeton University 72 | \email{zhichaoj@princeton.edu}. 73 | } 74 | \keyword{design} 75 | \keyword{matched-pairs} 76 | -------------------------------------------------------------------------------- /src/models.h: -------------------------------------------------------------------------------- 1 | /* normal regression */ 2 | void bNormalReg(double **D, double *beta, double *sig2, 3 | int n_samp, int n_cov, int addprior, int pbeta, 4 | double *beta0, double **A0, int psig2, double s0, 5 | int nu0, int sig2fixed); 6 | 7 | /* binomial probit regression */ 8 | void bprobitGibbs(int *Y, double **X, double *beta, int n_samp, 9 | int n_cov, int prior, double *beta0, double **A0, 10 | int mda, int n_gen); 11 | 12 | /* ordinal probit regression */ 13 | void boprobitMCMC(int *Y, double **X, double *beta, 14 | double *tau, int n_samp, int n_cov, int n_cat, 15 | int prior, double *beta0, double **A0, int mda, 16 | int mh, double *prop, int *accept, int n_gen); 17 | 18 | /* binomial and multinomial logistic regression */ 19 | void logitMetro(int *Y, double **X, double *beta, int n_samp, 20 | int n_dim, int n_cov, double *beta0, double **A0, 21 | double *Var, int n_gen, int *counter); 22 | 23 | /* Normal mixed effects regression */ 24 | void bNormalMixedGibbs(double *Y, double **X, double ***Zgrp, 25 | int *grp, double *beta, double **gamma, double *sig2, 26 | double **Psi, int n_samp, int n_fixed, int n_random, 27 | int n_grp, int prior, double *beta0, double **A0, 28 | int imp, int nu0, double s0, int tau0, double **T0, 29 | int n_gen0); 30 | 31 | /* binomial mixed effects probit regression */ 32 | void bprobitMixedGibbs(int *Y, double **X, double ***Zgrp, 33 | int *grp, double *beta, double **gamma, 34 | double **Psi, int n_samp, int n_fixed, 35 | int n_random, int n_grp, 36 | int prior, double *beta0, double **A0, 37 | int tau0, double **T0, int n_gen); 38 | 39 | /* (binomial/multinomial) logistic mixed effects regression */ 40 | void logitMixedMetro(int *Y, double **X, double ***Z, int *grp, 41 | double *beta, double ***gamma, double ***Psi, 42 | int n_samp, int n_dim, int n_fixed, 43 | int n_random, int n_grp, double *beta0, 44 | double **A0, int tau0, double **T0, 45 | double *tune_fixed, double *tune_random, 46 | int
n_gen, int *acc_fixed, int *acc_random); 47 | 48 | /* ordinal probit mixed effects regression */ 49 | void boprobitMixedMCMC(int *Y, double **X, double ***Zgrp, int *grp, 50 | double *beta, double **gamma, double *tau, 51 | double **Psi, int n_samp, int n_cat, 52 | int n_fixed, int n_random, int n_grp, 53 | int prior, double *beta0, double **A0, int tau0, 54 | double **T0, int mh, double *prop, int *accept, 55 | int n_gen); 56 | 57 | /* negative binomial regression */ 58 | void negbinMetro(int *Y, double **X, double *beta, double *sig2, 59 | int n_samp, int n_cov, double *beta0, double **A0, 60 | double a0, double b0, double *varb, double vars, 61 | double *cont, int n_gen, int *counter, int sig2fixed); 62 | 63 | /* mixed effects negative binomial regression */ 64 | void bnegbinMixedMCMC(int *Y, int **Ygrp, double **X, double ***Zgrp, 65 | int *grp, double *beta, double **gamma, 66 | double *sig2, double **Psi, int n_samp, 67 | int n_fixed, int n_random, int n_grp, 68 | int max_samp_grp, double *beta0, 69 | double **A0, double a0, double b0, 70 | int tau0, double **T0, double *varb, double vars, 71 | double *varg, int *counter, int **counterg, 72 | int n_gen); 73 | -------------------------------------------------------------------------------- /src/vector.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int* intArray(int num) { 8 | int *iArray = (int *)malloc(num * sizeof(int)); 9 | if (!iArray) 10 | error("Out of memory error in intArray\n"); 11 | return iArray; 12 | } 13 | 14 | void PintArray(int *ivector, int length) { 15 | int i; 16 | for (i = 0; i < length; i++) 17 | Rprintf("%5d\n", ivector[i]); 18 | } 19 | 20 | int** intMatrix(int row, int col) { 21 | int i; 22 | int **iMatrix = (int **)malloc(row * sizeof(int *)); 23 | if (!iMatrix) 24 | error("Out of memory error in intMatrix\n"); 25 | for (i = 0; i < row; i++) { 26 | iMatrix[i] = (int *)malloc(col * sizeof(int)); 27 | if (!iMatrix[i]) 28 | error("Out of memory error in intMatrix\n"); 29 | } 30 | return iMatrix; 31 | } 32 | 33 | void PintMatrix(int **imatrix, int row, int col) { 34 | int i, j; 35 | for (i = 0; i < row; i++) { 36 | for (j = 0; j < col; j++) 37 | Rprintf("%5d", imatrix[i][j]); 38 | Rprintf("\n"); 39 | } 40 | } 41 | 42 | 43 | double* doubleArray(int num) { 44 | double *dArray = (double *)malloc(num * sizeof(double)); 45 | if (!dArray) 46 | error("Out of memory error in doubleArray\n"); 47 | return dArray; 48 | } 49 | 50 | void PdoubleArray(double *dvector, int length) { 51 | int i; 52 | for (i = 0; i < length; i++) 53 | Rprintf("%14g\n", dvector[i]); 54 | } 55 | 56 | double** doubleMatrix(int row, int col) { 57 | int i; 58 | double **dMatrix = (double **)malloc((size_t)(row * sizeof(double *))); 59 | if (!dMatrix) 60 | error("Out of memory error in doubleMatrix\n"); 61 | for (i = 0; i < row; i++) { 62 | dMatrix[i] = (double *)malloc((size_t)(col * sizeof(double))); 63 | if (!dMatrix[i]) 64 | error("Out of memory error in doubleMatrix\n"); 65 | } 66 | return dMatrix; 67 | } 68 | 69 | void PdoubleMatrix(double **dmatrix, int row, int col) { 70 | int i, j; 71 | for (i = 0; i < row; i++) { 72 | for (j = 0; j < col; j++) 73 | Rprintf("%14g", dmatrix[i][j]); 74 | Rprintf("\n"); 75 | } 76 | } 77 | 78 | double*** doubleMatrix3D(int x, int y, int z) { 79 | int i; 80 | double ***dM3 = (double ***)malloc(x * sizeof(double **)); 81 | if (!dM3) 82 | error("Out of memory error in doubleMatrix3D\n"); 83 | for (i = 0; i < x; 
i++) 84 | dM3[i] = doubleMatrix(y, z); 85 | return dM3; 86 | } 87 | 88 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z) { 89 | int i, j, k; 90 | for (i = 0; i < x; i++) { 91 | Rprintf("First dimension = %5d\n", i); 92 | for (j = 0; j < y; j++) { 93 | for (k = 0; k < z; k++) 94 | Rprintf("%14g", dmatrix3D[i][j][k]); 95 | Rprintf("\n"); 96 | } 97 | } 98 | } 99 | 100 | long* longArray(int num) { 101 | long *lArray = (long *)malloc(num * sizeof(long)); 102 | if (!lArray) 103 | error("Out of memory error in longArray\n"); 104 | return lArray; 105 | } 106 | 107 | void FreeMatrix(double **Matrix, int row) { 108 | int i; 109 | for (i = 0; i < row; i++) 110 | free(Matrix[i]); 111 | free(Matrix); 112 | } 113 | 114 | void FreeintMatrix(int **Matrix, int row) { 115 | int i; 116 | for (i = 0; i < row; i++) 117 | free(Matrix[i]); 118 | free(Matrix); 119 | } 120 | 121 | void Free3DMatrix(double ***Matrix, int index, int row) { 122 | int i; 123 | for (i = 0; i < index; i++) 124 | FreeMatrix(Matrix[i], row); 125 | free(Matrix); 126 | } 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /man/CACEcluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CACEcluster.R 3 | \name{CACEcluster} 4 | \alias{CACEcluster} 5 | \title{Estimation of the Complier Average Causal Effects in Cluster-Randomized 6 | Experiments with Unit-level Noncompliance} 7 | \usage{ 8 | CACEcluster( 9 | Y, 10 | D, 11 | Z, 12 | grp, 13 | data = parent.frame(), 14 | match = NULL, 15 | weights = NULL, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{Y}{The outcome variable of interest.} 21 | 22 | \item{D}{The unit-level treatment receipt variable. This variable should be 23 | binary but can differ across units within each cluster.} 24 | 25 | \item{Z}{The (randomized) cluster-level encouragement variable. This 26 | variable should be binary. Two units in the same cluster should have the 27 | same value.} 28 | 29 | \item{grp}{A variable indicating clusters of units. Two units in the same 30 | cluster should have the same value.} 31 | 32 | \item{data}{A data frame containing the relevant variables.} 33 | 34 | \item{match}{A variable indicating matched-pairs of clusters. Two units in 35 | the same matched-pair of clusters should have the same value. The default is 36 | \code{NULL} (i.e., no matching).} 37 | 38 | \item{weights}{A variable indicating the population cluster sizes, which 39 | will be used to construct weights for each pair of clusters. Two units in 40 | the same cluster should have the same value. The default is \code{NULL}, in 41 | which case sample cluster sizes will be used for constructing weights.} 42 | 43 | \item{...}{Optional arguments passed to \code{ATEcluster}, which is called 44 | internally.} 45 | } 46 | \value{ 47 | A list of class \code{CACEcluster} which contains the following 48 | items: \item{call}{ The matched call. } \item{ITTY}{ The output object from 49 | \code{ATEcluster} which is used to estimate the ITT effect of the 50 | encouragement on the outcome variable. } \item{ITTD}{ The output object 51 | from \code{ATEcluster} which is used to estimate the ITT effect of the 52 | encouragement on the treatment receipt variable. } \item{n1}{ The total 53 | number of units in the treatment group. } \item{n0}{ The total number of 54 | units in the control group. } \item{Z}{ The treatment variable.
} 55 | \item{est}{ The estimated complier average causal effect. } \item{var}{ The 56 | estimated variance of the complier average causal effect estimator. } 57 | \item{cov}{ The estimated covariance between the two ITT estimators. } \item{m}{ 58 | The number of pairs in the matched-pair design. } \item{N1}{ The population 59 | cluster sizes for the treatment group. } \item{N0}{ The population cluster 60 | sizes for the control group. } \item{w}{ Pair-specific normalized 61 | arithmetic mean weights. These weights sum up to the total number of units 62 | in the sample, i.e., \code{n}. } 63 | } 64 | \description{ 65 | This function estimates various complier average causal effects in 66 | cluster-randomized experiments without using pre-treatment covariates when 67 | unit-level noncompliance exists. Both the encouragement and treatment 68 | variables are assumed to be binary. Currently, only the matched-pair design 69 | is allowed. The details of the methods for this design are given in Imai, 70 | King, and Nall (2007). 71 | } 72 | \references{ 73 | Imai, Kosuke, Gary King, and Clayton Nall (2007). \dQuote{The 74 | Essential Role of Pair Matching in Cluster-Randomized Experiments, with 75 | Application to the Mexican Universal Health Insurance Evaluation}, Technical 76 | Report. Department of Politics, Princeton University. 77 | } 78 | \author{ 79 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 80 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 81 | } 82 | \keyword{design} 83 | -------------------------------------------------------------------------------- /R/ATEnocov.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Calculate the ATE without covariates 3 | ### 4 | #' Estimation of the Average Treatment Effect in Randomized Experiments 5 | #' 6 | #' This function computes the standard ``difference-in-means'' estimate of the 7 | #' average treatment effect in randomized experiments without using 8 | #' pre-treatment covariates. The treatment variable is assumed to be binary. 9 | #' Currently, two designs are allowed: the completely randomized design and 10 | #' the matched-pair design. 11 | #' 12 | #' @useDynLib experiment 13 | #' @importFrom stats coef complete.cases cov fitted ftable lm mahalanobis model.frame model.matrix model.response na.fail na.omit printCoefmat qnorm quantile rbinom rnorm runif pnorm uniroot sd terms var vcov weighted.mean 14 | #' @importFrom utils packageDescription 15 | #' @importFrom MASS mvrnorm 16 | #' @importFrom boot boot 17 | #' 18 | #' @param Y The outcome variable of interest. 19 | #' @param Z The (randomized) treatment variable. This variable should be 20 | #' binary. 21 | #' @param data A data frame containing the relevant variables. 22 | #' @param match A variable indicating matched-pairs. The two units in the same 23 | #' matched-pair should have the same value. 24 | #' @return A list of class \code{ATEnocov} which contains the following items: 25 | #' \item{call}{ The matched call. } \item{Y}{ The outcome variable. } 26 | #' \item{Z}{ The treatment variable. } \item{match}{ The matched-pair 27 | #' indicator variable. } \item{ATE.est}{ The estimated average treatment 28 | #' effect. } \item{ATE.var}{ The estimated variance of the average treatment 29 | #' effect estimator. } \item{diff}{ Within-pair differences if the 30 | #' matched-pair design is analyzed.
} 31 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 32 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 33 | #' @references Imai, Kosuke (2008). \dQuote{Randomization-based Inference and 34 | #' Efficiency Analysis in Experiments under the Matched-Pair Design}, \emph{Statistics in Medicine}. 35 | #' @keywords design 36 | #' @export ATEnocov 37 | ATEnocov <- function(Y, Z, data = parent.frame(), match = NULL){ 38 | 39 | ## an internal function that checks match and returns diff 40 | match.check <- function(Y, Z, match) { 41 | n <- length(Y) 42 | if ((n %% 2) != 0) 43 | stop("pair randomization requires an even number of observations") 44 | if (length(unique(table(match))) > 1) 45 | stop("invalid input for `match'") 46 | if (unique(table(match)) != 2) 47 | stop("invalid input for `match'") 48 | umatch <- sort(unique(match)) 49 | diff <- rep(NA, n/2) 50 | for (i in 1:length(umatch)) 51 | diff[i] <- Y[(Z == 1) & (match == umatch[i])] - 52 | Y[(Z == 0) & (match == umatch[i])] 53 | return(diff) 54 | } 55 | 56 | ## getting the data 57 | call <- match.call() 58 | Y <- eval(call$Y, envir = data) 59 | Z <- eval(call$Z, envir = data) 60 | match <- eval(call$match, envir = data) 61 | 62 | ## checking data 63 | if (sum(sort(unique(Z)) == c(0,1)) != 2) 64 | stop("`Z' should be binary taking the value of 0 or 1") 65 | if (length(Y) != length(Z)) 66 | stop("`Y' and `Z' have different numbers of observations") 67 | if (!is.null(match)) 68 | if (length(match) != length(Y)) 69 | stop("`match' and `Y' have different numbers of observations") 70 | 71 | res <- list(call = call, Y = Y, Z = Z, match = match) 72 | ## ATE for unit randomization 73 | res$ATE.est <- mean(Y[Z==1])-mean(Y[Z==0]) 74 | if (is.null(match)) { # without matching 75 | res$ATE.var <- var(Y[Z==1])/sum(Z==1)+var(Y[Z==0])/sum(Z==0) 76 | } else { # with matching 77 | res$diff <- diff <- match.check(Y, Z, match) 78 | res$ATE.var <- var(diff)/length(diff) 79 | } 80 | class(res) <- "ATEnocov" 81 | return(res) 82 | } 83 | 84 | -------------------------------------------------------------------------------- /R/NIbprobit.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Bayesian probit with nonignorable missing outcomes and 3 | ### multi-valued treatments 4 | ### 5 | 6 | NIbprobit <- function(formula, Xo, Xr, data = parent.frame(), 7 | n.draws = 5000, insample = FALSE, 8 | param = TRUE, mda = TRUE, 9 | p.mean.o = 0, p.prec.o = 0.01, 10 | p.mean.r = 0, p.prec.r = 0.01, 11 | coef.start.o = 0, coef.start.r = 0, 12 | burnin = 0, thin = 0, verbose = TRUE) { 13 | 14 | ## getting Y and D 15 | call <- match.call() 16 | tm <- terms(formula) 17 | attr(tm, "intercept") <- 0 18 | mf <- model.frame(tm, data = data, na.action = 'na.pass') 19 | D <- model.matrix(tm, data = mf) 20 | if (max(D) > 1 || min(D) < 0) 21 | stop("the treatment variable should be a factor variable.") 22 | Y <- model.response(mf) 23 | m <- ncol(D) # number of treatment levels including control 24 | ## getting Xo and Xr 25 | tm <- terms(Xo) 26 | attr(tm, "intercept") <- 1 27 | Xo <- model.matrix(tm, data = data, na.action = 'na.pass') 28 | Xo <- Xo[,(colnames(Xo) != "(Intercept)")] 29 | tm <- terms(Xr) 30 | attr(tm, "intercept") <- 1 31 | Xr <- model.matrix(tm, data = data, na.action = 'na.pass') 32 | Xr <- Xr[,(colnames(Xr) != "(Intercept)")] 33 | ## taking care of NA's in D and X 34 | ind <- complete.cases(cbind(D, Xo, Xr)) 35 | Y <- Y[ind] 36 | D <- D[ind,] 37 | Xo <- Xo[ind,]
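## Note on missing data handling: `ind` above keeps only the rows with fully
## observed D, Xo, and Xr; missing outcomes are retained, flagged through the
## response indicator R below, and the missing Y values are initialized with
## Bernoulli(0.5) draws before the MCMC run.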
38 | Xr <- Xr[ind,] 39 | R <- (!is.na(Y))*1 40 | Y[is.na(Y)] <- rbinom(sum(is.na(Y)), size = 1, prob = 0.5) 41 | cnameso <- c(colnames(D), colnames(Xo)) 42 | cnamesr <- c("1-Y", "Y", colnames(Xr)) 43 | Xo <- cbind(D, Xo) 44 | colnames(Xo) <- cnameso 45 | Xr <- cbind(1-Y, Y, Xr) 46 | colnames(Xr) <- cnamesr 47 | 48 | res <- list(call = call, Y = Y, Xo = Xo, Xr = Xr, n.draws = n.draws) 49 | 50 | n <- length(Y) 51 | ncovo <- ncol(Xo) 52 | ncovr <- ncol(Xr) 53 | ## starting values 54 | if(length(coef.start.o) != ncovo) 55 | coef.start.o <- rep(coef.start.o, ncovo) 56 | if(length(coef.start.r) != ncovr) 57 | coef.start.r <- rep(coef.start.r, ncovr) 58 | 59 | ## prior 60 | if(length(p.mean.o) != ncovo) 61 | p.mean.o <- rep(p.mean.o, ncovo) 62 | if(length(p.mean.r) != ncovr) 63 | p.mean.r <- rep(p.mean.r, ncovr) 64 | if(!is.matrix(p.prec.o)) 65 | p.prec.o <- diag(p.prec.o, ncovo) 66 | if(!is.matrix(p.prec.r)) 67 | p.prec.r <- diag(p.prec.r, ncovr) 68 | 69 | ## checking thinning and burnin intervals 70 | if (n.draws <= 0) 71 | stop("`n.draws' should be a positive integer.") 72 | if (burnin < 0 || burnin >= n.draws) 73 | stop("`burnin' should be a non-negative integer less than `n.draws'.") 74 | if (thin < 0 || thin >= n.draws) 75 | stop("`thin' should be a non-negative integer less than `n.draws'.") 76 | keep <- thin + 1 77 | 78 | ## calling C function to do MCMC 79 | par <- .C("NIbprobit", 80 | as.integer(Y), as.integer(R), 81 | as.double(Xo), as.double(Xr), 82 | as.double(coef.start.o), as.double(coef.start.r), 83 | as.integer(n), as.integer(ncovo), as.integer(ncovr), 84 | as.integer(m), as.double(p.mean.o), as.double(p.mean.r), 85 | as.double(p.prec.o), as.double(p.prec.r), 86 | as.integer(insample), as.integer(param), as.integer(mda), 87 | as.integer(n.draws), as.integer(burnin), 88 | as.integer(keep), as.integer(verbose), 89 | coef.o = double(ncovo*(ceiling((n.draws-burnin)/keep))), 90 | coef.r = double(ncovr*(ceiling((n.draws-burnin)/keep))), 91 | ATE = double((m-1)*(ceiling((n.draws-burnin)/keep))), 92 | BASE = double(m*(ceiling((n.draws-burnin)/keep))), 93 | PACKAGE="experiment") 94 | if (param) { 95 | res$coef.o <- matrix(par$coef.o, byrow = TRUE, ncol = ncovo) 96 | colnames(res$coef.o) <- colnames(Xo) 97 | res$coef.r <- matrix(par$coef.r, byrow = TRUE, ncol = ncovr) 98 | colnames(res$coef.r) <- colnames(Xr) 99 | } 100 | res$ATE <- matrix(par$ATE, byrow = TRUE, ncol = m-1) 101 | res$base <- matrix(par$BASE, byrow = TRUE, ncol = m) 102 | colnames(res$base) <- colnames(D) 103 | 104 | class(res) <- "NIbprobit" 105 | return(res) 106 | } 107 | -------------------------------------------------------------------------------- /man/randomize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/randomize.R 3 | \name{randomize} 4 | \alias{randomize} 5 | \alias{Randomize} 6 | \title{Randomization of the Treatment Assignment for Conducting Experiments} 7 | \usage{ 8 | randomize( 9 | data, 10 | group = c("Treat", "Control"), 11 | ratio = NULL, 12 | indx = NULL, 13 | block = NULL, 14 | n.block = NULL, 15 | match = NULL, 16 | complete = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{A data frame containing the observations to which the treatments 21 | are randomly assigned.} 22 | 23 | \item{group}{A numerical or character vector indicating the 24 | treatment/control groups. The length of the vector equals the total number 25 | of such groups.
The default specifies two groups called \dQuote{Treat} and 26 | \dQuote{Control}.} 27 | 28 | \item{ratio}{An optional numerical vector which specifies the proportion of 29 | the treatment/control groups within the sample. The length of the vector 30 | should equal the number of groups. The default is the equal allocation.} 31 | 32 | \item{indx}{An optional variable name in the data frame to be used as the 33 | names of the observations. If not specified, the row names of the data frame 34 | will be used so long as they are available. If the row names are not 35 | available, the integer sequence starting from 1 will be used.} 36 | 37 | \item{block}{An optional variable name in the data frame or a formula to be 38 | used as the blocking variables for randomized-block designs. If a variable 39 | name is specified, then the unique values of that variable will form blocks 40 | unless \code{n.block} is specified (see below). If a formula is specified, 41 | it will be evaluated using \code{data} and then blocking will be based on 42 | the \code{mahalanobis} distance of the resulting model matrix. In this case, 43 | users may want to specify \code{n.block} to avoid creating blocks that have 44 | too few observations.} 45 | 46 | \item{n.block}{An optional scalar specifying the number of blocks to be 47 | created for randomized block designs. If unspecified, the unique values of 48 | the blocking variable will define blocks. If specified, the blocks of 49 | roughly equal size will be created based on the \code{quantile} of the 50 | blocking variable.} 51 | 52 | \item{match}{An optional variable name in the data frame or a formula to be 53 | used as the matching variables for matched-pair designs. This input is 54 | applicable only to the case where there are two groups. Pairs of 55 | observations will be formed based on the similar values of the matching 56 | variable. If a formula is specified, the \code{mahalanobis} distance of the 57 | resulting model matrix will be used.} 58 | 59 | \item{complete}{logical. If it equals \code{TRUE} (default), then complete 60 | randomization will be performed (within each block if randomized block 61 | designs are used). Otherwise, simple randomization will be implemented. For 62 | matched-pair designs, \code{complete} has to equal \code{TRUE}.} 63 | } 64 | \value{ 65 | A list of class \code{randomize} which contains the following items: 66 | \item{call}{ the matched call. } \item{treatment}{ The vector of randomized 67 | treatments. } \item{data}{ The data frame that was used to conduct the 68 | randomization. } \item{block}{ The blocking variable that was used to 69 | implement randomized-block designs. } \item{match}{ The matching variable 70 | that was used to implement matched-pair designs. } \item{block.id}{ The 71 | variable indicating which observations belong to which blocks in 72 | randomized-block designs. } \item{match.id}{ The variable indicating which 73 | observations belong to which pairs in matched-pair designs. } 74 | } 75 | \description{ 76 | This function can be used to randomize the treatment assignment for 77 | randomized experiments. In addition to the complete randomization, it 78 | implements randomized-block and matched-pair designs. 79 | } 80 | \details{ 81 | Randomized-block designs refer to the complete randomization of the 82 | treatment within the pre-specified blocks which contain multiple 83 | observations. 
Matched-pair designs refer to the randomization of the binary 84 | treatment variable within the pre-specified pair of observations. 85 | } 86 | \author{ 87 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 88 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 89 | } 90 | \keyword{design} 91 | -------------------------------------------------------------------------------- /man/ATEbounds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATEbounds.R 3 | \name{ATEbounds} 4 | \alias{ATEbounds} 5 | \title{Bounding the Average Treatment Effect when some of the Outcome Data are 6 | Missing} 7 | \usage{ 8 | ATEbounds( 9 | formula, 10 | data = parent.frame(), 11 | maxY = NULL, 12 | minY = NULL, 13 | alpha = 0.05, 14 | n.reps = 0, 15 | strata = NULL, 16 | ratio = NULL, 17 | survey = NULL, 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{formula}{A formula of the form \code{Y ~ X} where \code{Y} is the name 23 | of the outcome variable and \code{X} is the name of the (randomized) 24 | treatment variable. \code{X} should be a factor variable but its value can 25 | take more than two levels. The missing values for \code{Y} should be coded 26 | as \code{NA}.} 27 | 28 | \item{data}{A data frame containing the relevant variables.} 29 | 30 | \item{maxY}{A scalar. The maximum value of the outcome variable. The default 31 | is the maximum sample value.} 32 | 33 | \item{minY}{A scalar. The minimum value of the outcome variable. The default 34 | is the minimum sample value.} 35 | 36 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 37 | determine the (1-\code{alpha}) level of confidence intervals. The default is 38 | \code{0.05}.} 39 | 40 | \item{n.reps}{A positive integer. The number of bootstrap replicates used 41 | for the construction of confidence intervals via the B-method of Beran (1988). 42 | If it equals zero, the confidence intervals will not be constructed.} 43 | 44 | \item{strata}{The variable name indicating strata. If this is specified, the 45 | quantities of interest will be first calculated within each stratum and then 46 | aggregated. The default is \code{NULL}.} 47 | 48 | \item{ratio}{A \eqn{J \times M} matrix of probabilities where \eqn{J} is the 49 | number of strata and \eqn{M} is the number of treatment and control groups. 50 | Each element of the matrix specifies the probability of a unit falling into 51 | that category. The default is \code{NULL} in which case the sample estimates 52 | of these probabilities are used for computation.} 53 | 54 | \item{survey}{The variable name for survey weights. The default is 55 | \code{NULL}.} 56 | 57 | \item{...}{The arguments passed to other functions.} 58 | } 59 | \value{ 60 | A list of class \code{ATEbounds} which contains the following items: 61 | \item{call}{ The matched call. } \item{Y}{ The outcome variable. } 62 | \item{D}{ The treatment variable. } \item{bounds}{ The point estimates of 63 | the sharp bounds on the average treatment effect. } \item{bounds.Y}{ The 64 | point estimates of the sharp bounds on the outcome variable within each 65 | treatment/control group. } \item{bmethod.ci}{ The B-method confidence 66 | interval of the bounds on the average treatment effect. } \item{bonf.ci}{ 67 | The Bonferroni confidence interval of the bounds on the average treatment 68 | effect.
} \item{bonf.ci.Y}{ The Bonferroni confidence interval of the 69 | bounds on the outcome variable within each treatment/control group. } 70 | \item{bmethod.ci.Y}{ The B-method confidence interval of the bounds on the 71 | outcome variable within each treatment/control group. } \item{maxY}{ The 72 | maximum value of the outcome variable used in the computation. } 73 | \item{minY}{ The minimum value of the outcome variable used in the 74 | computation. } \item{nobs}{ The number of observations. } \item{nobs.Y}{ 75 | The number of observations within each treatment/control group. } 76 | \item{ratio}{ The probability of treatment assignment (within each stratum if 77 | \code{strata} is specified) used in the computation. } 78 | } 79 | \description{ 80 | This function computes the sharp bounds on the average treatment effect when 81 | some of the outcome data are missing. The confidence intervals for the 82 | bounds are also computed. 83 | } 84 | \details{ 85 | For the details of the method implemented by this function, see the 86 | references. 87 | } 88 | \references{ 89 | Horowitz, Joel L. and Charles F. Manski. (1998). 90 | \dQuote{Censoring of Outcomes and Regressors due to Survey Nonresponse: 91 | Identification and Estimation Using Weights and Imputations.} \emph{Journal 92 | of Econometrics}, Vol. 84, pp.37-58. 93 | 94 | Horowitz, Joel L. and Charles F. Manski. (2000). \dQuote{Nonparametric 95 | Analysis of Randomized Experiments With Missing Covariate and Outcome Data.} 96 | \emph{Journal of the American Statistical Association}, Vol. 95, No. 449, 97 | pp.77-84. 98 | 99 | Harris-Lacewell, Melissa, Kosuke Imai, and Teppei Yamamoto. (2007). 100 | \dQuote{Racial Gaps in the Responses to Hurricane Katrina: An Experimental 101 | Study}, \emph{Technical Report}. Department of Politics, Princeton 102 | University. 103 | } 104 | \author{ 105 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 106 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 107 | } 108 | \keyword{design} 109 | -------------------------------------------------------------------------------- /R/ATOPnoassumption.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Calculates the no-assumption bounds for the ATOP in the presence of missing 3 | ### response under matched-pairs design 4 | ### 5 | 6 | 7 | 8 | #' Bounding the ATOP when some of the Outcome Data are 9 | #' Missing Under the Matched-Pairs Design 10 | #' 11 | #' This function computes the no-assumption bounds on the average treatment effect among always-observed pairs (ATOP) when 12 | #' some of the outcome data are missing. The confidence intervals for the 13 | #' ATOP are also computed. 14 | #' 15 | #' For the details of the method implemented by this function, see the 16 | #' references. 17 | #' 18 | #' @param Ya A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 19 | #' as \code{NA}. 20 | #' @param Yb A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 21 | #' as \code{NA}. 22 | #' @param Ra A vector of the missing data indicators of the first unit in the matched pairs. 23 | #' @param Rb A vector of the missing data indicators of the second unit in the matched pairs. 24 | #' @param Ta A vector of the treatment conditions of the first unit in the matched pairs.
25 | #' @param Tb A vector of the treatment conditions of the second unit in the matched pairs. 26 | #' @param l The lower limit of the outcome. 27 | #' @param u The upper limit of the outcome. 28 | #' @param alpha A positive scalar that is less than or equal to 0.5. This will 29 | #' determine the (1-\code{alpha}) level of confidence intervals. The default is 30 | #' \code{0.05}. 31 | #' @param rep The number of repetitions for bootstrapping. 32 | #' @return A list of class \code{ATOPnoassumption} which contains the following items: 33 | #' \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP. } 34 | #' \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 35 | #'\item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 36 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 37 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 38 | #' Zhichao Jiang, Department of Politics, Princeton University 39 | #' \email{zhichaoj@@princeton.edu}. 40 | #' @references Kosuke Imai and Zhichao Jiang (2018). 41 | #' \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 42 | #' Truncation-by-Death under the Matched-Pairs Design}, \emph{Technical Report}. Department of Politics, Princeton 43 | #' University. 44 | #' @keywords matched-pairs design 45 | #' @examples 46 | #' data(seguro) 47 | #' attach(seguro) 48 | #' ATOPnoassumption(Ya,Yb,Ra,Rb,Ta,Tb,l=0,u=1,alpha=0.05,rep=100) 49 | #' @export ATOPnoassumption 50 | 51 | ATOPnoassumption = function(Ya,Yb,Ra,Rb,Ta,Tb,l,u,alpha,rep){ 52 | if(!(is.vector(Ya)&is.vector(Yb)&is.vector(Ra)&is.vector(Rb)&is.vector(Ta)&is.vector(Tb))) 53 | stop('Data should be input as vectors') 54 | if (length(unique(apply(cbind(Ya,Yb,Ra,Rb,Ta,Tb),2,length)))!=1) 55 | stop('Vectors should have the same length') 56 | 57 | omega1 <- mean(c(Ya[Ta==1&Ra==1&Rb==1],Yb[Tb==1&Rb==1&Ra==1])) 58 | omega0 <- mean(c(Ya[Ta==0&Ra==1&Rb==1],Yb[Tb==0&Rb==1&Ra==1])) 59 | 60 | pi <- mean(Ra==1&Rb==1) 61 | N <- length(Ya) 62 | 63 | ##### 64 | 65 | indL1 <- 1 66 | indL2 <- 1 67 | indU1 <- 1 68 | indU2 <- 1 69 | if (pi<1/2 | f1(omega1,2-1/pi,u)<l){indL1=2} 70 | if (pi<1/2 | f2(omega0,2-1/pi,l)>u){indL2=2} 71 | if (pi<1/2| f2(omega1,2-1/pi,l)>u){indU1=2} 72 | if (pi<1/2 | f1(omega0,2-1/pi,u)<l){indU2=2} 80 | if (Delta1<0 | f1(omega1,Delta1,u)<l){indL1=0} 81 | if (Delta1<0 | f2(omega0,Delta1,l)>u){indL2=0} 82 | if (Delta1<0| f2(omega1,Delta1,l)>u){indU1=0} 83 | if (Delta1<0 | f1(omega0,Delta1,u)<l){indU2=0} 107 | LB.boots[i] <- max(ifelse(indL1!=0&Delta1.boots>0,f1(omega1.boots,Delta1.boots,u),l)-ifelse(indL2!=0&Delta1.boots>0,f2(omega0.boots,Delta1.boots,l),u),l-u) 108 | UB.boots[i] <- min(ifelse(indU1!=0&Delta1.boots>0,f2(omega1.boots, Delta1.boots,l),u)-ifelse(indU2!=0&Delta1.boots>0,f1(omega0.boots,Delta1.boots,u),l),u-l) 109 | } 110 | #### Calculate CI 111 | sd.LB <- sd(LB.boots) 112 | sd.UB <- sd(UB.boots) 113 | C <- uniroot(CalC,hat.LB=LB,hat.UB=UB,sigma.LB=sd.LB,sigma.UB=sd.UB,alpha=alpha,lower=0,upper=100)$root 114 | LB.CI <- max(LB-C*sd.LB,l-u) 115 | UB.CI <- min(UB+C*sd.UB,u-l) 116 | return(list(LB=LB,UB=UB,LB.CI=LB.CI,UB.CI=UB.CI)) 117 | } 118 | -------------------------------------------------------------------------------- /src/rand.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "vector.h" 8 | #include "subroutines.h" 9 | #include "rand.h" 10 | 11 | /* Multivariate Normal density */ 12 | double dMVN( 13 | double *Y, /* The data */ 14 | double *MEAN, /* The parameters */ 15 | double **SIG_INV, /* inverse of the covariance matrix */ 16 | int dim, /*
dimension */ 17 | int give_log){ /* 1 if log_scale 0 otherwise */ 18 | 19 | int j,k; 20 | double value=0.0; 21 | 22 | for(j=0;j= stub) 56 | error("TruncNorm: lower bound is greater than upper bound\n"); 57 | if (invcdf) { /* inverse cdf method */ 58 | z = qnorm(runif(pnorm(stlb, 0, 1, 1, 0), pnorm(stub, 0, 1, 1, 0)), 59 | 0, 1, 1, 0); 60 | } 61 | else { /* rejection sampling method */ 62 | double tol=2.0; 63 | double temp, M, u, exp_par; 64 | int flag=0; /* 1 if stlb, stub <-tol */ 65 | if(stub<=-tol){ 66 | flag=1; 67 | temp=stub; 68 | stub=-stlb; 69 | stlb=-temp; 70 | } 71 | if(stlb>=tol){ 72 | exp_par=stlb; 73 | while(pexp(stub,1/exp_par,1,0) - pexp(stlb,1/exp_par,1,0) < 0.000001) 74 | exp_par/=2.0; 75 | if(dnorm(stlb,0,1,1) - dexp(stlb,1/exp_par,1) >= 76 | dnorm(stub,0,1,1) - dexp(stub,1/exp_par,1)) 77 | M=exp(dnorm(stlb,0,1,1) - dexp(stlb,1/exp_par,1)); 78 | else 79 | M=exp(dnorm(stub,0,1,1) - dexp(stub,1/exp_par,1)); 80 | do{ 81 | u=unif_rand(); 82 | z=-log(1-u*(pexp(stub,1/exp_par,1,0)-pexp(stlb,1/exp_par,1,0)) 83 | -pexp(stlb,1/exp_par,1,0))/exp_par; 84 | }while(unif_rand() > exp(dnorm(z,0,1,1)-dexp(z,1/exp_par,1))/M ); 85 | if(flag==1) z=-z; 86 | } 87 | else{ 88 | do z=norm_rand(); 89 | while( zstub ); 90 | } 91 | } 92 | return(z*sigma + mu); 93 | } 94 | 95 | 96 | /* Sample from the MVN dist */ 97 | void rMVN( 98 | double *Sample, /* Vector for the sample */ 99 | double *mean, /* The vector of means */ 100 | double **Var, /* The matrix Variance */ 101 | int size) /* The dimension */ 102 | { 103 | int j,k; 104 | double **Model = doubleMatrix(size+1, size+1); 105 | double cond_mean; 106 | 107 | /* draw from mult. normal using SWP */ 108 | for(j=1;j<=size;j++){ 109 | for(k=1;k<=size;k++) 110 | Model[j][k]=Var[j-1][k-1]; 111 | Model[0][j]=mean[j-1]; 112 | Model[j][0]=mean[j-1]; 113 | } 114 | Model[0][0]=-1; 115 | Sample[0]=(double)norm_rand()*sqrt(Model[1][1])+Model[0][1]; 116 | for(j=2;j<=size;j++){ 117 | SWP(Model,j-1,size+1); 118 | cond_mean=Model[j][0]; 119 | for(k=1;k0) 160 | for(k=0;k0) 166 | for(k=0;ku){indL2=0} 82 | if (Delta1<0| f2(omega1,Delta1,l)>u){indU1=0} 83 | if (Delta1<0 | f1(omega0,Delta1,u)0,f1(omega1.boots,Delta1.boots,u),l)-ifelse(indL2!=0&Delta1.boots>0,f2(omega0.boots,Delta1.boots,l),u),l-u) 108 | UB.boots[i] <- min(ifelse(indU1!=0&Delta1.boots>0,f2(omega1.boots, Delta1.boots,l),u)-ifelse(indU2!=0&Delta1.boots>0,f1(omega0.boots,Delta1.boots,u),l),u-l) 109 | 110 | } 111 | #### Calculate CI 112 | sd.LB <- sd(LB.boots) 113 | sd.UB <- sd(UB.boots) 114 | C <- uniroot(CalC,hat.LB=LB,hat.UB=UB,sigma.LB=sd.LB,sigma.UB=sd.UB,alpha=alpha,lower=0,upper=100)$root 115 | LB.CI <- max(LB-C*sd.LB,l-u) 116 | UB.CI <- min(UB+C*sd.UB,u-l) 117 | return(list(LB=LB,UB=UB,LB.CI=LB.CI,UB.CI=UB.CI)) 118 | } 119 | -------------------------------------------------------------------------------- /R/CADEreg.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Regression-based method for the complier average direct effect 3 | ### 4 | ### 5 | 6 | 7 | 8 | #' Regression-based method for the complier average direct effect 9 | #' 10 | #' 11 | #' This function computes the point estimates of the complier average direct effect (CADE) and four 12 | #' different variance estimates: the HC2 variance, the cluster-robust variance, the cluster-robust HC2 13 | #' variance and the variance proposed in the reference. The estimators calculated using this function 14 | #' are cluster-weighted, i.e., the weights are equal for each cluster. 
To obtain the indivudal-weighted 15 | #' estimators, please multiply the recieved treatment and the outcome by \code{n_jJ/N}, where 16 | #' \code{n_j} is the number of individuals in cluster \code{j}, \code{J} is the number of clusters and 17 | #' \code{N} is the total number of individuals. 18 | #' 19 | #' 20 | #' For the details of the method implemented by this function, see the 21 | #' references. 22 | #' 23 | #' @param data A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor. 24 | #' @return A list of class \code{CADEreg} which contains the following items: 25 | #' \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 26 | #' \item{var1.clu}{ The cluster-robust variance of CADE(1). } \item{var0.clu}{ The cluster-robust variance of CADE(0). } 27 | #'\item{var1.clu.hc2}{ The cluster-robust HC2 variance of CADE(1). } 28 | #'\item{var0.clu.hc2}{ The cluster-robust HC2 variance of CADE(0). } 29 | #'\item{var1.hc2}{ The HC2 variance of CADE(1). } 30 | #'\item{var0.hc2}{ The HC2 variance of CADE(0). } 31 | #'\item{var1.ind}{ The individual-robust variance of CADE(1). } 32 | #'\item{var0.ind}{ The individual-robust variance of CADE(0). } 33 | #'\item{var1.reg}{ The proposed variance of CADE(1). } 34 | #'\item{var0.reg}{ The proposed variance of CADE(0). } 35 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 36 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 37 | #' Zhichao Jiang, Department of Politics, Princeton University 38 | #' \email{zhichaoj@@princeton.edu}. 39 | #' @references Kosuke Imai, Zhichao Jiang and Anup Malani (2021). 40 | #' \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Journal of the American Statistical Association}. 
41 | #' @keywords two-stage randomized experiments 42 | #' @export CADEreg 43 | 44 | 45 | CADEreg=function(data){ 46 | ## transform the data into a list 47 | if(!is.factor(data$id)){stop('The cluster id should be a factor variable.')} 48 | cluster.id=unique(data$id) 49 | n.cluster=length(cluster.id) 50 | Z=vector("list", n.cluster) 51 | D=vector("list", n.cluster) 52 | Y=vector("list", n.cluster) 53 | A=rep(0,n.cluster) 54 | for (i in 1:n.cluster){ 55 | Z[[i]]=as.numeric(data$Z[data$id==cluster.id[i]]) 56 | D[[i]]=as.numeric(data$D[data$id==cluster.id[i]]) 57 | Y[[i]]=data$Y[data$id==cluster.id[i]] 58 | if (length(unique(data$A[data$id==cluster.id[i]]))!=1){ 59 | stop( paste0('The assignment mechanism in cluster ',i,' should be the same.')) 60 | } 61 | A[i]=data$A[data$id==cluster.id[i]][1] 62 | } 63 | 64 | 65 | n=sapply(Z,length) 66 | J=length(n) 67 | # weights 68 | W=sum(n) 69 | J1=sum(A) 70 | J0=J-J1 71 | n1=sapply(Z,sum) 72 | n0=n-n1 73 | 74 | index.l=rep(1,J) 75 | index.r=rep(1,J) 76 | for(j in 2:J){ 77 | index.l[j]=1+sum(n[1:(j-1)]) 78 | } 79 | for(j in 1:J){ 80 | index.r[j]=sum(n[1:j]) 81 | } 82 | for (j in 1:J){ 83 | index=index.l[j]:index.r[j] 84 | W[index]=ifelse(A[j]==1, 1/J1,1/(J-J1)) * ifelse(Z[[j]]==1, 1/n1[j],1/(n0[j])) 85 | } 86 | ## Design matrix in the first stage 87 | A.reg= rep(0,sum(n)) 88 | Z.reg=rep(0,sum(n)) 89 | D.reg=rep(0,sum(n)) 90 | Y.reg=rep(0,sum(n)) 91 | for (j in 1:J){ 92 | index=index.l[j]:index.r[j] 93 | A.reg[index]=A[j] 94 | Z.reg[index]=Z[[j]] 95 | D.reg[index]=D[[j]] 96 | Y.reg[index]=Y[[j]] 97 | } 98 | 99 | X= cbind(A.reg, 1-A.reg, Z.reg*A.reg, Z.reg*(1-A.reg) ) 100 | 101 | reg1s=lm(D.reg~0+X,weights=W) 102 | D.hat=X%*%reg1s$coefficients 103 | 104 | M= cbind(A.reg, 1-A.reg, D.hat*A.reg, D.hat*(1-A.reg) ) 105 | reg2s=lm(Y.reg~0+M,weights=as.vector(W)) 106 | res= Y.reg-cbind(A.reg, 1-A.reg, D.reg*A.reg, D.reg*(1-A.reg) )%*%reg2s$coefficients 107 | ### variance 108 | 109 | ## cluster robust variance 110 | MM=t(M)%*%diag(W)%*%M 111 | 112 | var.cluster.med=array(0,dim=c(4,4)) 113 | for( j in 1:J){ 114 | index= index.l[j]:index.r[j] 115 | Mj= M[index,] 116 | if (A[j]==1){ 117 | Sj=cbind(W[index], 0, W[index]*D.hat[index],0) 118 | 119 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 120 | }else{ 121 | Sj=cbind(0,W[index], 0, W[index]*D.hat[index]) 122 | 123 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 124 | 125 | } 126 | } 127 | var.cluster=solve(MM)%*%var.cluster.med%*%solve(MM) 128 | 129 | 130 | ## cluster robust hc2 variance 131 | MM=t(M)%*%diag(W)%*%M 132 | 133 | var.cluster.med=array(0,dim=c(4,4)) 134 | for( j in 1:J){ 135 | index= index.l[j]:index.r[j] 136 | Mj= M[index,] 137 | if (A[j]==1){ 138 | Sj=cbind(W[index], 0, W[index]*D.hat[index],0)*sqrt(J1/(J1-1)) 139 | 140 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 141 | }else{ 142 | Sj=cbind(0,W[index], 0, W[index]*D.hat[index])*sqrt((J-J1)/(J-J1-1)) 143 | 144 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 145 | 146 | } 147 | } 148 | 149 | var.cluster.hc2=solve(MM)%*%var.cluster.med%*%solve(MM) 150 | ### individual robust hc2 151 | res.ind=rep(0,sum(n)) 152 | var.ind.med=array(0,dim=c(4,4)) 153 | for (j in 1:J){ 154 | index= index.l[j]:index.r[j] 155 | adj1= sum(res[index]*Z[[j]]/sum(Z[[j]])) 156 | adj0= sum(res[index]*(1-Z[[j]])/sum(1-Z[[j]])) 157 | 158 | res.ind[index]=res[index] - ifelse(Z[[j]]==1,adj1,adj0) 159 | } 160 | 161 | for (j in 1:J){ 162 | for (i in 1:n[j]){ 163 |
index=index.l[j]-1+i 164 | var.ind.med=var.ind.med+(M[index,])%*% t( M[index,]) *W[index]^2 * ifelse(Z.reg[index]==1, n1[j]/(n1[j]-1),(n0[j])/(n0[j]-1))*res.ind[index]^2 165 | } 166 | } 167 | var.ind=solve(MM)%*%var.ind.med%*%solve(MM) 168 | 169 | 170 | ### traditional hc2 variance 171 | var.hc2.med=array(0,dim=c(4,4)) 172 | for (j in 1:J){ 173 | for (i in 1:n[j]){ 174 | index=index.l[j]-1+i 175 | if (A[j]==1){ 176 | constant=ifelse(Z.reg[index]==1, J1*n1[j]/(J1*n1[j]-1),J1*n0[j]/(J1*n0[j]-1)) 177 | }else{ 178 | constant=ifelse(Z.reg[index]==1, J0*n1[j]/(J1*n1[j]-1),J0*n0[j]/(J1*n0[j]-1)) 179 | } 180 | 181 | var.hc2.med=var.hc2.med+(M[index,])%*% t( M[index,]) *W[index]^2 * constant*res[index]^2 182 | } 183 | } 184 | var.hc2=solve(MM)%*%var.hc2.med%*%solve(MM) 185 | 186 | ## results 187 | est.CADE1=reg2s$coefficients[3] 188 | est.CADE0=reg2s$coefficients[4] 189 | var1.cluster=var.cluster[3,3] 190 | var0.cluster=var.cluster[4,4] 191 | var1.cluster.hc2=var.cluster.hc2[3,3] 192 | var0.cluster.hc2=var.cluster.hc2[4,4] 193 | var1.ind=var.ind[3,3] 194 | var0.ind=var.ind[4,4] 195 | var1.reg=(1-J1/J)*var.cluster.hc2[3,3]+(J1/J)*var.ind[3,3] 196 | var0.reg=(J1/J)*var.cluster.hc2[4,4]+(1-J1/J)*var.ind[4,4] 197 | var1.hc2=var.hc2[3,3] 198 | var0.hc2=var.hc2[4,4] 199 | return(list(CADE1=est.CADE1,CADE0=est.CADE0, var1.clu=var1.cluster,var0.clu=var0.cluster,var1.clu.hc2=var1.cluster.hc2,var0.clu.hc2=var0.cluster.hc2, var1.ind=var1.ind,var0.ind=var0.ind,var1.reg=var1.reg,var0.reg=var0.reg,var1.hc2=var1.hc2,var0.hc2=var0.hc2)) 200 | } 201 | -------------------------------------------------------------------------------- /R/NoncompMAR.R: -------------------------------------------------------------------------------- 1 | NoncompMAR <- function(formula, Z, D, treat = NULL, data = parent.frame(), 2 | n.draws = 5000, insample = TRUE, param = TRUE, 3 | p.mean.c = 0, p.var.c = 100, p.mean.o = 0, 4 | p.var.o = 100, smooth = 1, tie = 0.0001, mda = TRUE, 5 | coef.start.c = 0, coef.start.o = 0, burnin = 0, 6 | thin = 0, verbose = TRUE) { 7 | 8 | ## getting the data 9 | call <- match.call() 10 | mf <- model.frame(formula, data=data, na.action='na.pass') 11 | X <- model.matrix(formula, data=mf) 12 | Y <- as.integer(model.response(mf)) 13 | N <- length(Y) 14 | Z <- eval(call$Z, envir = data) 15 | D <- eval(call$D, envir = data) 16 | 17 | ## first sort by NA and then sort by treat 18 | treat <- eval(call$treat, envir = data) 19 | if (!is.null(treat)) { 20 | varT <- TRUE 21 | N11 <- length(na.omit(treat)) 22 | indx <- 1:N 23 | indx <- c(indx[!is.na(treat)], indx[is.na(treat)]) 24 | X <- X[indx,] 25 | Y <- Y[indx] 26 | Z <- Z[indx] 27 | D <- D[indx] 28 | tmp <- sort(treat, index.return = TRUE) 29 | difftreat <- diff(tmp$x) 30 | difftreat[difftreat==0] <- tie 31 | treat <- c(difftreat, rep(NA, N-N11)) 32 | indx <- c(tmp$ix, (N11+1):N) 33 | X <- X[indx,] 34 | Y <- Y[indx] 35 | Z <- Z[indx] 36 | D <- D[indx] 37 | } 38 | else { 39 | varT <- FALSE 40 | N11 <- 0 41 | } 42 | Ymax <- max(na.omit(Y)) 43 | Ymiss <- is.na(Y) 44 | Y[Ymiss] <- 1 45 | 46 | ## compliance status 47 | C <- rep(NA, N) 48 | C[Z==1 & D==1] <- 1 49 | C[Z==1 & D==0] <- 0 50 | C[Z==0] <- rbinom(sum(Z==0), 1, 1/2) 51 | res <- list(call = call, Y = Y, X = X, C = C, D = D, Z = Z, 52 | n.draws = n.draws) 53 | 54 | if (varT) { 55 | ## Xo = [a10 1 X Xt] where a10 for compliers without treatment 56 | ## The default category is the never-taker without treatment 57 | Xo <- cbind(0,X) 58 | colnames(Xo) <- c("Complier without treatment", colnames(X)) 59 | Xo[C==1 & 
D==0, 1] <- 1 60 | 61 | ## difference matrix 62 | R <- rbind(c(1, rep(0, N11-2)), c(-1,1,rep(0, N11-3))) 63 | for (i in 1:(N11-4)) 64 | R <- rbind(R, c(rep(0,i),-1,1,rep(0,N11-3-i))) 65 | R <- rbind(R, c(rep(0, N11-3), -1, 1)) 66 | Q <- rbind(0, diag(N11-1)) 67 | Xt <- cbind(c(1,1,rep(0, N11-2)), Q%*%solve(R)) 68 | Xt <- rbind(Xt, matrix(0, ncol=N11, nrow=N-N11)) 69 | tmp <- NULL 70 | for (i in 1:N11) 71 | tmp <- c(tmp, paste("delta", i, sep="")) 72 | colnames(Xt) <- tmp 73 | } 74 | else { 75 | ## Xo = [a11 a10 1 X] where a10 for compliers without treatment 76 | ## a11 for compliers with treatment 77 | ## The default category is the never-taker without treatment 78 | Xo <- cbind(0,0,X) 79 | colnames(Xo) <- c("Complier with treatment", "Complier without treatment", 80 | colnames(X)) 81 | Xo[C==1 & D==1, 1] <- 1 82 | Xo[C==1 & D==0, 2] <- 1 83 | } 84 | ## dimension 85 | res$Xo <- Xo 86 | ncov <- ncol(X) 87 | ncovo <- ncovX <- ncol(Xo) 88 | 89 | ## starting values 90 | if(length(coef.start.c) != ncov) 91 | coef.start.c <- rep(coef.start.c, ncov) 92 | if(length(coef.start.o) != ncovo) 93 | coef.start.o <- rep(coef.start.o, ncovo) 94 | 95 | ## prior 96 | if(length(p.mean.c) != ncov) 97 | p.mean.c <- rep(p.mean.c, ncov) 98 | if(length(p.mean.o) != ncovo) 99 | p.mean.o <- rep(p.mean.o, ncovo) 100 | if(!is.matrix(p.var.c)) 101 | p.var.c <- diag(p.var.c, ncov) 102 | if(!is.matrix(p.var.o)) 103 | p.var.o <- diag(p.var.o, ncovo) 104 | ## prior for smooth terms 105 | ## putting the data and smooth stuff together 106 | if(varT) { 107 | smooth <- diag(smooth*difftreat) 108 | Xo <- cbind(Xo, Xt) 109 | ncovo <- ncol(Xo) 110 | p.mean.o <- c(p.mean.o, rep(0, ncovo-length(p.mean.o))) 111 | coef.start.o <- c(coef.start.o, rep(0, ncovo-length(coef.start.o))) 112 | p.var.o <- rbind(cbind(p.var.o, matrix(0, ncol = ncol(smooth), 113 | nrow = nrow(p.var.o))), 114 | cbind(matrix(0, ncol = ncol(p.var.o), nrow = 115 | nrow(smooth)), smooth)) 116 | } 117 | 118 | ## checking thinning and burnin intervals 119 | if (n.draws <= 0) 120 | stop("`n.draws' should be a positive integer.") 121 | if (burnin < 0 || burnin >= n.draws) 122 | stop("`burnin' should be a non-negative integer less than `n.draws'.") 123 | if (thin < 0 || thin >= n.draws) 124 | stop("`thin' should be a non-negative integer less than `n.draws'.") 125 | keep <- thin + 1 126 | 127 | ## calling C function 128 | if (Ymax == 1) # binary probit 129 | if (param) 130 | allpar <- 6+ncov+ncovo 131 | else 132 | allpar <- 6 133 | else # ordered probit 134 | if (param) 135 | allpar <- 2*(Ymax+1)+1+ncov+ncovo+Ymax 136 | else 137 | allpar <- 2*(Ymax+1)+1 138 | par <- .C("MARprobit", 139 | as.integer(Y), as.integer(Ymiss), as.integer(Ymax), 140 | as.integer(Z), as.integer(D), as.integer(C), 141 | as.double(X), as.double(Xo), 142 | as.double(coef.start.c), as.double(coef.start.o), 143 | as.integer(N), as.integer(n.draws), 144 | as.integer(ncov), as.integer(ncovo), as.integer(ncovX), 145 | as.integer(N11), 146 | as.double(p.mean.c), as.double(p.mean.o), 147 | as.double(solve(p.var.c)), as.double(solve(p.var.o)), 148 | as.integer(insample), as.integer(varT), 149 | as.integer(param), as.integer(mda), as.integer(burnin), 150 | as.integer(keep), as.integer(verbose), 151 | pdStore = double(allpar*(ceiling((n.draws-burnin)/keep))), 152 | PACKAGE="experiment")$pdStore 153 | 154 | ## results 155 | par <- matrix(par, ncol = allpar, byrow=TRUE) 156 | if (Ymax == 1) { # binary probit 157 | if (param) { 158 | res$coefficientsC <- par[,7:(6+ncov)] 159 | if (varT){
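## Column layout of the posterior draws in the binary outcome case: columns
## 1-6 hold the quantities of interest, columns 7 to 6+ncov the compliance
## model coefficients. With varying treatment intensities (varT), the outcome
## coefficients exclude the last N11 columns, which hold the smoothing deltas
## and are extracted separately as coefficientsS.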
160 | res$coefficientsO <- par[,(7+ncov):(6+ncov+ncovo-N11)] 161 | res$coefficientsS <- par[,(7+ncov+ncovo-N11):(6+ncov+ncovo)] 162 | } 163 | else 164 | res$coefficientsO <- par[,(7+ncov):(6+ncov+ncovo)] 165 | } 166 | res$pc <- as.matrix(par[,1]) 167 | res$cace <- as.matrix(par[,2]) 168 | res$itt <- as.matrix(par[,3]) 169 | res$base <- as.matrix(par[,4:6]) 170 | colnames(res$pc) <- "Compliance Prob." 171 | colnames(res$itt) <- "ITT" 172 | colnames(res$cace) <- "CACE" 173 | colnames(res$base) <- c("Baseline for compliers", 174 | "Baseline for noncompliers", 175 | "Baseline for all") 176 | } 177 | else { # ordered probit 178 | if (param) { 179 | res$coefficientsC <- par[,(2*(Ymax+1)+2):(2*(Ymax+1)+1+ncov)] 180 | res$coefficientsO <- par[,(2*(Ymax+1)+2+ncov):(2*(Ymax+1)+1+ncov+ncovo)] 181 | res$thresholds <- 182 | par[,(2*(Ymax+1)+2+ncov+ncovo):(2*(Ymax+1)+1+ncov+ncovo+Ymax)] 183 | tmp <- rep("tau", ncol(res$thresholds)) 184 | for (i in 1:ncol(res$thresholds)) 185 | tmp[i] <- paste("tau", i-1, sep="") 186 | colnames(res$thresholds) <- tmp 187 | } 188 | res$pc <- as.matrix(par[,1]) 189 | res$cace <- as.matrix(par[,2:(Ymax+2)]) 190 | res$itt <- as.matrix(par[,(Ymax+3):(2*(Ymax+1)+1)]) 191 | tmp1 <- rep("ITT", ncol(res$itt)) 192 | tmp2 <- rep("CACE", ncol(res$cace)) 193 | for (i in 1:(Ymax+1)) { 194 | tmp1[i] <- paste("ITT(Y=", i-1, ")", sep="") 195 | tmp2[i] <- paste("CACE(Y=", i-1, ")", sep="") 196 | } 197 | colnames(res$itt) <- tmp1 198 | colnames(res$cace) <- tmp2 199 | colnames(res$pc) <- "Compliance Prob." 200 | } 201 | if (param) { 202 | colnames(res$coefficientsC) <- colnames(X) 203 | if (varT) { 204 | colnames(res$coefficientsO) <- colnames(Xo)[1:(ncovo-N11)] 205 | colnames(res$coefficientsS) <- colnames(Xt) 206 | } 207 | else 208 | colnames(res$coefficientsO) <- colnames(Xo) 209 | } 210 | 211 | class(res) <- "NoncompMAR" 212 | return(res) 213 | } 214 | -------------------------------------------------------------------------------- /R/ATEcluster.R: -------------------------------------------------------------------------------- 1 | #' Estimation of the Average Treatment Effects in Cluster-Randomized 2 | #' Experiments 3 | #' 4 | #' This function estimates various average treatment effects in 5 | #' cluster-randomized experiments without using pre-treatment covariates. The 6 | #' treatment variable is assumed to be binary. Currently, only the matched-pair 7 | #' design is allowed. The details of the methods for this design are given in 8 | #' Imai, King, and Nall (2007). 9 | #' 10 | #' 11 | #' @param Y The outcome variable of interest. 12 | #' @param Z The (randomized) cluster-level treatment variable. This variable 13 | #' should be binary. Two units in the same cluster should have the same value. 14 | #' @param grp A variable indicating clusters of units. Two units in the same 15 | #' cluster should have the same value. 16 | #' @param data A data frame containing the relevant variables. 17 | #' @param match A variable indicating matched-pairs of clusters. Two units in 18 | #' the same matched-pair of clusters should have the same value. The default is 19 | #' \code{NULL} (i.e., no matching). 20 | #' @param weights A variable indicating the population cluster sizes, which 21 | #' will be used to construct weights for each pair of clusters. Two units in 22 | #' the same cluster should have the same value. The default is \code{NULL}, in 23 | #' which case sample cluster sizes will be used for constructing weights.
24 | #' @param fpc A logical variable indicating whether or not finite population 25 | #' correction should be used for estimating the lower bound of CACE variance. 26 | #' This is relevant only when \code{weights} are specified. 27 | #' @return A list of class \code{ATEcluster} which contains the following 28 | #' items: \item{call}{ The matched call. } \item{n}{ The total number of 29 | #' units. } \item{n1}{ The total number of units in the treatment group. } 30 | #' \item{n0}{ The total number of units in the control group. } \item{Y}{ The 31 | #' outcome variable. } \item{Y1bar}{ The cluster-specific (unweighted) average 32 | #' value of the observed outcome for the treatment group. } \item{Y0bar}{ The 33 | #' cluster-specific (unweighted) average value of the observed outcome for the 34 | #' control group. } \item{Y1var}{ The cluster-specific sample variance of 35 | #' the observed outcome for the treatment group. } \item{Y0var}{ The 36 | #' cluster-specific sample variance of the observed outcome for the control 37 | #' group. } \item{Z}{ The treatment variable. } \item{grp}{ The 38 | #' cluster-indicator variable. } \item{match}{ The matched-pair indicator 39 | #' variable. } \item{weights}{ The weight variable in its original form. } 40 | #' \item{est}{ The estimated average treatment effect based on the arithmetic 41 | #' mean weights. } \item{var}{ The estimated variance of the average treatment 42 | #' effect estimator based on the arithmetic mean weights. This uses the 43 | #' variance formula provided in Imai, King, and Nall (2007). } \item{var.lb}{ 44 | #' The estimated sharp lower bound of the cluster average treatment effect 45 | #' estimator using the arithmetic mean weights. } \item{est.dk}{ The estimated 46 | #' average treatment effect based on the harmonic mean weights. } 47 | #' \item{var.dk}{ The estimated variance of the average treatment effect 48 | #' estimator based on the harmonic mean weights. This uses the variance formula 49 | #' provided in Donner and Klar (1993). } \item{dkvar}{ The estimated variance 50 | #' of the average treatment effect estimator based on the harmonic mean 51 | #' weights. This uses the variance formula provided in Imai, King, and Nall 52 | #' (2007). } \item{eff}{ The estimated relative efficiency of the matched-pair 53 | #' design over the completely randomized design (the ratio of two estimated 54 | #' variances). } \item{m}{ The number of pairs in the matched-pair design. } 55 | #' \item{N1}{ The population cluster sizes for the treatment group. } 56 | #' \item{N0}{ The population cluster sizes for the control group. } \item{w1}{ 57 | #' Cluster-specific weights for the treatment group. } \item{w0}{ 58 | #' Cluster-specific weights for the control group. } \item{w}{ Pair-specific 59 | #' normalized arithmetic mean weights. These weights sum up to the total number 60 | #' of units in the sample, i.e., \code{n}. } \item{w.dk}{ Pair-specific 61 | #' normalized harmonic mean weights. These weights sum up to the total number 62 | #' of units in the sample, i.e., \code{n}. } \item{diff}{ Within-pair 63 | #' differences if the matched-pair design is analyzed. This equals the 64 | #' difference between \code{Y1bar} and \code{Y0bar}. } 65 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 66 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 67 | #' @references Donner, A. and N. Klar (1993).
\dQuote{Confidence interval 68 | #' construction for effect measures arising from cluster randomized trials.} 69 | #' \emph{Journal of Clinical Epidemiology}, Vol. 46, No. 2, pp. 123-131. 70 | #' 71 | #' Imai, Kosuke, Gary King, and Clayton Nall (2007). \dQuote{The Essential Role 72 | #' of Pair Matching in Cluster-Randomized Experiments, with Application to the 73 | #' Mexican Universal Health Insurance Evaluation}, Technical Report. Department 74 | #' of Politics, Princeton University. 75 | #' @keywords design 76 | #' @export ATEcluster 77 | ATEcluster <- function(Y, Z, grp, data = parent.frame(), 78 | match = NULL, weights = NULL, fpc = TRUE) { 79 | 80 | call <- match.call() 81 | Y <- eval(call$Y, envir = data) 82 | Z <- eval(call$Z, envir = data) 83 | grp <- eval(call$grp, envir = data) 84 | match <- eval(call$match, envir = data) 85 | weights <- eval(call$weights, envir = data) 86 | 87 | n <- length(Y) 88 | res <- list(call = call, n = n, Y = Y, Z = Z, grp = grp, 89 | match = match, weights = weights) 90 | if (is.null(match)) 91 | stop("This option is not yet available.") 92 | else { 93 | res$m <- m <- length(unique(match)) 94 | res$Y1bar <- Y1bar <- tapply(Y[Z==1], match[Z==1], mean) 95 | res$Y0bar <- Y0bar <- tapply(Y[Z==0], match[Z==0], mean) 96 | res$diff <- diff <- Y1bar-Y0bar 97 | res$n1 <- n1 <- tapply(rep(1, sum(Z==1)), match[Z==1], sum) 98 | res$n0 <- n0 <- tapply(rep(1, sum(Z==0)), match[Z==0], sum) 99 | } 100 | 101 | if (is.null(weights)) { 102 | ## variance for PATE1 (sampling of clusters) 103 | N1 <- w1 <- n1 104 | N0 <- w0 <- n0 105 | } else { 106 | ## variance for PATE2 (double sampling) 107 | w1 <- N1 <- tapply(weights[Z==1], match[Z==1], mean) 108 | w0 <- N0 <- tapply(weights[Z==0], match[Z==0], mean) 109 | } 110 | w <- w1 + w0 111 | w <- n*w/sum(w) 112 | ## estimates 113 | ATE.est <- weighted.mean(diff, w) 114 | ATE.var <- m*sum((w*diff-n*ATE.est/m)^2)/((m-1)*(n^2)) 115 | ## Donner & Klar method: 116 | w.dk <- w1*w0/(w1 + w0) 117 | w.dk <- n*w.dk/sum(w.dk) 118 | ATEdk.est <- weighted.mean(diff, w.dk) 119 | ATEdk.var <- sum(w.dk^2)*sum(w.dk*(diff-ATEdk.est)^2)/(n^3) 120 | ATE.dkvar <- sum(w^2)*sum(w*(diff-ATE.est)^2)/(n^3) 121 | ## lower bound for CATE variance 122 | if (!is.null(weights)) { 123 | Y1var <- tapply(Y[Z==1], match[Z==1], var)/n1 124 | Y0var <- tapply(Y[Z==0], match[Z==0], var)/n0 125 | if (fpc) { 126 | Y1var <- (1-n1/N1)*Y1var 127 | Y0var <- (1-n0/N0)*Y0var 128 | if ((sum(n0 > N0)+sum(n1 > N1))>0) 129 | stop("population size is smaller than sample size") 130 | } 131 | res$Y1var <- Y1var 132 | res$Y0var <- Y0var 133 | res$var.lb <- sum((w/n)^2*(Y1var+Y0var)) 134 | } 135 | ## unbiased estimation 136 | ##Y1sum <- tapply(Y[Z==1], match[Z==1], sum) 137 | ##Y0sum <- tapply(Y[Z==0], match[Z==0], sum) 138 | ##ATE.estU <- 2*(sum(Y1sum)-sum(Y0sum))/n 139 | ##ATE.varU <- 4*m*var(Y1sum-Y0sum)/(n^2) 140 | 141 | ## return the results 142 | res$est <- ATE.est 143 | res$est.dk <- ATEdk.est 144 | res$var <- ATE.var 145 | res$dkvar <- ATE.dkvar 146 | res$var.dk <- ATEdk.var 147 | res$eff <- 1/(1-2*cov(w*Y1bar, w*Y0bar)/(var(w*Y1bar)+var(w*Y0bar))) 148 | res$w <- w 149 | res$w.dk <- w.dk 150 | if (!is.null(match)) { 151 | res$w1 <- w1 152 | res$w0 <- w0 153 | res$N0 <- N0 154 | res$N1 <- N1 155 | } 156 | class(res) <- "ATEcluster" 157 | return(res) 158 | } 159 | -------------------------------------------------------------------------------- /R/randomize.R: -------------------------------------------------------------------------------- 1 | #' Randomization of the Treatment
Assignment for Conducting Experiments 2 | #' 3 | #' This function can be used to randomize the treatment assignment for 4 | #' randomized experiments. In addition to the complete randomization, it 5 | #' implements randomized-block and matched-pair designs. 6 | #' 7 | #' Randomized-block designs refer to the complete randomization of the 8 | #' treatment within the pre-specified blocks which contain multiple 9 | #' observations. Matched-pair designs refer to the randomization of the binary 10 | #' treatment variable within the pre-specified pair of observations. 11 | #' 12 | #' @aliases randomize Randomize 13 | #' @param data A data frame containing the observations to which the treatments 14 | #' are randomly assigned. 15 | #' @param group A numerical or character vector indicating the 16 | #' treatment/control groups. The length of the vector equals the total number 17 | #' of such groups. The default specifies two groups called \dQuote{Treat} and 18 | #' \dQuote{Control}. 19 | #' @param ratio An optional numerical vector which specifies the proportion of 20 | #' the treatment/control groups within the sample. The length of the vector 21 | #' should equal the number of groups. The default is the equal allocation. 22 | #' @param indx An optional variable name in the data frame to be used as the 23 | #' names of the observations. If not specified, the row names of the data frame 24 | #' will be used so long as they are available. If the row names are not 25 | #' available, the integer sequence starting from 1 will be used. 26 | #' @param block An optional variable name in the data frame or a formula to be 27 | #' used as the blocking variables for randomized-block designs. If a variable 28 | #' name is specified, then the unique values of that variable will form blocks 29 | #' unless \code{n.block} is specified (see below). If a formula is specified, 30 | #' it will be evaluated using \code{data} and then blocking will be based on 31 | #' the \code{mahalanobis} distance of the resulting model matrix. In this case, 32 | #' users may want to specify \code{n.block} to avoid creating blocks that have 33 | #' too few observations. 34 | #' @param n.block An optional scalar specifying the number of blocks to be 35 | #' created for randomized block designs. If unspecified, the unique values of 36 | #' the blocking variable will define blocks. If specified, the blocks of 37 | #' roughly equal size will be created based on the \code{quantile} of the 38 | #' blocking variable. 39 | #' @param match An optional variable name in the data frame or a formula to be 40 | #' used as the matching variables for matched-pair designs. This input is 41 | #' applicable only to the case where there are two groups. Pairs of 42 | #' observations will be formed based on the similar values of the matching 43 | #' variable. If a formula is specified, the \code{mahalanobis} distance of the 44 | #' resulting model matrix will be used. 45 | #' @param complete logical. If it equals \code{TRUE} (default), then complete 46 | #' randomization will be performed (within each block if randomized block 47 | #' designs are used). Otherwise, simple randomization will be implemented. For 48 | #' matched-pair designs, \code{complete} has to equal \code{TRUE}. 49 | #' @return A list of class \code{randomize} which contains the following items: 50 | #' \item{call}{ the matched call. } \item{treatment}{ The vector of randomized 51 | #' treatments. } \item{data}{ The data frame that was used to conduct the 52 | #' randomization. 
} \item{block}{ The blocking variable that was used to 53 | #' implement randomized-block designs. } \item{match}{ The matching variable 54 | #' that was used to implement matched-pair designs. } \item{block.id}{ The 55 | #' variable indicating which observations belong to which blocks in 56 | #' randomized-block designs. } \item{match.id}{ The variable indicating which 57 | #' observations belong to which pairs in matched-pair designs. } 58 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 59 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 60 | #' @keywords design 61 | #' @export randomize 62 | randomize <- function(data, group = c("Treat", "Control"), ratio = 63 | NULL, indx = NULL, block = NULL, n.block = NULL, 64 | match = NULL, complete = TRUE){ 65 | 66 | ## call 67 | call <- match.call() 68 | ## data 69 | m <- length(group) 70 | if ((!is.null(call$block)) && (!is.null(call$match))) { 71 | stop("invalid inputs for `block' and `match'.") 72 | } else if (!is.null(call$block)) { ## blocking 73 | if ("formula" %in% class(block)) { 74 | tm <- terms(block) 75 | attr(tm, "intercept") <- 0 76 | data <- model.frame(tm, data = data, na.action = na.fail) 77 | X <- model.matrix(tm, data = data) 78 | block <- mahalanobis(X, apply(X, 2, mean), var(X)) 79 | } else { 80 | block <- eval(call$block, envir = data) 81 | } 82 | } else { ## matching 83 | if (m != 2) 84 | stop("2 groups are required for matching.") 85 | if (!is.null(ratio)) 86 | warning("`ratio' will be ignored.") 87 | if (complete) { 88 | if ("formula" %in% class(match)) { 89 | tm <- terms(match) 90 | attr(tm, "intercept") <- 0 91 | data <- model.frame(tm, data = data, na.action = na.fail) 92 | X <- model.matrix(tm, data = data) 93 | match <- mahalanobis(X, apply(X, 2, mean), var(X)) 94 | } else { 95 | match <- eval(call$match, data) 96 | } 97 | } else { 98 | stop("`complete' should be TRUE for matching.") 99 | } 100 | } 101 | 102 | ## getting index 103 | n <- nrow(data) 104 | if (!is.null(indx)) 105 | indx <- eval(indx, data) 106 | else if (is.null(rownames(data))) 107 | indx <- 1:n 108 | else 109 | indx <- rownames(data) 110 | 111 | ## groups 112 | if (is.null(ratio)) 113 | ratio <- rep(1/m, m) 114 | ratio <- ratio/sum(ratio) 115 | if (sum(ratio < 0) > 0) 116 | stop("invalid input for `ratio'.") 117 | 118 | ## output 119 | res <- list(call = call, ratio = ratio) 120 | 121 | ## blocking and matching variable 122 | if (is.null(block) && is.null(match)) { 123 | if (complete) { # complete randomization 124 | tmp <- ratio2size(n, ratio, group) 125 | ttt <- sample(tmp$vector, n, replace = FALSE) 126 | } else { # simple randomization 127 | ttt <- sample(group, n, replace = TRUE, prob = ratio) 128 | } 129 | names(ttt) <- indx 130 | } else if (is.null(match)) { ## blocking 131 | block.id <- rep(NA, n) 132 | if (is.null(n.block)) { 133 | tmp <- unique(block) 134 | n.block <- length(tmp) 135 | for (i in 1:n.block) 136 | block.id[block == tmp[i]] <- i 137 | } else { 138 | tmp <- quantile(block, (0:(n.block-1))/n.block) 139 | block.id <- rep(0, n) 140 | for (i in 1:n.block) { 141 | block.id[block >= tmp[i]] <- block.id[block >= tmp[i]] + 1 142 | } 143 | if (sum(table(block.id) < m) > 0) 144 | stop("some blocks have too few observations.") 145 | } 146 | ttt <- rep(NA, n) 147 | names(ttt) <- names(block.id) <- indx 148 | for (i in 1:n.block) { 149 | howmany <- sum(block.id == i) 150 | if (complete) { # complete randomization 151 | tmp <- ratio2size(howmany, ratio, group) 152 |
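## ratio2size() (defined at the end of this file) converts the group
## proportions in `ratio' into integer counts for the `howmany' units of this
## block, randomly allocating any remainder; sampling the resulting vector of
## group labels without replacement then completes the within-block complete
## randomization.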
ttt[block.id == i] <- sample(tmp$vector, howmany, replace = FALSE) 153 | } else { 154 | ttt[block.id == i] <- sample(group, sum(block.id == i), 155 | replace = TRUE, prob = ratio) 156 | } 157 | } 158 | res$block <- block 159 | res$block.id <- block.id 160 | } else { ## matching 161 | match.id <- ttt <- rep(NA, n) 162 | names(match.id) <- names(ttt) <- indx 163 | counter <- 1 164 | while (sum(is.na(match.id)) > 1) { 165 | unit <- sample(indx[is.na(match.id)], 1, replace = FALSE) 166 | diff <- abs(match[is.na(match.id)]-match[unit]) 167 | mindiff <- names(sort(diff[diff>0]))[1] 168 | match.id[unit] <- match.id[mindiff] <- counter 169 | tmp <- sample(group, 2, replace = FALSE) 170 | ttt[unit] <- tmp[1] 171 | ttt[mindiff] <- tmp[2] 172 | counter <- counter + 1 173 | } 174 | res$match <- match 175 | res$match.id <- match.id 176 | } 177 | 178 | ## return the results 179 | res$treatment <- ttt 180 | res$data <- data 181 | class(res) <- "randomize" 182 | 183 | return(res) 184 | } 185 | 186 | ### 187 | ### This converts ratio into size while randomly allocating remainders 188 | ### 189 | 190 | ratio2size <- function(n, ratio, group) { 191 | m <- length(ratio) 192 | 193 | size <- round(ratio * n, digits = 0) 194 | if (sum(size) > n) { 195 | tmp <- sample(1:length(size), sum(size)-n, replace = FALSE) 196 | size[tmp] <- size[tmp] - 1 197 | } 198 | if (sum(size) < n) { 199 | tmp <- sample(1:length(size), n-sum(size), replace = FALSE) 200 | size[tmp] <- size[tmp] + 1 201 | } 202 | allgroup <- NULL 203 | for (i in 1:m) 204 | allgroup <- c(allgroup, rep(group[i], size[i])) 205 | return(list(size = size, vector = allgroup)) 206 | 207 | } 208 | -------------------------------------------------------------------------------- /R/CADErand.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Randomization-based method for the complier average direct effect and the complier average spillover effect 3 | ### 4 | ### 5 | 6 | 7 | 8 | #' Randomization-based method for the complier average direct effect and the complier average spillover effect 9 | #' 10 | #' 11 | #' This function computes the point estimates and variance estimates of the complier average direct effect (CADE) and the complier average spillover effect (CASE). 12 | #' The estimators calculated using this function are either individual-weighted or cluster-weighted. The point estimates and variances of ITT effects are also included. 13 | #' 14 | #' 15 | #' For the details of the method implemented by this function, see the 16 | #' references. 17 | #' 18 | #' @param data A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor. 19 | #' @param individual A binary variable with TRUE for individual-weighted estimators and FALSE for cluster-weighted estimators. 20 | #' @return A list of class \code{CADErand} which contains the following items: 21 | #' \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 22 | #'\item{CASE1}{ The point estimate of CASE(1). } \item{CASE0}{ The point estimate of CASE(0). } 23 | #'\item{var.CADE1}{ The variance estimate of CADE(1). } 24 | #'\item{var.CADE0}{ The variance estimate of CADE(0). } 25 | #'\item{var.CASE1}{ The variance estimate of CASE(1).
} 26 | #'\item{var.CASE0}{ The variance estimate of CASE(0). } 27 | #'\item{DEY1}{ The point estimate of DEY(1). } \item{DEY0}{ The point estimate of DEY(0). } 28 | #'\item{DED1}{ The point estimate of DED(1). } \item{DED0}{ The point estimate of DED(0). } 29 | #'\item{var.DEY1}{ The variance estimate of DEY(1). } 30 | #'\item{var.DEY0}{ The variance estimate of DEY(0). } 31 | #'\item{var.DED1}{ The variance estimate of DED(1). } 32 | #'\item{var.DED0}{ The variance estimate of DED(0). } 33 | #'\item{SEY1}{ The point estimate of SEY(1). } \item{SEY0}{ The point estimate of SEY(0). } 34 | #'\item{SED1}{ The point estimate of SED(1). } \item{SED0}{ The point estimate of SED(0). } 35 | #'\item{var.SEY1}{ The variance estimate of SEY(1). } 36 | #'\item{var.SEY0}{ The variance estimate of SEY(0). } 37 | #'\item{var.SED1}{ The variance estimate of SED(1). } 38 | #'\item{var.SED0}{ The variance estimate of SED(0). } 39 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 40 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 41 | #' Zhichao Jiang, Department of Politics, Princeton University 42 | #' \email{zhichaoj@@princeton.edu}. 43 | #' @references Kosuke Imai, Zhichao Jiang and Anup Malani (2018). 44 | #' \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Technical Report}. Department of Politics, Princeton 45 | #' University. 46 | #' @keywords two-stage randomized experiments 47 | #' @export CADErand 48 | 49 | 50 | CADErand=function(data,individual=1){ 51 | ## transform the data into list 52 | if(!is.factor(data$id)){stop('The cluster id variable should be a factor.')} 53 | cluster.id=unique(data$id) 54 | n.cluster=length(cluster.id) 55 | Z=vector("list", n.cluster) 56 | D=vector("list", n.cluster) 57 | Y=vector("list", n.cluster) 58 | A=rep(0,n.cluster) 59 | for (i in 1:n.cluster){ 60 | Z[[i]]=as.numeric(data$Z[data$id==cluster.id[i]]) 61 | D[[i]]=as.numeric(data$D[data$id==cluster.id[i]]) 62 | Y[[i]]=data$Y[data$id==cluster.id[i]] 63 | if (length(unique(data$A[data$id==cluster.id[i]]))!=1){ 64 | stop( paste0('The assignment mechanism must be the same for all units in cluster ',i,'.')) 65 | } 66 | A[i]=data$A[data$id==cluster.id[i]][1] 67 | } 68 | 69 | 70 | 71 | n=sapply(Z,length) 72 | N=sum(n) 73 | J=length(n) 74 | if (individual==1){ 75 | for ( i in 1:n.cluster){ 76 | Y[[i]]=Y[[i]]*n[i]*J/N 77 | D[[i]]=D[[i]]*n[i]*J/N 78 | } 79 | } 80 | est.Dj00=rep(0,J) 81 | est.Dj00= sapply(Difflist(D,Productlist(D,Z)),sum)/(n-sapply(Z,sum))*(1-A) 82 | est.Dj01=rep(0,J) 83 | est.Dj01= sapply(Difflist(D,Productlist(D,Z)),sum)/(n-sapply(Z,sum))*(A) 84 | est.Dj10=rep(0,J) 85 | est.Dj10=sapply(Productlist(D,Z),sum)/(sapply(Z,sum))*(1-A) 86 | est.Dj11=rep(0,J) 87 | est.Dj11= sapply(Productlist(D,Z),sum)/(sapply(Z,sum))*(A) 88 | est.D00= sum(est.Dj00*(1-A))/sum(1-A) 89 | est.D10= sum(est.Dj10*(1-A))/sum(1-A) 90 | est.D01= sum(est.Dj01*(A))/sum(A) 91 | est.D11= sum(est.Dj11*(A))/sum(A) 92 | est.DEDj0=est.Dj10-est.Dj00 93 | est.DEDj1=est.Dj11-est.Dj01 94 | est.DED1=est.D11-est.D01 95 | est.DED0=est.D10-est.D00 96 | est.SED1=est.D11-est.D10 97 | est.SED0=est.D01-est.D00 98 | ### variance 99 | est.xiDE0=sum((est.DEDj0-est.DED0)^2*(1-A))/(sum(1-A)-1) 100 | est.xiDE1=sum((est.DEDj1-est.DED1)^2*(A))/(sum(A)-1) 101 | est.xij01=rep(0,J) 102 | est.xij00=rep(0,J) 103 | est.xij11=rep(0,J) 104 | est.xij10=rep(0,J) 105 | est.xib00=sum((est.Dj00-est.D00)^2*(1-A))/(sum(1-A)-1) 106 |
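## est.xib00, est.xib10, est.xib01, est.xib11: between-cluster sample
## variances of the cluster-level treatment receipt averages, computed within
## each assignment mechanism arm (A = 0 or A = 1); they feed the
## spillover-effect variance estimators var.SED0 and var.SED1 below.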
est.xib10=sum((est.Dj10-est.D10)^2*(1-A))/(sum(1-A)-1) 107 | est.xib01=sum((est.Dj01-est.D01)^2*(A))/(sum(A)-1) 108 | est.xib11=sum((est.Dj11-est.D11)^2*(A))/(sum(A)-1) 109 | for (j in 1:J){ 110 | est.xij01[j]=sum((D[[j]]-est.Dj01[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 111 | est.xij00[j]=sum((D[[j]]-est.Dj00[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 112 | est.xij11[j]=sum((D[[j]]-est.Dj11[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 113 | est.xij10[j]=sum((D[[j]]-est.Dj10[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 114 | } 115 | var.DED0=est.xiDE0*(1/sum(1-A)-1/J)+ sum((est.xij00/(n-sapply(Z,sum))+est.xij10/sapply(Z,sum))*(1-A))/J/sum(1-A) 116 | var.DED1=est.xiDE1*(1/sum(A)-1/J)+ sum((est.xij01/(n-sapply(Z,sum))+est.xij11/sapply(Z,sum))*(A))/J/sum(A) 117 | var.SED1=est.xib11/sum(A)+est.xib10/sum(1-A) 118 | var.SED0=est.xib01/sum(A)+est.xib00/sum(1-A) 119 | 120 | est.Yj00=rep(0,J) 121 | est.Yj00= sapply(Difflist(Y,Productlist(Y,Z)),sum)/(n-sapply(Z,sum))*(1-A) 122 | est.Yj01=rep(0,J) 123 | est.Yj01= sapply(Difflist(Y,Productlist(Y,Z)),sum)/(n-sapply(Z,sum))*(A) 124 | est.Yj10=rep(0,J) 125 | est.Yj10= sapply(Productlist(Y,Z),sum)/(sapply(Z,sum))*(1-A) 126 | est.Yj11=rep(0,J) 127 | est.Yj11= sapply(Productlist(Y,Z),sum)/(sapply(Z,sum))*(A) 128 | est.Y00= sum(est.Yj00*(1-A))/sum(1-A) 129 | est.Y10= sum(est.Yj10*(1-A))/sum(1-A) 130 | est.Y01= sum(est.Yj01*(A))/sum(A) 131 | est.Y11= sum(est.Yj11*(A))/sum(A) 132 | est.DEYj0=est.Yj10-est.Yj00 133 | est.DEYj1=est.Yj11-est.Yj01 134 | est.DEY1=est.Y11-est.Y01 135 | est.DEY0=est.Y10-est.Y00 136 | est.SEY1=est.Y11-est.Y10 137 | est.SEY0=est.Y01-est.Y00 138 | ### variance 139 | est.sigmaDE0=sum((est.DEYj0-est.DEY0)^2*(1-A))/(sum(1-A)-1) 140 | est.sigmaDE1=sum((est.DEYj1-est.DEY1)^2*(A))/(sum(A)-1) 141 | est.sigmaj01=rep(0,J) 142 | est.sigmaj00=rep(0,J) 143 | est.sigmaj11=rep(0,J) 144 | est.sigmaj10=rep(0,J) 145 | est.sigmab00=sum((est.Yj00-est.Y00)^2*(1-A))/(sum(1-A)-1) 146 | est.sigmab10=sum((est.Yj10-est.Y10)^2*(1-A))/(sum(1-A)-1) 147 | est.sigmab01=sum((est.Yj01-est.Y01)^2*(A))/(sum(A)-1) 148 | est.sigmab11=sum((est.Yj11-est.Y11)^2*(A))/(sum(A)-1) 149 | for (j in 1:J){ 150 | est.sigmaj01[j]=sum((Y[[j]]-est.Yj01[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 151 | est.sigmaj00[j]=sum((Y[[j]]-est.Yj00[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 152 | est.sigmaj11[j]=sum((Y[[j]]-est.Yj11[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 153 | est.sigmaj10[j]=sum((Y[[j]]-est.Yj10[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 154 | } 155 | var.DEY0=est.sigmaDE0*(1/sum(1-A)-1/J)+ sum((est.sigmaj00/(n-sapply(Z,sum))+est.sigmaj10/sapply(Z,sum))*(1-A))/J/sum(1-A) 156 | var.DEY1=est.sigmaDE1*(1/sum(A)-1/J)+ sum((est.sigmaj01/(n-sapply(Z,sum))+est.sigmaj11/sapply(Z,sum))*(A))/J/sum(A) 157 | var.SEY1=est.sigmab11/sum(A)+est.sigmab10/sum(1-A) 158 | var.SEY0=est.sigmab01/sum(A)+est.sigmab00/sum(1-A) 159 | 160 | ### covariance 161 | est.zetaDE0=sum((est.DEYj0-est.DEY0)*(est.DEDj0-est.DED0)*(1-A))/(sum(1-A)-1) 162 | est.zetaDE1=sum((est.DEYj1-est.DEY1)*(est.DEDj1-est.DED1)*(A))/(sum(A)-1) 163 | est.zetaj01=rep(0,J) 164 | est.zetaj00=rep(0,J) 165 | est.zetaj11=rep(0,J) 166 | est.zetaj10=rep(0,J) 167 | est.zetab00=sum((est.Yj00-est.Y00)*(est.Dj00-est.D00)*(1-A))/(sum(1-A)-1) 168 | est.zetab10=sum((est.Yj10-est.Y10)*(est.Dj10-est.D10)*(1-A))/(sum(1-A)-1) 169 | est.zetab01=sum((est.Yj01-est.Y01)*(est.Dj01-est.D01)*(A))/(sum(A)-1) 170 | est.zetab11=sum((est.Yj11-est.Y11)*(est.Dj11-est.D11)*(A))/(sum(A)-1) 171 | for (j in 1:J){ 172 | est.zetaj01[j]=sum((Y[[j]]-est.Yj01[j])*(D[[j]]-est.Dj01[j])*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 173 | 
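## within-cluster sample covariances between the outcome and the treatment
## receipt, computed separately for the Z = 0 and Z = 1 units of cluster j;
## these enter the linearization-based variance estimates of the ratio
## estimators CADE and CASE below.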
est.zetaj00[j]=sum((Y[[j]]-est.Yj00[j])*(D[[j]]-est.Dj00[j])*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 174 | est.zetaj11[j]=sum((Y[[j]]-est.Yj11[j])*(D[[j]]-est.Dj11[j])*(Z[[j]]))/(sum(Z[[j]])-1) 175 | est.zetaj10[j]=sum((Y[[j]]-est.Yj10[j])*(D[[j]]-est.Dj10[j])*(Z[[j]]))/(sum(Z[[j]])-1) 176 | } 177 | est.zeta0=est.zetaDE0*(1/sum(1-A)-1/J)+ sum((est.zetaj00/(n-sapply(Z,sum))+est.zetaj10/sapply(Z,sum))*(1-A))/J/sum(1-A) 178 | est.zeta1=est.zetaDE1*(1/sum(A)-1/J)+ sum((est.zetaj01/(n-sapply(Z,sum))+est.zetaj11/sapply(Z,sum))*(A))/J/sum(A) 179 | est.zetab1=est.zetab11/sum(A)+est.zetab10/sum(1-A) 180 | est.zetab0=est.zetab01/sum(A)+est.zetab00/sum(1-A) 181 | 182 | 183 | #### CADE 184 | est.CADE1=est.DEY1/est.DED1 185 | est.CADE0=est.DEY0/est.DED0 186 | est.CASE1=est.SEY1/est.SED1 187 | est.CASE0=est.SEY0/est.SED0 188 | est.varCADE1= (var.DEY1-2*est.CADE1*est.zeta1+est.CADE1^2*var.DED1)/est.DED1^2 189 | est.varCADE0= (var.DEY0-2*est.CADE0*est.zeta0+est.CADE0^2*var.DED0)/est.DED0^2 190 | est.varCASE1= (var.SEY1-2*est.CASE1*est.zetab1+est.CASE1^2*var.SED1)/est.SED1^2 191 | est.varCASE0= (var.SEY0-2*est.CASE0*est.zetab0+est.CASE0^2*var.SED0)/est.SED0^2 192 | return(list(CADE1=est.CADE1,CADE0=est.CADE0,CASE1=est.CASE1,CASE0=est.CASE0, var.CADE1=est.varCADE1,var.CADE0=est.varCADE0,var.CASE1=est.varCASE1,var.CASE0=est.varCASE0, 193 | DEY1=est.DEY1,DEY0=est.DEY0,DED1=est.DED1,DED0=est.DED0, 194 | var.DEY1=var.DEY1,var.DEY0=var.DEY0,var.DED1=var.DED1,var.DED0=var.DED0, 195 | SEY1=est.SEY1,SEY0=est.SEY0,SED1=est.SED1,SED0=est.SED0, 196 | var.SEY1=var.SEY1,var.SEY0=var.SEY0,var.SED1=var.SED1,var.SED0=var.SED0 197 | )) 198 | } 199 | -------------------------------------------------------------------------------- /man/NoncompLI.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/NoncompLI.R 3 | \name{NoncompLI} 4 | \alias{NoncompLI} 5 | \title{Bayesian Analysis of Randomized Experiments with Noncompliance and Missing 6 | Outcomes Under the Assumption of Latent Ignorability} 7 | \usage{ 8 | NoncompLI( 9 | formulae, 10 | Z, 11 | D, 12 | data = parent.frame(), 13 | n.draws = 5000, 14 | param = TRUE, 15 | in.sample = FALSE, 16 | model.c = "probit", 17 | model.o = "probit", 18 | model.r = "probit", 19 | tune.c = 0.01, 20 | tune.o = 0.01, 21 | tune.r = 0.01, 22 | tune.v = 0.01, 23 | p.mean.c = 0, 24 | p.mean.o = 0, 25 | p.mean.r = 0, 26 | p.prec.c = 0.001, 27 | p.prec.o = 0.001, 28 | p.prec.r = 0.001, 29 | p.df.o = 10, 30 | p.scale.o = 1, 31 | p.shape.o = 1, 32 | mda.probit = TRUE, 33 | coef.start.c = 0, 34 | coef.start.o = 0, 35 | tau.start.o = NULL, 36 | coef.start.r = 0, 37 | var.start.o = 1, 38 | burnin = 0, 39 | thin = 0, 40 | verbose = TRUE 41 | ) 42 | } 43 | \arguments{ 44 | \item{formulae}{A list of formulae where the first formula specifies the 45 | (pre-treatment) covariates in the outcome model (the latent compliance 46 | covariate will be added automatically), the second formula specifies the 47 | compliance model, and the third formula defines the covariate specification 48 | for the model for the missing-data mechanism (the latent compliance covariate 49 | will be added automatically). For the outcome model, the formula should take 50 | the standard two-sided R \command{formula} where the outcome variable is 51 | specified on the left-hand side of the formula, which is then separated by 52 | \code{~} from the covariate equation on the right-hand side, e.g., \code{y ~ 53 | x1 + x2}.
For the compliance and missing-data mechanism models, the 54 | one-sided \command{formula} should be used where the left-hand side is left 55 | unspecified, e.g., \code{~ x1 + x2}.} 56 | 57 | \item{Z}{A randomized encouragement variable, which should be a binary 58 | variable in the specified data frame.} 59 | 60 | \item{D}{A treatment variable, which should be a binary variable in the 61 | specified data frame.} 62 | 63 | \item{data}{A data frame which contains the variables that appear in the 64 | model formulae (\code{formulae}), the encouragement variable (\code{Z}), and 65 | the treatment variable (\code{D}).} 66 | 67 | \item{n.draws}{The number of MCMC draws. The default is \code{5000}.} 68 | 69 | \item{param}{A logical variable indicating whether the Monte Carlo draws of 70 | the model parameters should be saved in the output object. The default is 71 | \code{TRUE}.} 72 | 73 | \item{in.sample}{A logical variable indicating whether or not the sample 74 | average causal effect should be calculated using the observed potential 75 | outcome for each unit. If it is set to \code{FALSE}, then the population 76 | average causal effect will be calculated. The default is \code{FALSE}.} 77 | 78 | \item{model.c}{The model for compliance. Either \code{logit} or 79 | \code{probit} model is allowed. The default is \code{probit}.} 80 | 81 | \item{model.o}{The model for outcome. The following six models are allowed: 82 | \code{logit}, \code{probit}, \code{oprobit} (ordered probit regression), 83 | \code{gaussian} (gaussian regression), \code{negbin} (negative binomial 84 | regression), and \code{twopart} (two-part model where the first part is the 85 | probit regression for \eqn{Pr(Y>0|X)} and the second part models 86 | \eqn{p(log(Y)|X, Y>0)} using the gaussian regression). The default is 87 | \code{probit}.} 88 | 89 | \item{model.r}{The model for (non)response. Either \code{logit} or 90 | \code{probit} model is allowed. The default is \code{probit}.} 91 | 92 | \item{tune.c}{Tuning constants for fitting the compliance model. These 93 | positive constants are used to tune the (random-walk) Metropolis-Hastings 94 | algorithm to fit the logit model. Use either a scalar or a vector of 95 | constants whose length equals that of the coefficient vector. The default is 96 | \code{0.01}.} 97 | 98 | \item{tune.o}{Tuning constants for fitting the outcome model. These positive 99 | constants are used to tune the (random-walk) Metropolis-Hastings algorithm 100 | to fit logit, ordered probit, and negative binomial models. Use either a 101 | scalar or a vector of constants whose length equals that of the coefficient 102 | vector for logit and negative binomial models. For the ordered probit model, 103 | use either a scalar or a vector of constants whose length equals that of 104 | cut-point parameters to be estimated. The default is \code{0.01}.} 105 | 106 | \item{tune.r}{Tuning constants for fitting the (non)response model. These 107 | positive constants are used to tune the (random-walk) Metropolis-Hastings 108 | algorithm to fit the logit model. Use either a scalar or a vector of 109 | constants whose length equals that of the coefficient vector. The default is 110 | \code{0.01}.} 111 | 112 | \item{tune.v}{A scalar tuning constant for fitting the variance component of 113 | the negative binomial (outcome) model. The default is \code{0.01}.} 114 | 115 | \item{p.mean.c}{Prior mean for the compliance model. It should be either a 116 | scalar or a vector of appropriate length.
The default is \code{0}.} 117 | 118 | \item{p.mean.o}{Prior mean for the outcome model. It should be either a 119 | scalar or a vector of appropriate length. The default is \code{0}.} 120 | 121 | \item{p.mean.r}{Prior mean for the (non)response model. It should be either 122 | a scalar or a vector of appropriate length. The default is \code{0}.} 123 | 124 | \item{p.prec.c}{Prior precision for the compliance model. It should be 125 | either a positive scalar or a positive semi-definite matrix of appropriate 126 | size. The default is \code{0.001}.} 127 | 128 | \item{p.prec.o}{Prior precision for the outcome model. It should be either a 129 | positive scalar or a positive semi-definite matrix of appropriate size. The 130 | default is \code{0.001}.} 131 | 132 | \item{p.prec.r}{Prior precision for the (non)response model. It should be 133 | either a positive scalar or a positive semi-definite matrix of appropriate 134 | size. The default is \code{0.001}.} 135 | 136 | \item{p.df.o}{A positive integer. Prior degrees of freedom parameter for the 137 | inverse chi-square distribution in the gaussian and twopart (outcome) models. 138 | The default is \code{10}.} 139 | 140 | \item{p.scale.o}{A positive scalar. Prior scale parameter for the inverse 141 | chi-square distribution (for the variance) in the gaussian and twopart 142 | (outcome) models. For the negative binomial (outcome) model, this is used 143 | for the scale parameter of the inverse gamma distribution. The default is 144 | \code{1}.} 145 | 146 | \item{p.shape.o}{A positive scalar. Prior shape for the inverse gamma 147 | distribution in the negative binomial (outcome) model. The default is 148 | \code{1}.} 149 | 150 | \item{mda.probit}{A logical variable indicating whether to use marginal data 151 | augmentation for probit models. The default is \code{TRUE}.} 152 | 153 | \item{coef.start.c}{Starting values for coefficients of the compliance 154 | model. It should be either a scalar or a vector of appropriate length. The 155 | default is \code{0}.} 156 | 157 | \item{coef.start.o}{Starting values for coefficients of the outcome model. 158 | It should be either a scalar or a vector of appropriate length. The default 159 | is \code{0}.} 160 | 161 | \item{tau.start.o}{Starting values for thresholds of the ordered probit 162 | (outcome) model. If it is set to \code{NULL}, then the starting values will 163 | be a sequence starting from 0 and then incrementing by 0.1. The default is 164 | \code{NULL}.} 165 | 166 | \item{coef.start.r}{Starting values for coefficients of the (non)response 167 | model. It should be either a scalar or a vector of appropriate length. The 168 | default is \code{0}.} 169 | 170 | \item{var.start.o}{A positive scalar starting value for the variance of the 171 | gaussian, negative binomial, and twopart (outcome) models. The default is 172 | \code{1}.} 173 | 174 | \item{burnin}{The number of initial burn-in draws for the Markov chain. The 175 | default is \code{0}.} 176 | 177 | \item{thin}{The size of the thinning interval for the Markov chain. The default 178 | is \code{0}.} 179 | 180 | \item{verbose}{A logical variable indicating whether additional progress 181 | reports should be printed while running the code.
The default is \code{TRUE}.} 182 | } 183 | \value{ 184 | An object of class \code{NoncompLI} which contains the following 185 | elements as a list: \item{call}{The matched call.} \item{Y}{The outcome 186 | variable.} \item{D}{The treatment variable.} \item{Z}{The (randomized) 187 | encouragement variable.} \item{R}{The response indicator variable for 188 | \code{Y}.} \item{A}{The indicator variable for (known) always-takers, i.e., 189 | the control units who received the treatment.} \item{C}{The indicator 190 | variable for (known) compliers, i.e., the encouraged units who received the 191 | treatment when there are no always-takers.} \item{Xo}{The matrix of 192 | covariates used for the outcome model.} \item{Xc}{The matrix of covariates 193 | used for the compliance model.} \item{Xr}{The matrix of covariates used for 194 | the (non)response model.} \item{n.draws}{The number of MCMC draws.} 195 | \item{QoI}{The Monte Carlo draws of quantities of interest from their 196 | posterior distributions. Quantities of interest include \code{ITT} 197 | (intention-to-treat) effect, \code{CACE} (complier average causal effect), 198 | \code{Y1barC} (The mean outcome value under the treatment for compliers), 199 | \code{Y0barC} (The mean outcome value under the control for compliers), 200 | \code{YbarN} (The mean outcome value for never-takers), \code{YbarA} (The 201 | mean outcome value for always-takers), \code{pC} (The proportion of 202 | compliers), \code{pN} (The proportion of never-takers), \code{pA} (The 203 | proportion of always-takers) } If \code{param} is set to \code{TRUE}, the 204 | following elements are also included: \item{coefO}{The Monte Carlo draws of 205 | coefficients of the outcome model from their posterior distribution.} 206 | \item{coefO1}{If \code{model = "twopart"}, this element contains the Monte 207 | Carlo draws of coefficients of the outcome model for \eqn{p(log(Y)|X, Y > 208 | 0)} from their posterior distribution.} \item{coefC}{The Monte Carlo draws 209 | of coefficients of the compliance model from their posterior distribution.} 210 | \item{coefA}{If always-takers exist, then this element contains the Monte 211 | Carlo draws of coefficients of the compliance model for always-takers from 212 | their posterior distribution.} \item{coefR}{The Monte Carlo draws of 213 | coefficients of the (non)response model from their posterior distribution.} 214 | \item{sig2}{The Monte Carlo draws of the variance parameter for the 215 | gaussian, negative binomial, and twopart (outcome) models.} 216 | } 217 | \description{ 218 | This function estimates the average causal effects for randomized 219 | experiments with noncompliance and missing outcomes under the assumption of 220 | latent ignorability (Frangakis and Rubin, 1999). The models are based on 221 | Bayesian generalized linear models and are fitted using the Markov chain 222 | Monte Carlo algorithms. Various types of outcome variables can be 223 | analyzed to estimate the Intention-to-Treat effect and Complier Average 224 | Causal Effect. 225 | } 226 | \details{ 227 | For the details of the model being fitted, see the references. Note that 228 | when always-takers exist we fit either two logistic or two probit models by 229 | first modeling whether a unit is a complier or a noncomplier, and then 230 | modeling whether a unit is an always-taker or a never-taker for those who 231 | are classified as non-compliers. 232 | } 233 | \references{ 234 | Frangakis, Constantine E. and Donald B. Rubin. (1999).
235 | \dQuote{Addressing Complications of Intention-to-Treat Analysis in the 236 | Combined Presence of All-or-None Treatment Noncompliance and Subsequent 237 | Missing Outcomes.} \emph{Biometrika}, Vol. 86, No. 2, pp. 365-379. 238 | 239 | Hirano, Keisuke, Guido W. Imbens, Donald B. Rubin, and Xiao-Hua Zhou. 240 | (2000). \dQuote{Assessing the Effect of an Influenza Vaccine in an 241 | Encouragement Design.} \emph{Biostatistics}, Vol. 1, No. 1, pp. 69-88. 242 | 243 | Barnard, John, Constantine E. Frangakis, Jennifer L. Hill, and Donald B. 244 | Rubin. (2003). \dQuote{Principal Stratification Approach to Broken 245 | Randomized Experiments: A Case Study of School Choice Vouchers in New York 246 | (with Discussion)}, \emph{Journal of the American Statistical Association}, 247 | Vol. 98, No. 462, pp. 299-311. 248 | 249 | Horiuchi, Yusaku, Kosuke Imai, and Naoko Taniguchi (2007). \dQuote{Designing 250 | and Analyzing Randomized Experiments: Application to a Japanese Election 251 | Survey Experiment.} \emph{American Journal of Political Science}, Vol. 51, 252 | No. 3 (July), pp. 669-687. 253 | } 254 | \author{ 255 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 256 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 257 | } 258 | \keyword{models} 259 | -------------------------------------------------------------------------------- /R/NInocov.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Assumption of nonignorability 3 | ### 4 | 5 | NInocov <- function(Y, D, Z = NULL, CI = 0.95) { 6 | 7 | if (is.null(Z)) { 8 | ## Without noncompliance 9 | n <- length(D) 10 | R <- (!is.na(Y))*1 11 | pi00 <- mean((D == 0)*(R == 0)) 12 | pi01 <- mean((D == 0)*(R == 1)) 13 | pi10 <- mean((D == 1)*(R == 0)) 14 | pi11 <- mean((D == 1)*(R == 1)) 15 | 16 | p01 <- mean(Y[D == 0 & R == 1]) 17 | p11 <- mean(Y[D == 1 & R == 1]) 18 | 19 | p00 <- (p01*((1-p11)*pi00*pi11-(1-p01)*pi01*pi10))/(pi00*pi11*(p01-p11)) 20 | p10 <- (p11*((1-p11)*pi00*pi11-(1-p01)*pi01*pi10))/(pi01*pi10*(p01-p11)) 21 | 22 | p0 <- (p00*pi00+p01*pi01)/(pi00+pi01) 23 | p1 <- (p10*pi10+p11*pi11)/(pi10+pi11) 24 | Ybar <- matrix(c(p0, p1), nrow = 1) 25 | colnames(Ybar) <- c("Y0bar", "Y1bar") 26 | 27 | ATE <- p1-p0 28 | 29 | pi111 <- mean(Y == 1 & D == 1 & R == 1) 30 | pi101 <- mean(Y == 1 & D == 0 & R == 1) 31 | 32 | ## bounds 33 | lb <- (p11*pi11*(pi00+pi01)-(pi00+p01*pi01)*(pi10+pi11))/((pi10+pi11)*(pi00+pi01)) 34 | ub <- ((pi10+p11*pi11)*(pi00+pi01)-p01*pi01*(pi10+pi11))/((pi10+pi11)*(pi00+pi01)) 35 | 36 | ## asymptotic variance of ATE 37 | Sigma <- matrix(NA, ncol = 5, nrow = 5) 38 | Sigma[1,1] <- pi01*(1-pi01) 39 | Sigma[2,2] <- pi10*(1-pi10) 40 | Sigma[3,3] <- pi11*(1-pi11) 41 | Sigma[4,4] <- pi11*p11*(1-pi11*p11) 42 | Sigma[5,5] <- pi01*p01*(1-pi01*p01) 43 | Sigma[1,2] <- Sigma[2,1] <- -pi01*pi10 44 | Sigma[1,3] <- Sigma[3,1] <- -pi01*pi11 45 | Sigma[1,4] <- Sigma[4,1] <- -pi01*pi11*p11 46 | Sigma[1,5] <- Sigma[5,1] <- pi01*p01*(1-pi01) 47 | Sigma[2,3] <- Sigma[3,2] <- -pi10*pi11 48 | Sigma[2,4] <- Sigma[4,2] <- -pi10*pi11*p11 49 | Sigma[2,5] <- Sigma[5,2] <- -pi10*p01*pi01 50 | Sigma[3,4] <- Sigma[4,3] <- pi11*p11*(1-pi11) 51 | Sigma[3,5] <- Sigma[5,3] <- -pi11*pi01*p01 52 | Sigma[4,5] <- Sigma[5,4] <- -pi11*p11*pi01*p01 53 | 54 | Delta <- matrix(NA, ncol = 1, nrow = 5) 55 | A <- pi11*p11*(pi00+pi01)-pi01*p01*(pi10+pi11) 56 | B <- pi00*pi11-pi10*pi01 57 | C <- pi11*pi01*(p01-p11)*(pi00+pi01)*(pi10+pi11) 58 | Delta[1] <-
-(pi10+pi11)*A+A*(B-A)*p11/(pi01*(p01-p11)) 59 | Delta[2] <- -(p11*pi11+p01*pi01)*(B-2*A)-(pi11+pi01)*A - 60 | A*(B-A)*(pi00+pi01-pi10-pi11)/((pi00+pi01)*(pi10+pi11)) 61 | Delta[3] <- -(p11*pi11+p01*pi01)*(B-2*A)+(pi00-pi11)*A - 62 | A*(B-A)*(p01/((p01-p11)*pi11)+(pi00+pi01-pi10-pi11)/((pi00+pi01)*(pi10+pi11))) 63 | Delta[4] <- (pi00+pi01)*(B-2*A)+A*(B-A)/(pi11*(p01-p11)) 64 | Delta[5] <- -(pi10+pi11)*(B-2*A)-A*(B-A)/(pi01*(p01-p11)) 65 | Delta <- Delta/C 66 | 67 | var <- t(Delta)%*%Sigma%*%Delta 68 | se <- sqrt(var/n) 69 | 70 | ps <- matrix(c(p00, p10, p01, p11), nrow = 1) 71 | colnames(ps) <- c("p00", "p10", "p01", "p11") 72 | 73 | pis <- matrix(c(pi00, pi10, pi01, pi11), nrow = 1) 74 | colnames(pis) <- c("pi00", "pi10", "pi01", "pi11") 75 | 76 | ATE <- matrix(c(ATE, se), nrow = 1) 77 | colnames(ATE) <- c("ATE", "s.e.") 78 | z <- qnorm(1-(1-CI)/2) 79 | ci.est <- matrix(c(ATE[1]-z*se, ATE[1]+z*se), nrow = 1) 80 | colnames(ci.est) <- c("lower CI", "upper CI") 81 | return(list(Ybar = Ybar, ATE = ATE, CI = ci.est, 82 | bounds = c(lb, ub), ps = ps, pis = pis, n = n)) 83 | } else { 84 | ## WITH NONCOMPLIANCE 85 | n <- length(D) 86 | R <- (!is.na(Y))*1 87 | pi000 <- mean(D == 0 & R == 0 & Z == 0) 88 | pi100 <- mean(D == 1 & R == 0 & Z == 0) 89 | pi001 <- mean(D == 0 & R == 0 & Z == 1) 90 | pi101 <- mean(D == 1 & R == 0 & Z == 1) 91 | pi010 <- mean(D == 0 & R == 1 & Z == 0) 92 | pi110 <- mean(D == 1 & R == 1 & Z == 0) 93 | pi011 <- mean(D == 0 & R == 1 & Z == 1) 94 | pi111 <- mean(D == 1 & R == 1 & Z == 1) 95 | 96 | p010 <- mean(Y[D == 0 & R == 1 & Z == 0]) 97 | p011 <- mean(Y[D == 0 & R == 1 & Z == 1]) 98 | p110 <- mean(Y[D == 1 & R == 1 & Z == 0]) 99 | p111 <- mean(Y[D == 1 & R == 1 & Z == 1]) 100 | 101 | p000 <- (p010*((1-p011)*pi000*pi011-(1-p010)*pi010*pi001))/(pi000*pi011*(p010-p011)) 102 | p100 <- (p110*((1-p111)*pi100*pi111-(1-p110)*pi110*pi101))/(pi100*pi111*(p110-p111)) 103 | p001 <- (p011*((1-p011)*pi000*pi011-(1-p010)*pi010*pi001))/(pi010*pi001*(p010-p011)) 104 | p101 <- (p111*((1-p111)*pi100*pi111-(1-p110)*pi110*pi101))/(pi110*pi101*(p110-p111)) 105 | 106 | Y0bar <- (p000*pi000+p010*pi010+p100*pi100+p110*pi110)/(pi000+pi010+pi100+pi110) 107 | Y1bar <- (p001*pi001+p011*pi011+p101*pi101+p111*pi111)/(pi001+pi011+pi101+pi111) 108 | Ybar <- matrix(c(Y0bar, Y1bar), nrow = 1) 109 | colnames(Ybar) <- c("Y0bar", "Y1bar") 110 | 111 | ITT <- Y1bar-Y0bar 112 | CACE <- ITT/(mean(D[Z==1])-mean(D[Z==0])) 113 | 114 | ## alternative parameterization 115 | A0 <- p011*(pi011+((1-p011)*pi000*pi011/pi010-(1-p010)*pi001)/(p010-p011)) 116 | A1 <- p111*(pi111+((1-p111)*pi100*pi111/pi110-(1-p110)*pi101)/(p110-p111)) 117 | A <- A0 + A1 118 | C1 <- pi001+pi011+pi101+pi111 119 | C0 <- pi000+pi010+pi100+pi110 120 | Y1bar.alt <- A/C1 121 | 122 | B0 <- p010*(pi010+((1-p011)*pi000-(1-p010)*pi010*pi001/pi011)/(p010-p011)) 123 | B1 <- p110*(pi110+((1-p111)*pi100-(1-p110)*pi110*pi101/pi111)/(p110-p111)) 124 | B <- B0 + B1 125 | Y0bar.alt <- B/C0 126 | 127 | cace <- (Y1bar.alt-Y0bar.alt)*C0*C1 128 | cace.denom <- (pi101+pi111)*C0-(pi100+pi110)*C1 129 | cace <- cace/cace.denom 130 | 131 | ## variances 132 | Sigma <- matrix(NA, ncol = 11, nrow = 11) 133 | Sigma[1,1] <- pi001*(1-pi001) 134 | Sigma[2,2] <- pi010*(1-pi010) 135 | Sigma[3,3] <- pi100*(1-pi100) 136 | Sigma[4,4] <- pi011*(1-pi011) 137 | Sigma[5,5] <- pi101*(1-pi101) 138 | Sigma[6,6] <- pi110*(1-pi110) 139 | Sigma[7,7] <- pi111*(1-pi111) 140 | Sigma[8,8] <- p010*pi010*(1-p010*pi010) 141 | Sigma[9,9] <- p110*pi110*(1-p110*pi110) 142 | Sigma[10,10] <- 
p011*pi011*(1-p011*pi011) 143 | Sigma[11,11] <- p111*pi111*(1-p111*pi111) 144 | 145 | ## covariances 146 | Sigma[1,2] <- Sigma[2,1] <- -pi001*pi010 147 | Sigma[1,3] <- Sigma[3,1] <- -pi001*pi100 148 | Sigma[1,4] <- Sigma[4,1] <- -pi001*pi011 149 | Sigma[1,5] <- Sigma[5,1] <- -pi001*pi101 150 | Sigma[1,6] <- Sigma[6,1] <- -pi001*pi110 151 | Sigma[1,7] <- Sigma[7,1] <- -pi001*pi111 152 | Sigma[1,8] <- Sigma[8,1] <- -pi001*p010*pi010 153 | Sigma[1,9] <- Sigma[9,1] <- -pi001*p110*pi110 154 | Sigma[1,10] <- Sigma[10,1] <- -pi001*p011*pi011 155 | Sigma[1,11] <- Sigma[11,1] <- -pi001*p111*pi111 156 | 157 | Sigma[2,3] <- Sigma[3,2] <- -pi010*pi100 158 | Sigma[2,4] <- Sigma[4,2] <- -pi010*pi011 159 | Sigma[2,5] <- Sigma[5,2] <- -pi010*pi101 160 | Sigma[2,6] <- Sigma[6,2] <- -pi010*pi110 161 | Sigma[2,7] <- Sigma[7,2] <- -pi010*pi111 162 | Sigma[2,8] <- Sigma[8,2] <- p010*pi010*(1-pi010) 163 | Sigma[2,9] <- Sigma[9,2] <- -pi010*p110*pi110 164 | Sigma[2,10] <- Sigma[10,2] <- -pi010*p011*pi011 165 | Sigma[2,11] <- Sigma[11,2] <- -pi010*p111*pi111 166 | 167 | Sigma[3,4] <- Sigma[4,3] <- -pi100*pi011 168 | Sigma[3,5] <- Sigma[5,3] <- -pi100*pi101 169 | Sigma[3,6] <- Sigma[6,3] <- -pi100*pi110 170 | Sigma[3,7] <- Sigma[7,3] <- -pi100*pi111 171 | Sigma[3,8] <- Sigma[8,3] <- -pi100*p010*pi010 172 | Sigma[3,9] <- Sigma[9,3] <- -pi100*p110*pi110 173 | Sigma[3,10] <- Sigma[10,3] <- -pi100*p011*pi011 174 | Sigma[3,11] <- Sigma[11,3] <- -pi100*p111*pi111 175 | 176 | Sigma[4,5] <- Sigma[5,4] <- -pi011*pi101 177 | Sigma[4,6] <- Sigma[6,4] <- -pi011*pi110 178 | Sigma[4,7] <- Sigma[7,4] <- -pi011*pi111 179 | Sigma[4,8] <- Sigma[8,4] <- -pi011*p010*pi010 180 | Sigma[4,9] <- Sigma[9,4] <- -pi011*p110*pi110 181 | Sigma[4,10] <- Sigma[10,4] <- p011*pi011*(1-pi011) 182 | Sigma[4,11] <- Sigma[11,4] <- -pi011*p111*pi111 183 | 184 | Sigma[5,6] <- Sigma[6,5] <- -pi101*pi110 185 | Sigma[5,7] <- Sigma[7,5] <- -pi101*pi111 186 | Sigma[5,8] <- Sigma[8,5] <- -pi101*p010*pi010 187 | Sigma[5,9] <- Sigma[9,5] <- -pi101*p110*pi110 188 | Sigma[5,10] <- Sigma[10,5] <- -pi101*p011*pi011 189 | Sigma[5,11] <- Sigma[11,5] <- -pi101*p111*pi111 190 | 191 | Sigma[6,7] <- Sigma[7,6] <- -pi110*pi111 192 | Sigma[6,8] <- Sigma[8,6] <- -pi110*p010*pi010 193 | Sigma[6,9] <- Sigma[9,6] <- pi110*p110*(1-pi110) 194 | Sigma[6,10] <- Sigma[10,6] <- -pi110*p011*pi011 195 | Sigma[6,11] <- Sigma[11,6] <- -pi110*p111*pi111 196 | 197 | Sigma[7,8] <- Sigma[8,7] <- -pi111*p010*pi010 198 | Sigma[7,9] <- Sigma[9,7] <- -pi111*p110*pi110 199 | Sigma[7,10] <- Sigma[10,7] <- -pi111*p011*pi011 200 | Sigma[7,11] <- Sigma[11,7] <- p111*pi111*(1-pi111) 201 | 202 | Sigma[8,9] <- Sigma[9,8] <- -p010*pi010*p110*pi110 203 | Sigma[8,10] <- Sigma[10,8] <- -p010*pi010*p011*pi011 204 | Sigma[8,11] <- Sigma[11,8] <- -p010*pi010*p111*pi111 205 | 206 | Sigma[9,10] <- Sigma[10,9] <- -p110*pi110*p011*pi011 207 | Sigma[9,11] <- Sigma[11,9] <- -p110*pi110*p111*pi111 208 | 209 | Sigma[10,11] <- Sigma[11,10] <- -p011*pi011*p111*pi111 210 | 211 | ## partial derivatives 212 | dA <- dB <- matrix(NA, ncol = 1, nrow = 11) 213 | pi1010 <- p010*pi010 214 | pi1110 <- p110*pi110 215 | pi1011 <- p011*pi011 216 | pi1111 <- p111*pi111 217 | 218 | A0denom <- pi011*pi1010/pi1011-pi010 219 | A1denom <- pi111*pi1110/pi1111-pi110 220 | numer0 <- (pi011-pi1011)*pi000-pi001*(pi010-pi1010) 221 | numer1 <- (pi111-pi1111)*pi100-pi101*(pi110-pi1110) 222 | dA[1] <- -(pi010-pi1010)/A0denom 223 | dA[2] <- (-pi001*A0denom+numer0)/(A0denom^2) 224 | dA[3] <- (pi111-pi1111)/A1denom 225 | dA[4] <- 
(A0denom*pi000-pi1010*numer0/pi1011)/(A0denom^2) 226 | dA[5] <- -(pi110-pi1110)/A1denom 227 | dA[6] <- (-pi101*A1denom+numer1)/(A1denom^2) 228 | dA[7] <- (pi100*A1denom-pi1110*numer1/pi1111)/(A1denom^2) 229 | dA[8] <- (pi001*A0denom-pi011*numer0/pi1011)/(A0denom^2) 230 | dA[9] <- (pi101*A1denom-pi111*numer1/pi1111)/(A1denom^2) 231 | dA[10] <- 1 + (-pi000*A0denom+pi011*pi1010*numer0/(pi1011^2))/(A0denom^2) 232 | dA[11] <- 1 + (-pi100*A1denom+pi111*pi1110*numer1/(pi1111^2))/(A1denom^2) 233 | 234 | B0denom <- pi011-pi1011*pi010/pi1010 235 | B1denom <- pi111-pi1111*pi110/pi1110 236 | dB[1] <- -(pi010-pi1010)/B0denom 237 | dB[2] <- (-pi001*B0denom+pi1011*numer0/pi1010)/(B0denom^2) 238 | dB[3] <- (pi111-pi1111)/B1denom 239 | dB[4] <- (pi000*B0denom-numer0)/(B0denom^2) 240 | dB[5] <- -(pi110-pi1110)/B1denom 241 | dB[6] <- (-pi101*B1denom+pi1111*numer1/pi1110)/(B1denom^2) 242 | dB[7] <- (pi100*B1denom-numer1)/(B1denom^2) 243 | dB[8] <- 1 + (pi001*B0denom-pi1011*pi010*numer0/(pi1010^2))/(B0denom^2) 244 | dB[9] <- 1 + (pi101*B1denom-pi1111*pi110*numer1/(pi1110^2))/(B1denom^2) 245 | dB[10] <- (-pi000*B0denom+pi010*numer0/pi1010)/(B0denom^2) 246 | dB[11] <- (-pi100*B1denom+pi110*numer1/pi1110)/(B1denom^2) 247 | 248 | dC0 <- dC1 <- matrix(0, ncol = 1, nrow = 11) 249 | dC0[2] <- dC0[3] <- dC0[6] <- dC1[1] <- dC1[4] <- dC1[5] <- dC1[7] <- 1 250 | 251 | ## put together 252 | partial <- (dA*C1 - dC1*A)/(C1^2)-(dB*C0-dC0*B)/(C0^2) 253 | ITT.se <- sqrt(t(partial)%*%Sigma%*%partial/n) 254 | z <- qnorm(1-(1-CI)/2) 255 | ITT.ci <- matrix(c(ITT-z*ITT.se, ITT+z*ITT.se), nrow = 1) 256 | colnames(ITT.ci) <- c("lower CI", "upper CI") 257 | ITT <- matrix(c(ITT, ITT.se), nrow = 1) 258 | colnames(ITT) <- c("ITT est.", "ITT s.e.") 259 | 260 | ## variance for CACE 261 | partial.cace <- (partial*C1*C0 + ITT[1]*(dC1*C0+C1*dC0))*cace.denom 262 | partial.cace[c(1, 2, 4, 8:11)] <- partial.cace[c(1, 2, 4, 8:11)] - 263 | (dC0[c(1, 2, 4, 8:11)]*(pi101+pi111) - 264 | (pi100+pi110)*dC1[c(1, 2, 4, 8:11)])*ITT[1]*C1*C0 265 | partial.cace[c(3, 6)] <- partial.cace[c(3,6)] - 266 | (dC0[c(3,6)]*(pi101+pi111) - C1 - (pi100+pi110)*dC1[c(3,6)])*ITT[1]*C1*C0 267 | partial.cace[c(5, 7)] <- partial.cace[c(5,7)] - 268 | (C0 + dC0[c(5, 7)]*(pi101+pi111) - (pi100+pi110)*dC1[c(5,7)])*ITT[1]*C1*C0 269 | partial.cace <- partial.cace/(cace.denom^2) 270 | CACE.se <- sqrt(t(partial.cace)%*%Sigma%*%partial.cace/n) 271 | CACE.ci <- matrix(c(CACE-z*CACE.se, CACE+z*CACE.se), nrow = 1) 272 | colnames(CACE.ci) <- c("lower CI", "upper CI") 273 | CACE <- matrix(c(CACE, CACE.se), nrow = 1) 274 | colnames(CACE) <- c("CACE est.", "CACE s.e.") 275 | 276 | ## output 277 | ps <- matrix(c(p000, p100, p001, p101, p010, p011, p110, p111), nrow = 1) 278 | colnames(ps) <- c("p000", "p100", "p001", "p101", "p010", "p011", 279 | "p110", "p111") 280 | 281 | pis <- matrix(c(pi000, pi100, pi001, pi101, pi010, pi110, pi011, pi111), nrow = 1) 282 | colnames(pis) <- c("pi000", "pi100", "pi001", "pi101", "pi010", 283 | "pi110", "pi011", "pi111") 284 | 285 | return(list(Ybar = Ybar, ITT = ITT, CACE = CACE, ITT.ci = ITT.ci, 286 | CACE.ci = CACE.ci, ps = ps, pis = pis, n = n)) 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /R/ATEbounds.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Calculates the bounds for the ATE in the presence of missing 3 | ### response 4 | ### 5 | 6 | 7 | 8 | #' Bounding the Average Treatment Effect when some of the Outcome Data are 9 | #' Missing 10 | 
#' 11 | #' This function computes the sharp bounds on the average treatment effect when 12 | #' some of the outcome data are missing. The confidence intervals for the 13 | #' bounds are also computed. 14 | #' 15 | #' For the details of the method implemented by this function, see the 16 | #' references. 17 | #' 18 | #' @param formula A formula of the form \code{Y ~ X} where \code{Y} is the name 19 | #' of the outcome variable and \code{X} is the name of the (randomized) 20 | #' treatment variable. \code{X} should be a factor variable but its value can 21 | #' take more than two levels. The missing values for \code{Y} should be coded 22 | #' as \code{NA}. 23 | #' @param data A data frame containing the relevant variables. 24 | #' @param maxY A scalar. The maximum value of the outcome variable. The default 25 | #' is the maximum sample value. 26 | #' @param minY A scalar. The minimum value of the outcome variable. The default 27 | #' is the minimum sample value. 28 | #' @param alpha A positive scalar that is less than or equal to 0.5. This will 29 | #' determine the (1-\code{alpha}) level of confidence intervals. The default is 30 | #' \code{0.05}. 31 | #' @param strata The variable name indicating strata. If this is specified, the 32 | #' quantities of interest will be first calculated within each stratum and then 33 | #' aggregated. The default is \code{NULL}. 34 | #' @param ratio A \eqn{J \times M} matrix of probabilities where \eqn{J} is the 35 | #' number of strata and \eqn{M} is the number of treatment and control groups. 36 | #' Each element of the matrix specifies the probability of a unit falling into 37 | #' that category. The default is \code{NULL} in which case the sample estimates 38 | #' of these probabilities are used for computation. 39 | #' @param survey The variable name for survey weights. The default is 40 | #' \code{NULL}. 41 | #' @param n.reps A positive integer. The number of bootstrap replicates used 42 | #' for the construction of confidence intervals via the B-method of Beran (1988). 43 | #' If it equals zero, the confidence intervals will not be constructed. 44 | #' @param ... The arguments passed to other functions. 45 | #' @return A list of class \code{ATEbounds} which contains the following items: 46 | #' \item{call}{ The matched call. } \item{Y}{ The outcome variable. } 47 | #' \item{D}{ The treatment variable. } \item{bounds}{ The point estimates of 48 | #' the sharp bounds on the average treatment effect. } \item{bounds.Y}{ The 49 | #' point estimates of the sharp bounds on the outcome variable within each 50 | #' treatment/control group. } \item{bmethod.ci}{ The B-method confidence 51 | #' interval of the bounds on the average treatment effect. } \item{bonf.ci}{ 52 | #' The Bonferroni confidence interval of the bounds on the average treatment 53 | #' effect. } \item{bonf.ci.Y}{ The Bonferroni confidence interval of the 54 | #' bounds on the outcome variable within each treatment/control group. } 55 | #' \item{bmethod.ci.Y}{ The B-method confidence interval of the bounds on the 56 | #' outcome variable within each treatment/control group. } \item{maxY}{ The 57 | #' maximum value of the outcome variable used in the computation. } 58 | #' \item{minY}{ The minimum value of the outcome variable used in the 59 | #' computation. } \item{nobs}{ The number of observations. } \item{nobs.Y}{ 60 | #' The number of observations within each treatment/control group.
} 61 | #' \item{ratio}{ The probability of treatment assignment (within each stratum if 62 | #' \code{strata} is specified) used in the computation. } 63 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 64 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 65 | #' @references Horowitz, Joel L. and Charles F. Manski. (1998). 66 | #' \dQuote{Censoring of Outcomes and Regressors due to Survey Nonresponse: 67 | #' Identification and Estimation Using Weights and Imputations.} \emph{Journal 68 | #' of Econometrics}, Vol. 84, pp.37-58. 69 | #' 70 | #' Horowitz, Joel L. and Charles F. Manski. (2000). \dQuote{Nonparametric 71 | #' Analysis of Randomized Experiments With Missing Covariate and Outcome Data.} 72 | #' \emph{Journal of the American Statistical Association}, Vol. 95, No. 449, 73 | #' pp.77-84. 74 | #' 75 | #' Harris-Lacewell, Melissa, Kosuke Imai, and Teppei Yamamoto. (2007). 76 | #' \dQuote{Racial Gaps in the Responses to Hurricane Katrina: An Experimental 77 | #' Study}, \emph{Technical Report}. Department of Politics, Princeton 78 | #' University. 79 | #' @keywords design 80 | #' @export ATEbounds 81 | ATEbounds <- function(formula, data = parent.frame(), maxY = NULL, 82 | minY = NULL, alpha = 0.05, n.reps = 0, 83 | strata = NULL, ratio = NULL, survey = NULL, ...) { 84 | 85 | ## getting Y and D 86 | call <- match.call() 87 | tm <- terms(formula) 88 | attr(tm, "intercept") <- 0 89 | mf <- model.frame(tm, data = data, na.action = 'na.pass') 90 | D <- model.matrix(tm, data = mf) 91 | M <- ncol(D) 92 | if (max(D) > 1 || min(D) < 0) 93 | stop("the treatment variable should be a factor variable.") 94 | Y <- model.response(mf) 95 | if (is.null(maxY)) 96 | maxY <- max(Y, na.rm = TRUE) 97 | if (is.null(minY)) 98 | minY <- min(Y, na.rm = TRUE) 99 | if (!is.null(call$survey)) 100 | survey <- eval(call$survey, data) 101 | else 102 | survey <- rep(1, length(Y)) 103 | ### computing the bounds 104 | if (!is.null(call$strata)) { 105 | strata <- eval(call$strata, data) 106 | res <- boundsAggComp(cbind(Y, strata, D), rep(1, length(Y)), maxY, 107 | minY, alpha = alpha, ratio = ratio, survey = survey) 108 | } else { 109 | res <- boundsComp(cbind(Y, D), rep(1, length(Y)), maxY, minY, 110 | alpha = alpha, survey = survey) 111 | } 112 | 113 | ## CI based on B-method 114 | if (n.reps > 0) { 115 | if (!is.null(call$strata)) { 116 | breps <- boot(data = cbind(Y, strata, D), statistic = boundsAggComp, 117 | R = n.reps, maxY = maxY, minY = minY, alpha = 118 | NULL, survey = survey)$t 119 | res$bmethod.ci <- res$bonf.ci <- matrix(NA, ncol = 2, nrow = choose(M, 2)) 120 | counter <- 1 121 | for (i in 1:(M-1)) 122 | for (j in (i+1):M) { 123 | tmp <- boundsCI(breps[,counter], breps[,counter+1], 124 | res$bounds[(counter+1)/2,1], 125 | res$bounds[(counter+1)/2,2], alpha) 126 | res$bmethod.ci[(counter+1)/2,] <- tmp$bmethod 127 | res$bonf.ci[(counter+1)/2,] <- tmp$bonferroni 128 | counter <- counter + 2 129 | } 130 | } else { 131 | breps <- boot(data = cbind(Y, D), statistic = boundsComp, 132 | R = n.reps, maxY = maxY, minY = minY, alpha = NULL, 133 | survey = survey)$t 134 | res$bmethod.ci.Y <- matrix(NA, ncol = 2, nrow = M) 135 | res$bmethod.ci <- matrix(NA, ncol = 2, nrow = choose(M, 2)) 136 | for (i in 1:M) { 137 | tmp <- boundsCI(breps[,(i-1)*2+1], breps[,i*2], 138 | res$bounds.Y[i,1], 139 | res$bounds.Y[i,2], alpha) 140 | res$bmethod.ci.Y[i,] <- tmp$bmethod 141 | res$bonf.ci.Y[i,] <- tmp$bonferroni 142 | } 143 | counter <- 1 144 | for (i in
1:(M-1)) 145 | for (j in (i+1):M) { 146 | tmp <- boundsCI(breps[,2*M+counter], breps[,2*M+counter+1], 147 | res$bounds[(counter+1)/2,1], 148 | res$bounds[(counter+1)/2,2], alpha) 149 | res$bmethod.ci[(counter+1)/2,] <- tmp$bmethod 150 | res$bonf.ci[(counter+1)/2,] <- tmp$bonferroni 151 | counter <- counter + 2 152 | } 153 | } 154 | } 155 | 156 | ## dimnames 157 | tmp <- NULL 158 | for (i in 1:(M-1)) 159 | for (j in (i+1):M) 160 | tmp <- c(tmp, paste(colnames(D)[i], "-", colnames(D)[j])) 161 | if (is.null(call$strata)) { 162 | rownames(res$bounds.Y) <- rownames(res$bonf.ci.Y) <- colnames(D) 163 | rownames(res$bounds) <- rownames(res$bonf.ci) <- tmp 164 | colnames(res$bounds) <- colnames(res$bounds.Y) <- c("lower", "upper") 165 | colnames(res$bonf.ci) <- colnames(res$bonf.ci.Y) <- 166 | c(paste("lower ", alpha/2, "%CI", sep=""), 167 | paste("upper ", 1-alpha/2, "%CI", sep="")) 168 | if (n.reps > 0) { 169 | rownames(res$bmethod.ci.Y) <- colnames(D) 170 | rownames(res$bmethod.ci) <- tmp 171 | colnames(res$bmethod.ci) <- colnames(res$bmethod.ci.Y) <- 172 | c(paste("lower ", alpha/2, "%CI", sep=""), 173 | paste("upper ", 1-alpha/2, "%CI", sep="")) 174 | } 175 | } else { 176 | rownames(res$bounds) <- tmp 177 | colnames(res$bounds) <- c("lower", "upper") 178 | if (n.reps > 0) { 179 | rownames(res$bmethod.ci) <- rownames(res$bonf.ci) <- tmp 180 | colnames(res$bmethod.ci) <- colnames(res$bonf.ci) <- 181 | c(paste("lower ", alpha/2, "%CI", sep=""), 182 | paste("upper ", 1-alpha/2, "%CI", sep="")) 183 | } 184 | } 185 | res$Y <- Y 186 | res$D <- D 187 | res$call <- call 188 | class(res) <- "ATEbounds" 189 | return(res) 190 | } 191 | 192 | ### 193 | ### An internal function which computes the bounds and bonferroni CI 194 | ### if alpha is specified (when alpha = NULL, then it returns a vector 195 | ### of bounds; this is used for bootstrap) 196 | ### 197 | 198 | boundsComp <- function(data, weights, maxY, minY, alpha = NULL, 199 | survey = NULL) { 200 | Y <- data[,1] 201 | D <- data[,-1] 202 | M <- ncol(D) 203 | bounds.Y <- ci.Y <- vars.Y <- matrix(NA, ncol = 2, nrow = M) 204 | nobs.Y <- NULL 205 | if (is.null(survey)) 206 | survey <- rep(1, length(Y)) 207 | for (i in 1:M) { 208 | Ysub <- Y[D[,i]==1] 209 | w <- weights[D[,i]==1]*survey[D[,i]==1] 210 | n <- length(Ysub) 211 | Ymax <- Ymin <- Ysub 212 | Ymax[is.na(Ysub)] <- maxY 213 | Ymin[is.na(Ysub)] <- minY 214 | ## point estimates of the bounds 215 | bounds.Y[i,] <- c(weighted.mean(Ymin, w), weighted.mean(Ymax, w)) 216 | if (!is.null(alpha)) { 217 | ## variances 218 | vars.Y[i,] <- c(weighted.var(Ymin, w)*sum(w^2)/(sum(w)^2), 219 | weighted.var(Ymax, w)*sum(w^2)/(sum(w)^2)) 220 | ## Bonferroni bounds 221 | ci.Y[i,] <- c(bounds.Y[i,1] - qnorm(1-alpha/2)*sqrt(vars.Y[i,1]), 222 | bounds.Y[i,2] + qnorm(1-alpha/2)*sqrt(vars.Y[i,2])) 223 | } 224 | nobs.Y <- c(nobs.Y, n) 225 | } 226 | 227 | ## Bounds for the ATE 228 | bounds <- ci <- matrix(NA, ncol = 2, nrow = choose(M, 2)) 229 | counter <- 1 230 | nobs <- tmp <- NULL 231 | for (i in 1:(M-1)) { 232 | for (j in (i+1):M) { 233 | bounds[counter,] <- c(bounds.Y[i,1]-bounds.Y[j,2], 234 | bounds.Y[i,2]-bounds.Y[j,1]) 235 | if (!is.null(alpha)) 236 | ci[counter,] <- c(bounds[counter,1] - 237 | qnorm(1-alpha/2)*sqrt(vars.Y[i,1]+vars.Y[j,2]), 238 | bounds[counter,2] + 239 | qnorm(1-alpha/2)*sqrt(vars.Y[i,2]+vars.Y[j,1])) 240 | counter <- counter + 1 241 | nobs <- c(nobs, nobs.Y[i]+nobs.Y[j]) 242 | } 243 | } 244 | 245 | if (is.null(alpha)) 246 | return(c(t(rbind(bounds.Y, bounds)))) 247 | else 248 | 
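## when alpha is supplied, return the group-specific outcome bounds and ATE
## bounds together with their Bonferroni confidence intervals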
253 | ###
254 | ### Aggregate bounds
255 | ###
256 | 
257 | boundsAggComp <- function(data, weights, maxY, minY, alpha = NULL,
258 |                           ratio = NULL, survey = NULL) {
259 |   Y <- data[,1]
260 |   S <- data[,2]
261 |   Svalue <- unique(S)
262 |   J <- length(Svalue)
263 |   D <- data[,3:ncol(data)]
264 |   M <- ncol(D)
265 |   ## compute bounds within each stratum and weights across strata
266 |   res.sub <- list()
267 |   if (is.null(ratio))
268 |     ratio.cal <- TRUE
269 |   else
270 |     ratio.cal <- FALSE
271 |   if (ratio.cal)
272 |     ratio <- matrix(NA, nrow = J, ncol = M)
273 |   if (is.null(survey))
274 |     survey <- rep(1, length(Y))
275 |   for (i in 1:J) {
276 |     sub <- (S == Svalue[i])
277 |     res.sub[[i]] <- boundsComp(data[sub,-2], weights[sub], maxY, minY,
278 |                                0.05, survey[sub])
279 |     if (ratio.cal)
280 |       for (j in 1:M)
281 |         ratio[i,j] <- sum(weights[sub & (D[,j] == 1)])
282 |   }
283 |   if (ratio.cal)
284 |     ratio <- ratio/sum(weights)
285 |   omega <- matrix(NA, nrow = (M-1)*M/2, ncol = J)
286 |   counter <- 1
287 |   for (j in 1:(M-1)) {
288 |     for (k in (j+1):M) {
289 |       tmp <- 0
290 |       for (i in 1:J) {
291 |         omega[counter,i] <- ratio[i,j] + ratio[i,k]
292 |         tmp <- tmp + omega[counter,i]
293 |       }
294 |       omega[counter,] <- omega[counter,]/tmp
295 |       counter <- counter + 1
296 |     }
297 |   }
298 | 
299 |   ## aggregate the results
300 |   bounds <- matrix(0, ncol = 2, nrow = choose(M, 2))
301 |   counter <- 1
302 |   for (j in 1:(M-1)) {
303 |     for (k in (j+1):M) {
304 |       for (i in 1:J) {
305 |         bounds[counter,] <- bounds[counter,] +
306 |           (res.sub[[i]]$bounds)[counter,]*omega[counter,i]
307 |       }
308 |       counter <- counter + 1
309 |     }
310 |   }
311 |   if (is.null(alpha))
312 |     return(c(t(bounds)))
313 |   else
314 |     return(list(bounds = bounds, maxY = maxY, minY = minY,
315 |                 ratio = ratio))
316 |   #return(list(bounds = bounds, maxY = maxY, minY = minY,
317 |   #            ratio = ratio, omega = omega))
318 | }
319 | 
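## The aggregation above is a weighted average of within-stratum bounds:
## for arms (j, k), stratum i receives weight
##   omega[i] = (ratio[i,j] + ratio[i,k]) / sum_i (ratio[i,j] + ratio[i,k]).
## A minimal sketch of the same step with hypothetical numbers:
##   b <- rbind(c(-0.2, 0.4), c(-0.1, 0.3))  # stratum-level [lower, upper]
##   omega <- c(0.6, 0.4)                    # stratum weights, sum to 1
##   colSums(b * omega)                      # aggregated [lower, upper]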
--------------------------------------------------------------------------------
/src/NIstandard.c:
--------------------------------------------------------------------------------
1 | #include <stddef.h>
2 | #include <stdio.h>
3 | #include <string.h>
4 | #include <math.h>
5 | #include <Rmath.h>
6 | #include <R.h>
7 | #include "vector.h"
8 | #include "subroutines.h"
9 | #include "rand.h"
10 | #include "models.h"
11 | 
12 | /*
13 |   Bayesian Binary Probit with Nonignorable Missing Outcomes
14 | */
15 | 
16 | void NIbprobit(int *Y,         /* binary outcome variable */
17 |                int *R,         /* recording indicator for Y */
18 |                double *dXo,    /* covariates */
19 |                double *dXr,    /* covariates */
20 |                double *beta,   /* coefficients */
21 |                double *delta,  /* coefficients */
22 |                int *insamp,    /* # of obs */
23 |                int *incovo,    /* # of covariates */
24 |                int *incovr,    /* # of covariates */
25 |                int *intreat,   /* # of treatments */
26 |                double *beta0,  /* prior mean */
27 |                double *delta0, /* prior mean */
28 |                double *dAo,    /* prior precision */
29 |                double *dAr,    /* prior precision */
30 |                int *Insample,  /* insample QoI */
31 |                int *param,     /* store parameters? */
32 |                int *mda,       /* marginal data augmentation? */
33 |                int *ndraws,    /* # of gibbs draws */
34 |                int *iBurnin,   /* # of burnin */
35 |                int *iKeep,     /* every ?th draws to keep */
36 |                int *verbose,
37 |                double *coefo,  /* storage for coefficients */
38 |                double *coefr,  /* storage for coefficients */
39 |                double *ATE,    /* storage for ATE */
40 |                double *BASE    /* storage for baseline */
41 |                ) {
42 | 
43 |   /*** counters ***/
44 |   int n_samp = *insamp;   /* sample size */
45 |   int n_gen = *ndraws;    /* number of gibbs draws */
46 |   int n_covo = *incovo;   /* number of covariates */
47 |   int n_covr = *incovr;   /* number of covariates */
48 |   int n_treat = *intreat; /* number of treatments */
49 | 
50 |   /*** data ***/
51 |   /* covariates for the response model */
52 |   double **Xr = doubleMatrix(n_samp+n_covr, n_covr+1);
53 |   /* covariates for the outcome model */
54 |   double **Xo = doubleMatrix(n_samp+n_covo, n_covo+1);
55 | 
56 |   /*** model parameters ***/
57 |   double **Ao = doubleMatrix(n_covo, n_covo);
58 |   double **Ar = doubleMatrix(n_covr, n_covr);
59 |   double **mtemp1 = doubleMatrix(n_covo, n_covo);
60 |   double **mtemp2 = doubleMatrix(n_covr, n_covr);
61 | 
62 |   /*** QoIs ***/
63 |   double *base = doubleArray(n_treat);
64 |   double *cATE = doubleArray(n_treat);
65 | 
66 |   /*** storage parameters and loop counters **/
67 |   int progress = 1;
68 |   int keep = 1;
69 |   int i, j, k, main_loop;
70 |   int itemp, itemp0, itemp1, itemp2, itempP = ftrunc((double) n_gen/10);
71 |   double dtemp, pj, r0, r1;
72 | 
73 |   /*** get random seed **/
74 |   GetRNGstate();
75 | 
76 |   /*** read the data ***/
77 |   itemp = 0;
78 |   for (j = 0; j < n_covo; j++)
79 |     for (i = 0; i < n_samp; i++)
80 |       Xo[i][j] = dXo[itemp++];
81 |   itemp = 0;
82 |   for (j = 0; j < n_covr; j++)
83 |     for (i = 0; i < n_samp; i++)
84 |       Xr[i][j] = dXr[itemp++];
85 | 
86 |   /*** read the prior and use it as additional data points ***/
87 |   itemp = 0;
88 |   for (k = 0; k < n_covo; k++)
89 |     for (j = 0; j < n_covo; j++)
90 |       Ao[j][k] = dAo[itemp++];
91 | 
92 |   itemp = 0;
93 |   for (k = 0; k < n_covr; k++)
94 |     for (j = 0; j < n_covr; j++)
95 |       Ar[j][k] = dAr[itemp++];
96 | 
97 |   dcholdc(Ao, n_covo, mtemp1);
98 |   for(i = 0; i < n_covo; i++) {
99 |     Xo[n_samp+i][n_covo] = 0;
100 |     for(j = 0; j < n_covo; j++) {
101 |       Xo[n_samp+i][n_covo] += mtemp1[i][j]*beta0[j];
102 |       Xo[n_samp+i][j] = mtemp1[i][j];
103 |     }
104 |   }
105 | 
106 |   dcholdc(Ar, n_covr, mtemp2);
107 |   for(i = 0; i < n_covr; i++) {
108 |     Xr[n_samp+i][n_covr] = 0;
109 |     for(j = 0; j < n_covr; j++) {
110 |       Xr[n_samp+i][n_covr] += mtemp2[i][j]*delta0[j];
111 |       Xr[n_samp+i][j] = mtemp2[i][j];
112 |     }
113 |   }
114 | 
115 |   /*** Gibbs Sampler! ***/
116 |   itemp = 0; itemp0 = 0; itemp1 = 0; itemp2 = 0;
117 |   for(main_loop = 1; main_loop <= n_gen; main_loop++){
118 | 
119 |     /** Response Model: binary probit **/
120 |     bprobitGibbs(R, Xr, delta, n_samp, n_covr, 0, delta0, Ar, *mda, 1);
121 | 
122 |     /** Outcome Model: binary probit **/
123 |     bprobitGibbs(Y, Xo, beta, n_samp, n_covo, 0, beta0, Ao, *mda, 1);
124 | 
125 |     /** Imputing the missing data **/
126 |     for (i = 0; i < n_samp; i++) {
127 |       if (R[i] == 0) {
128 |         pj = 0;
129 |         r0 = delta[0];
130 |         r1 = delta[1];
131 |         for (j = 0; j < n_covo; j++)
132 |           pj += Xo[i][j]*beta[j];
133 |         for (j = 2; j < n_covr; j++) {
134 |           r0 += Xr[i][j]*delta[j];
135 |           r1 += Xr[i][j]*delta[j];
136 |         }
137 |         pj = pnorm(0, pj, 1, 0, 0);
138 |         r0 = pnorm(0, r0, 1, 0, 0);
139 |         r1 = pnorm(0, r1, 1, 0, 0);
140 |         if (unif_rand() < ((1-r1)*pj/((1-r1)*pj+(1-r0)*(1-pj)))) {
141 |           Y[i] = 1;
142 |           Xr[i][0] = 0;
143 |           Xr[i][1] = 1;
144 |         } else {
145 |           Y[i] = 0;
146 |           Xr[i][0] = 1;
147 |           Xr[i][1] = 0;
148 |         }
149 |       }
150 |     }
151 | 
152 |     /** Compute quantities of interest **/
153 |     for (j = 0; j < n_treat; j++)
154 |       base[j] = 0;
155 |     for (i = 0; i < n_samp; i++) {
156 |       dtemp = 0;
157 |       for (j = n_treat; j < n_covo; j++)
158 |         dtemp += Xo[i][j]*beta[j];
159 |       for (j = 0; j < n_treat; j++) {
160 |         if (*Insample) {
161 |           if (Xo[i][j] == 1)
162 |             base[j] += (double)Y[i];
163 |           else
164 |             base[j] += (double)((dtemp+beta[j]+norm_rand()) > 0);
165 |         } else
166 |           base[j] += pnorm(0, dtemp+beta[j], 1, 0, 0);
167 |       }
168 |     }
169 |     for (j = 0; j < n_treat; j++)
170 |       base[j] /= (double)n_samp;
171 | 
172 |     /** Storing the results **/
173 |     if (main_loop > *iBurnin) {
174 |       if (keep == *iKeep) {
175 |         for (j = 0; j < (n_treat-1); j++)
176 |           ATE[itemp0++] = base[j+1] - base[0];
177 |         for (j = 0; j < n_treat; j++)
178 |           BASE[itemp++] = base[j];
179 |         if (*param) {
180 |           for (i = 0; i < n_covo; i++)
181 |             coefo[itemp1++] = beta[i];
182 |           for (i = 0; i < n_covr; i++)
183 |             coefr[itemp2++] = delta[i];
184 |         }
185 |         keep = 1;
186 |       }
187 |       else
188 |         keep++;
189 |     }
190 | 
191 |     if(*verbose) {
192 |       if(main_loop == itempP) {
193 |         Rprintf("%3d percent done.\n", progress*10);
194 |         itempP += ftrunc((double) n_gen/10);
195 |         progress++;
196 |         R_FlushConsole();
197 |       }
198 |     }
199 |     R_CheckUserInterrupt();
200 |   } /* end of Gibbs sampler */
201 | 
202 |   /** write out the random seed **/
203 |   PutRNGstate();
204 | 
205 |   /** freeing memory **/
206 |   FreeMatrix(Xr, n_samp+n_covr);
207 |   FreeMatrix(Xo, n_samp+n_covo);
208 |   FreeMatrix(Ao, n_covo);
209 |   FreeMatrix(Ar, n_covr);
210 |   FreeMatrix(mtemp1, n_covo);
211 |   FreeMatrix(mtemp2, n_covr);
212 |   free(base);
213 |   free(cATE);
214 | } /* NIbprobit */
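/*
  A sketch of the algebra behind the imputation step above (for
  exposition; not additional estimation code): writing pj = Pr(Y=1 | X)
  and r1 = Pr(R=1 | Y=1), r0 = Pr(R=1 | Y=0), a missing outcome is
  drawn by Bayes' rule from

    Pr(Y=1 | R=0, X) = (1-r1)*pj / ((1-r1)*pj + (1-r0)*(1-pj)),

  after which the first two columns of Xr are reset so that the
  response model conditions on the imputed value of Y.
*/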
215 | 
216 | 
217 | 
218 | 
219 | /*
220 |   Bayesian Binary Mixed Effects Probit with Nonignorable Missing
221 |   Outcomes
222 | */
223 | 
224 | void NIbprobitMixed(int *Y,            /* binary outcome variable */
225 |                     int *R,            /* recording indicator for Y */
226 |                     int *grp,          /* group indicator */
227 |                     int *in_grp,       /* number of groups */
228 |                     int *max_samp_grp, /* max # of obs within group */
229 |                     double *dXo,       /* fixed effects covariates */
230 |                     double *dXr,       /* fixed effects covariates */
231 |                     double *dZo,       /* random effects covariates */
232 |                     double *dZr,       /* random effects covariates */
233 |                     double *beta,      /* coefficients */
234 |                     double *delta,     /* coefficients */
235 |                     double *dPsio,     /* random effects variance */
236 |                     double *dPsir,     /* random effects variance */
237 |                     int *insamp,       /* # of obs */
238 |                     int *incovo,       /* # of fixed effects */
239 |                     int *incovr,       /* # of fixed effects */
240 |                     int *incovoR,      /* # of random effects */
241 |                     int *incovrR,      /* # of random effects */
242 |                     int *intreat,      /* # of treatments */
243 |                     double *beta0,     /* prior mean */
244 |                     double *delta0,    /* prior mean */
245 |                     double *dAo,       /* prior precision */
246 |                     double *dAr,       /* prior precision */
247 |                     int *dfo,          /* prior degrees of freedom */
248 |                     int *dfr,          /* prior degrees of freedom */
249 |                     double *dS0o,      /* prior scale */
250 |                     double *dS0r,      /* prior scale */
251 |                     int *Insample,     /* insample QoI */
252 |                     int *param,        /* store parameters? */
253 |                     int *mda,          /* marginal data augmentation? */
254 |                     int *ndraws,       /* # of gibbs draws */
255 |                     int *iBurnin,      /* # of burnin */
256 |                     int *iKeep,        /* every ?th draws to keep */
257 |                     int *verbose,
258 |                     double *coefo,     /* storage for coefficients */
259 |                     double *coefr,     /* storage for coefficients */
260 |                     double *sPsiO,     /* storage for variance */
261 |                     double *sPsiR,     /* storage for variance */
262 |                     double *ATE,       /* storage for ATE */
263 |                     double *BASE       /* storage for baseline */
264 |                     ) {
265 | 
266 |   /*** counters ***/
267 |   int n_samp = *insamp;   /* sample size */
268 |   int n_gen = *ndraws;    /* number of gibbs draws */
269 |   int n_grp = *in_grp;    /* number of groups */
270 |   int n_covo = *incovo;   /* number of fixed effects */
271 |   int n_covr = *incovr;   /* number of fixed effects */
272 |   int n_covoR = *incovoR; /* number of random effects */
273 |   int n_covrR = *incovrR; /* number of random effects */
274 |   int n_treat = *intreat; /* number of treatments */
275 | 
276 |   /*** data ***/
277 |   /* covariates for the response model */
278 |   double **Xr = doubleMatrix(n_samp+n_covr, n_covr+1);
279 |   /* covariates for the outcome model */
280 |   double **Xo = doubleMatrix(n_samp+n_covo, n_covo+1);
281 |   /* random effects covariates */
282 |   double ***Zo = doubleMatrix3D(n_grp, *max_samp_grp + n_covoR,
283 |                                 n_covoR + 1);
284 |   double ***Zr = doubleMatrix3D(n_grp, *max_samp_grp + n_covrR,
285 |                                 n_covrR + 1);
286 | 
287 |   /*** model parameters ***/
288 |   double **PsiO = doubleMatrix(n_covoR, n_covoR);
289 |   double **PsiR = doubleMatrix(n_covrR, n_covrR);
290 |   double **xiO = doubleMatrix(n_grp, n_covoR);
291 |   double **xiR = doubleMatrix(n_grp, n_covrR);
292 |   double **S0o = doubleMatrix(n_covoR, n_covoR);
293 |   double **S0r = doubleMatrix(n_covrR, n_covrR);
294 |   double **Ao = doubleMatrix(n_covo, n_covo);
295 |   double **Ar = doubleMatrix(n_covr, n_covr);
296 |   double **mtemp1 = doubleMatrix(n_covo, n_covo);
297 |   double **mtemp2 = doubleMatrix(n_covr, n_covr);
298 | 
299 |   /*** QoIs ***/
300 |   double *base = doubleArray(n_treat);
301 |   double *cATE = doubleArray(n_treat);
302 | 
303 |   /*** storage parameters and loop counters **/
304 |   int progress = 1;
305 |   int keep = 1;
306 |   int i, j, k, main_loop;
307 |   int itemp, itemp0, itemp1, itemp2, itemp3 = 0, itemp4 = 0, itempP = ftrunc((double) n_gen/10);
308 |   int *vitemp = intArray(n_grp);
309 |   double dtemp, pj, r0, r1;
310 | 
311 |   /*** get random seed **/
312 |   GetRNGstate();
313 | 
314 |   /*** fixed effects ***/
315 |   itemp = 0;
316 |   for (j = 0; j < n_covo; j++)
317 |     for (i = 0; i < n_samp; i++)
318 |       Xo[i][j] = dXo[itemp++];
319 |   itemp = 0;
320 |   for (j = 0; j < n_covr; j++)
321 |     for (i = 0; i < n_samp; i++)
322 |       Xr[i][j] = dXr[itemp++];
323 | 
324 |   /* prior */
325 |   itemp = 0;
326 |   for (k = 0; k < n_covo; k++)
327 |     for (j = 0; j < n_covo; j++)
328 |       Ao[j][k] = dAo[itemp++];
329 | 
330 |   itemp = 0;
331 |   for (k = 0; k < n_covr; k++)
332 |     for (j = 0; j < n_covr; j++)
333 |       Ar[j][k] = dAr[itemp++];
334 | 
335 |   dcholdc(Ao, n_covo, mtemp1);
336 |   for(i = 0; i < n_covo; i++) {
337 |     Xo[n_samp+i][n_covo] = 0;
338 |     for(j = 0; j < n_covo; j++) {
339 |       Xo[n_samp+i][n_covo] += mtemp1[i][j]*beta0[j];
340 |       Xo[n_samp+i][j] = mtemp1[i][j];
341 |     }
342 |   }
343 | 
344 |   dcholdc(Ar, n_covr, mtemp2);
345 |   for(i = 0; i < n_covr; i++) {
346 |     Xr[n_samp+i][n_covr] = 0;
347 |     for(j = 0; j < n_covr; j++) {
348 |       Xr[n_samp+i][n_covr] += mtemp2[i][j]*delta0[j];
349 |       Xr[n_samp+i][j] = mtemp2[i][j];
350 |     }
351 |   }
352 | 
353 |   /* random effects */
354 |   itemp = 0;
355 |   for (j = 0; j < n_grp; j++)
356 |     vitemp[j] = 0;
357 |   for (i = 0; i < n_samp; i++) {
358 |     for (j = 0; j < n_covoR; j++)
359 |       Zo[grp[i]][vitemp[grp[i]]][j] = dZo[itemp++];
360 |     vitemp[grp[i]]++;
361 |   }
362 | 
363 |   itemp = 0;
364 |   for (j = 0; j < n_grp; j++)
365 |     vitemp[j] = 0;
366 |   for (i = 0; i < n_samp; i++) {
367 |     for (j = 0; j < n_covrR; j++)
368 |       Zr[grp[i]][vitemp[grp[i]]][j] = dZr[itemp++];
369 |     vitemp[grp[i]]++;
370 |   }
371 | 
372 |   /* prior variance for random effects */
373 |   itemp = 0;
374 |   for (k = 0; k < n_covoR; k++)
375 |     for (j = 0; j < n_covoR; j++)
376 |       PsiO[j][k] = dPsio[itemp++];
377 | 
378 |   itemp = 0;
379 |   for (k = 0; k < n_covrR; k++)
380 |     for (j = 0; j < n_covrR; j++)
381 |       PsiR[j][k] = dPsir[itemp++];
382 | 
383 |   itemp = 0;
384 |   for (k = 0; k < n_covoR; k++)
385 |     for (j = 0; j < n_grp; j++)
386 |       xiO[j][k] = norm_rand();
387 | 
388 |   itemp = 0;
389 |   for (k = 0; k < n_covrR; k++)
390 |     for (j = 0; j < n_grp; j++)
391 |       xiR[j][k] = norm_rand();
392 | 
393 |   /* hyper prior scale parameter for random effects */
394 |   itemp = 0;
395 |   for (k = 0; k < n_covoR; k++)
396 |     for (j = 0; j < n_covoR; j++)
397 |       S0o[j][k] = dS0o[itemp++];
398 | 
399 |   itemp = 0;
400 |   for (k = 0; k < n_covrR; k++)
401 |     for (j = 0; j < n_covrR; j++)
402 |       S0r[j][k] = dS0r[itemp++];
403 | 
404 |   /*** Gibbs Sampler! ***/
405 |   itemp = 0; itemp0 = 0; itemp1 = 0; itemp2 = 0;
406 |   for(main_loop = 1; main_loop <= n_gen; main_loop++){
407 | 
408 |     /** Response Model: binary probit **/
409 |     bprobitMixedGibbs(R, Xr, Zr, grp, delta, xiR, PsiR, n_samp,
410 |                       n_covr, n_covrR, n_grp, 0, delta0, Ar, *dfr, S0r,
411 |                       1);
412 | 
413 |     /** Outcome Model: binary probit **/
414 |     bprobitMixedGibbs(Y, Xo, Zo, grp, beta, xiO, PsiO, n_samp, n_covo,
415 |                       n_covoR, n_grp, 0, beta0, Ao, *dfo, S0o, 1);
416 | 
417 |     /** Imputing the missing data **/
418 |     for (j = 0; j < n_grp; j++)
419 |       vitemp[j] = 0;
420 |     for (i = 0; i < n_samp; i++) {
421 |       if (R[i] == 0) {
422 |         pj = 0;
423 |         r0 = delta[0];
424 |         r1 = delta[1];
425 |         for (j = 0; j < n_covo; j++)
426 |           pj += Xo[i][j]*beta[j];
427 |         for (j = 2; j < n_covr; j++) {
428 |           r0 += Xr[i][j]*delta[j];
429 |           r1 += Xr[i][j]*delta[j];
430 |         }
431 |         for (j = 0; j < n_covoR; j++)
432 |           pj += Zo[grp[i]][vitemp[grp[i]]][j]*xiO[grp[i]][j];
433 |         for (j = 0; j < n_covrR; j++) {
434 |           r0 += Zr[grp[i]][vitemp[grp[i]]][j]*xiR[grp[i]][j];
435 |           r1 += Zr[grp[i]][vitemp[grp[i]]][j]*xiR[grp[i]][j];
436 |         }
437 |         pj = pnorm(0, pj, 1, 0, 0);
438 |         r0 = pnorm(0, r0, 1, 0, 0);
439 |         r1 = pnorm(0, r1, 1, 0, 0);
440 |         if (unif_rand() < ((1-r1)*pj/((1-r1)*pj+(1-r0)*(1-pj)))) {
441 |           Y[i] = 1;
442 |           Xr[i][0] = 0;
443 |           Xr[i][1] = 1;
444 |         } else {
445 |           Y[i] = 0;
446 |           Xr[i][0] = 1;
447 |           Xr[i][1] = 0;
448 |         }
449 |       }
450 |       vitemp[grp[i]]++;
451 |     }
452 | 
453 |     /** Compute quantities of interest **/
454 |     for (j = 0; j < n_grp; j++)
455 |       vitemp[j] = 0;
456 |     for (j = 0; j < n_treat; j++)
457 |       base[j] = 0;
458 |     for (i = 0; i < n_samp; i++) {
459 |       dtemp = 0;
460 |       for (j = n_treat; j < n_covo; j++)
461 |         dtemp += Xo[i][j]*beta[j];
462 |       for (j = 0; j < n_covoR; j++)
463 |         dtemp += Zo[grp[i]][vitemp[grp[i]]][j]*xiO[grp[i]][j];
464 |       for (j = 0; j < n_treat; j++) {
465 |         if (*Insample) {
466 |           if (Xo[i][j] == 1)
467 |             base[j] += (double)Y[i];
468 |           else
469 |             base[j] += (double)((dtemp+beta[j]+norm_rand()) > 0);
470 |         } else
471 |           base[j] += pnorm(0, dtemp+beta[j], 1, 0, 0);
472 |       }
473 |       vitemp[grp[i]]++;
474 |     }
475 |     for (j = 0; j < n_treat; j++)
476 |       base[j] /= (double)n_samp;
477 | 
478 |     /** Storing the results **/
479 |     if (main_loop > *iBurnin) {
480 |       if (keep == *iKeep) {
481 |         for (j = 0; j < (n_treat-1); j++)
482 |           ATE[itemp0++] = base[j+1] - base[0];
483 |         for (j = 0; j < n_treat; j++)
484 |           BASE[itemp++] = base[j];
485 |         if (*param) {
486 |           for (i = 0; i < n_covo; i++)
487 |             coefo[itemp1++] = beta[i];
488 |           for (i = 0; i < n_covr; i++)
489 |             coefr[itemp2++] = delta[i];
490 |           /* store the upper triangles of the random-effects covariances,
491 |              using separate counters for the two storage vectors */
492 |           for (i = 0; i < n_covoR; i++)
493 |             for (j = i; j < n_covoR; j++)
494 |               sPsiO[itemp3++] = PsiO[i][j];
495 |           for (i = 0; i < n_covrR; i++)
496 |             for (j = i; j < n_covrR; j++)
497 |               sPsiR[itemp4++] = PsiR[i][j];
498 |         }
499 |         keep = 1;
500 |       }
501 |       else
502 |         keep++;
503 |     }
504 | 
505 |     if(*verbose) {
506 |       if(main_loop == itempP) {
507 |         Rprintf("%3d percent done.\n", progress*10);
508 |         itempP += ftrunc((double) n_gen/10);
509 |         progress++;
510 |         R_FlushConsole();
511 |       }
512 |     }
513 |     R_CheckUserInterrupt();
514 |   } /* end of Gibbs sampler */
515 | 
516 |   /** write out the random seed **/
517 |   PutRNGstate();
518 | 
519 |   /** freeing memory **/
520 |   FreeMatrix(Xr, n_samp+n_covr);
521 |   FreeMatrix(Xo, n_samp+n_covo);
522 |   Free3DMatrix(Zo, n_grp, *max_samp_grp + n_covoR);
523 |   Free3DMatrix(Zr, n_grp, *max_samp_grp + n_covrR);
524 |   FreeMatrix(PsiO, n_covoR);
525 |   FreeMatrix(PsiR, n_covrR);
526 |   FreeMatrix(xiO, n_grp);
527 |   FreeMatrix(xiR, n_grp);
528 |   FreeMatrix(S0o, n_covoR);
529 |   FreeMatrix(S0r, n_covrR);
530 |   FreeMatrix(Ao, n_covo);
531 |   FreeMatrix(Ar, n_covr);
532 |   FreeMatrix(mtemp1, n_covo);
533 |   FreeMatrix(mtemp2, n_covr);
534 |   free(base);
535 |   free(cATE);
536 |   free(vitemp);
537 | } /* NIbprobitMixed */
538 | 
539 | 
--------------------------------------------------------------------------------