├── src
│   ├── Makevars
│   ├── subroutines.h
│   ├── rand.h
│   ├── vector.h
│   ├── subroutines.c
│   ├── init.c
│   ├── models.h
│   ├── vector.c
│   ├── rand.c
│   └── NIstandard.c
├── .Rbuildignore
├── data
│   └── seguro.RData
├── .gitignore
├── R
│   ├── weighted.var.R
│   ├── onAttach.R
│   ├── MARnocov.R
│   ├── CADEbasic.R
│   ├── ATOPbasic.R
│   ├── print.randomize.R
│   ├── Seguro.R
│   ├── boundsCI.R
│   ├── print.summary.NoncompMAR.R
│   ├── summary.NoncompMAR.R
│   ├── CACEcov.R
│   ├── PAPE.R
│   ├── PAPD.R
│   ├── AUPEC.R
│   ├── Noncomp.mom.R
│   ├── ATEnocov.R
│   ├── NIbprobit.R
│   ├── ATOPnoassumption.R
│   ├── CACEcluster.R
│   ├── CACEnocov.R
│   ├── ATOPsens.R
│   ├── ATOPobs.R
│   ├── CADEreg.R
│   ├── NoncompMAR.R
│   ├── ATEcluster.R
│   ├── randomize.R
│   ├── CADErand.R
│   ├── NInocov.R
│   └── ATEbounds.R
├── .travis.yml
├── experiment.Rproj
├── README.md
├── man
│   ├── seguro.Rd
│   ├── AUPEC.Rd
│   ├── PAPE.Rd
│   ├── PAPD.Rd
│   ├── ATEnocov.Rd
│   ├── CADEreg.Rd
│   ├── ATOPnoassumption.Rd
│   ├── ATOPsens.Rd
│   ├── CADErand.Rd
│   ├── ATOPobs.Rd
│   ├── CACEcluster.Rd
│   ├── randomize.Rd
│   ├── ATEbounds.Rd
│   ├── ATEcluster.Rd
│   └── NoncompLI.Rd
├── DESCRIPTION
└── NAMESPACE
/src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS = $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | 3 | --------------------------------------------------------------------------------
/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^.*\.md$ 3 | ^\.Rproj\.user$ 4 | ^\.travis\.yml$ 5 | --------------------------------------------------------------------------------
/data/seguro.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kosukeimai/experiment/HEAD/data/seguro.RData --------------------------------------------------------------------------------
/.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | src/*.o 6 | src/*.so 7 | src/*.dll 8 | .DS_Store --------------------------------------------------------------------------------
/R/weighted.var.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### weighted variance formula 3 | ### 4 | 5 | weighted.var <- function(x, w) 6 | return(sum(w * (x - weighted.mean(x,w))^2)/((length(x)-1)*mean(w))) 7 | 8 | --------------------------------------------------------------------------------
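A quick check of the weighted.var() formula above: because mean(w) in the denominator cancels any common scaling of the weights, constant weights collapse the expression to the ordinary sample variance. A minimal sketch in base R (toy values invented here):

    ## with equal weights, weighted.var() should agree with var()
    weighted.var <- function(x, w)
      sum(w * (x - weighted.mean(x, w))^2) / ((length(x) - 1) * mean(w))
    x <- c(1.2, 3.4, 2.2, 5.0)
    all.equal(weighted.var(x, rep(1, 4)), var(x))  # TRUE
    all.equal(weighted.var(x, rep(2, 4)), var(x))  # TRUE: rescaling w changes nothing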
"\nVersion: ", ver, "\n", sep="")) 6 | } 7 | 8 | -------------------------------------------------------------------------------- /src/rand.h: -------------------------------------------------------------------------------- 1 | double TruncNorm(double lb, double ub, double mu, double var, int invcdf); 2 | void rMVN(double *Sample, double *mean, double **inv_Var, int size); 3 | double dMVN(double *Y, double *MEAN, double **SIG_INV, int dim, int give_log); 4 | void rWish(double **Sample, double **S, int df, int size); 5 | double dnegbin(int Y, double mu, double theta, int give_log); 6 | double rnegbin(double mu, double theta); 7 | -------------------------------------------------------------------------------- /experiment.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | -------------------------------------------------------------------------------- /R/MARnocov.R: -------------------------------------------------------------------------------- 1 | ### Missing at random 2 | MARnocov <- function(Y, D, Z) { 3 | R <- (!is.na(Y))*1 4 | ITT <- mean(Y[R == 1 & D == 1 & Z == 1])*mean(D[Z == 1]) + 5 | mean(Y[R == 1 & D == 0 & Z == 1])*(1-mean(D[Z == 1])) - 6 | mean(Y[R == 1 & D == 1 & Z == 0])*mean(D[Z == 0]) - 7 | mean(Y[R == 1 & D == 0 & Z == 0]) * (1-mean(D[Z == 0])) 8 | CACE <- ITT/(mean(D[Z==1])-mean(D[Z==0])) 9 | return(list(ITT = ITT, CACE = CACE)) 10 | } 11 | -------------------------------------------------------------------------------- /R/CADEbasic.R: -------------------------------------------------------------------------------- 1 | ##### basic functions for calculating the CADE 2 | 3 | Difflist=function(a,b){ 4 | J=length(a) 5 | c=a 6 | for (j in 1:J){ 7 | c[[j]]=a[[j]]-b[[j]] 8 | } 9 | return(c) 10 | } 11 | 12 | Meanlist=function(a){ 13 | J=length(a) 14 | s=0 15 | for(j in 1:J){ 16 | s=s+mean(a[[j]]) 17 | } 18 | return(s/J) 19 | } 20 | 21 | Productlist=function(a,b){ 22 | J=length(a) 23 | c=a 24 | for (j in 1:J){ 25 | c[[j]]=a[[j]]*b[[j]] 26 | } 27 | return(c) 28 | } 29 | -------------------------------------------------------------------------------- /R/ATOPbasic.R: -------------------------------------------------------------------------------- 1 | #### functions for calculating bounds 2 | 3 | f1=function(x,y,u){u-(u-x)/y } 4 | f2=function(x,y,l){l+(x-l)/y } 5 | 6 | fLB= function (x1,y1,x0,y0,l,u){ 7 | ifelse(y1>0, max(l,f1(x1,y1,u)),l)-ifelse(y0>0,min(u,f2(x0,y0,l)),u) 8 | } 9 | 10 | fUB= function (x1,y1,x0,y0,l,u){ 11 | ifelse(y1>0, min(u,f2(x1,y1,l)),u)-ifelse(y0>0,max(l,f1(x0,y0,u)),l) 12 | } 13 | 14 | ### functions for calculating CI 15 | 16 | 17 | CalC= function (x,hat.LB,hat.UB,sigma.LB,sigma.UB,alpha){ 18 | pnorm(x+(hat.UB-hat.LB)/max(sigma.LB,sigma.UB))-pnorm(-x)-(1-alpha) 19 | } -------------------------------------------------------------------------------- /R/print.randomize.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.randomize <- function(x, digits = getOption("digits"), ...) 
/R/print.randomize.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.randomize <- function(x, digits = getOption("digits"), ...) { 3 | 4 | cat("\nCall:\n", deparse(x$call), "\n\n", sep = "") 5 | cat("\n Treatment Assignment:\n") 6 | print(table(x$treatment, exclude = NULL, ...), digits = digits) 7 | 8 | if (!is.null(x$block)) { 9 | cat("\n Treatment Assignment by Blocks:\n\n") 10 | print(ftable(x$block, x$treatment, exclude = NULL, ...), 11 | digits = digits) 12 | } 13 | 14 | cat("\nTotal number of observations:", 15 | length(na.omit(x$treatment)), "\n\n") 16 | invisible(x) 17 | } 18 | --------------------------------------------------------------------------------
/src/vector.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int *intArray(int num); 5 | void PintArray(int *ivector, int length); 6 | int **intMatrix(int row, int col); 7 | void PintMatrix(int **imatrix, int row, int col); 8 | 9 | double *doubleArray(int num); 10 | void PdoubleArray(double *dvector, int length); 11 | double **doubleMatrix(int row, int col); 12 | void PdoubleMatrix(double **dmatrix, int row, int col); 13 | 14 | double ***doubleMatrix3D(int x, int y, int z); 15 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z); 16 | 17 | long *longArray(int num); 18 | 19 | void FreeMatrix(double **Matrix, int row); 20 | void FreeintMatrix(int **Matrix, int row); 21 | void Free3DMatrix(double ***Matrix, int index, int row); --------------------------------------------------------------------------------
/README.md: -------------------------------------------------------------------------------- 1 | # experiment: R package for designing and analyzing randomized experiments [![Build Status](https://travis-ci.org/kosukeimai/experiment.svg?branch=master)](https://travis-ci.org/kosukeimai/experiment) [![CRAN Version](http://www.r-pkg.org/badges/version/experiment)](https://CRAN.R-project.org/package=experiment) ![CRAN downloads](http://cranlogs.r-pkg.org/badges/grand-total/experiment) 2 | 3 | The R package experiment provides various statistical methods for designing and analyzing randomized experiments. One functionality of the package is the implementation of randomized-block and matched-pair designs based on possibly multivariate pre-treatment covariates. The package also provides the tools to analyze various randomized experiments including cluster randomized experiments, two-stage randomized experiments, randomized experiments with noncompliance, and randomized experiments with missing data. 4 | --------------------------------------------------------------------------------
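A minimal quick start, lifted directly from the man-page examples reproduced later in this dump (the seguro data and ATOPnoassumption() are documented below):

    library(experiment)
    data(seguro)
    attach(seguro)
    ATOPnoassumption(Ya, Yb, Ra, Rb, Ta, Tb, l = 0, u = 1, alpha = 0.05, rep = 100)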
/R/Seguro.R: -------------------------------------------------------------------------------- 1 | #' Data from the Mexican universal health insurance program, Seguro Popular. 2 | #' 3 | #' This data set contains the outcome, missing indicator and the treatment for the application 4 | #' in Kosuke Imai and Zhichao Jiang (2018). 5 | #' 6 | #' @format A data frame with 14,902 rows and 6 variables: 7 | #' \describe{ 8 | #' \item{Ya}{Satisfaction for the first unit in the matched pairs} 9 | #' \item{Yb}{Satisfaction for the second unit in the matched pairs} 10 | #' \item{Ra}{Missing indicator for the first unit in the matched pairs} 11 | #' \item{Rb}{Missing indicator for the second unit in the matched pairs} 12 | #' \item{Ta}{Treatment assignment for the first unit in the matched pairs} 13 | #' \item{Tb}{Treatment assignment for the second unit in the matched pairs} 14 | #' } 15 | #' 16 | #' @docType data 17 | #' @keywords datasets 18 | #' @name seguro 19 | #' @examples 20 | #' data(seguro) 21 | "seguro" --------------------------------------------------------------------------------
/man/seguro.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Seguro.R 3 | \docType{data} 4 | \name{seguro} 5 | \alias{seguro} 6 | \title{Data from the Mexican universal health insurance program, Seguro Popular.} 7 | \format{ 8 | A data frame with 14,902 rows and 6 variables: 9 | \describe{ 10 | \item{Ya}{Satisfaction for the first unit in the matched pairs} 11 | \item{Yb}{Satisfaction for the second unit in the matched pairs} 12 | \item{Ra}{Missing indicator for the first unit in the matched pairs} 13 | \item{Rb}{Missing indicator for the second unit in the matched pairs} 14 | \item{Ta}{Treatment assignment for the first unit in the matched pairs} 15 | \item{Tb}{Treatment assignment for the second unit in the matched pairs} 16 | } 17 | } 18 | \usage{ 19 | seguro 20 | } 21 | \description{ 22 | This data set contains the outcome, missing indicator and the treatment for the application 23 | in Kosuke Imai and Zhichao Jiang (2018). 24 | } 25 | \examples{ 26 | data(seguro) 27 | } 28 | \keyword{datasets} 29 | --------------------------------------------------------------------------------
/R/boundsCI.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### This function calculates the CI of bounds using the B-method 3 | ### and Bonferroni given the bootstrap draws 4 | ### 5 | 6 | boundsCI <- function(lb.rep, ub.rep, lb.est, ub.est, alpha) { 7 | 8 | reps <- length(lb.rep) 9 | 10 | ## bonferroni 11 | bon.lower <- quantile(lb.rep, alpha/2) 12 | bon.upper <- quantile(ub.rep, 1-alpha/2) 13 | 14 | ## b-method 15 | bmin.dif <- lb.rep-lb.est 16 | bmax.dif <- ub.est-ub.rep 17 | bmin.max.dif <- c(bmin.dif, bmax.dif) 18 | b.sup <- rep(NA, 2*reps) 19 | 20 | for (i in 1:(2*reps)) { 21 | b.sup[i]<-max(sum(bmin.dif<=bmin.max.dif[i])/reps, #emp.
dis func 22 | sum(bmax.dif<=bmin.max.dif[i])/reps) 23 | } 24 | beta <- quantile(b.sup, 1-alpha) 25 | b.lower <- lb.est-quantile(bmin.dif, beta) 26 | b.upper <- ub.est+quantile(bmax.dif, beta) 27 | names(b.lower) <- names(bon.lower) 28 | names(b.upper) <- names(bon.upper) 29 | 30 | ##output 31 | return(list(bonferroni = c(bon.lower, bon.upper), 32 | bmethod = c(b.lower, b.upper))) 33 | } 34 | --------------------------------------------------------------------------------
/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: experiment 2 | Version: 1.2.1 3 | Date: 2022-04-07 4 | Title: R Package for Designing and Analyzing Randomized Experiments 5 | Authors@R: c( 6 | person("Kosuke", "Imai", , "imai@harvard.edu", c("aut", "cre")), 7 | person("Zhichao", "Jiang", , "zhichaoj@princeton.edu", c("aut")) 8 | ) 9 | Maintainer: Kosuke Imai <imai@harvard.edu> 10 | Depends: boot, MASS, R (>= 2.4.0) 11 | Description: Provides various statistical methods for 12 | designing and analyzing randomized experiments. One functionality 13 | of the package is the implementation of randomized-block and 14 | matched-pair designs based on possibly multivariate pre-treatment 15 | covariates. The package also provides the tools to analyze various 16 | randomized experiments including cluster randomized experiments, 17 | two-stage randomized experiments, randomized experiments with 18 | noncompliance, and randomized experiments with missing data. 19 | License: GPL (>=2) 20 | LazyLoad: yes 21 | LazyData: yes 22 | URL: https://github.com/kosukeimai/experiment 23 | BugReports: https://github.com/kosukeimai/experiment/issues 24 | RoxygenNote: 7.1.2 25 | --------------------------------------------------------------------------------
/R/print.summary.NoncompMAR.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.summary.NoncompMAR <- function(x, digits = max(3, getOption("digits") 3 | - 3), param = TRUE, ...) { 4 | 5 | cat("\nCall:\n") 6 | cat(paste(deparse(x$call), sep = "\n", collapse = "\n"), "\n\n", 7 | sep = "") 8 | 9 | cat("\nQuantities of Interest:\n") 10 | printCoefmat(x$qoi.table, digits = digits, na.print = "NA", ...) 11 | 12 | if (!is.null(x$coefC.table) & param) { 13 | cat("\nCoefficients for the compliance model:\n") 14 | printCoefmat(x$coefC.table, digits = digits, na.print = "NA", ...) 15 | } 16 | 17 | if (!is.null(x$coefO.table) & param) { 18 | cat("\nCoefficients for the outcome model:\n") 19 | printCoefmat(x$coefO.table, digits = digits, na.print = "NA", ...) 20 | } 21 | 22 | if (!is.null(x$tauO.table) & param) { 23 | cat("\nThreshold parameters for the outcome model:\n") 24 | printCoefmat(x$tauO.table, digits = digits, na.print = "NA", ...) 25 | } 26 | 27 | cat("\nNumber of observations:", x$n.obs) 28 | cat("\nNumber of Gibbs draws:", x$n.draws) 29 | cat("\n\n") 30 | invisible(x) 31 | } 32 | --------------------------------------------------------------------------------
/man/AUPEC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AUPEC.R 3 | \name{AUPEC} 4 | \alias{AUPEC} 5 | \title{Estimation of the unnormalized Area Under Prescription Evaluation Curve (AUPEC) in Completely Randomized Experiments} 6 | \usage{ 7 | AUPEC(T, tau, Y) 8 | } 9 | \arguments{ 10 | \item{T}{The unit-level binary treatment receipt variable.} 11 | 12 | \item{tau}{The unit-level continuous score for treatment assignment. We assume those that have tau<0 should
We assume those that have tau<0 should 13 | not have treatment. Conditional Average Treatment Effect is one possible measure.} 14 | 15 | \item{Y}{The outcome variable of interest.} 16 | } 17 | \value{ 18 | A list that contains the following items: \item{aupec}{The estimated 19 | Area Under Prescription Evaluation Curve} \item{sd}{The estimated standard deviation 20 | of AUPEC.} 21 | } 22 | \description{ 23 | This function estimates AUPEC. The details of the methods for this design are given in Imai and Li (2019). 24 | } 25 | \references{ 26 | Imai and Li (2019). \dQuote{Experimental Evaluation of Individualized Treatment Rules}, 27 | } 28 | \author{ 29 | Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 30 | \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 31 | } 32 | \keyword{evaluation} 33 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,randomize) 4 | S3method(print,summary.NoncompMAR) 5 | S3method(summary,NoncompMAR) 6 | export(ATEbounds) 7 | export(ATEcluster) 8 | export(ATEnocov) 9 | export(ATOPnoassumption) 10 | export(ATOPobs) 11 | export(ATOPsens) 12 | export(AUPEC) 13 | export(CACEcluster) 14 | export(CADErand) 15 | export(CADEreg) 16 | export(PAPD) 17 | export(PAPE) 18 | export(randomize) 19 | importFrom(MASS,mvrnorm) 20 | importFrom(boot,boot) 21 | importFrom(stats,coef) 22 | importFrom(stats,complete.cases) 23 | importFrom(stats,cov) 24 | importFrom(stats,fitted) 25 | importFrom(stats,ftable) 26 | importFrom(stats,lm) 27 | importFrom(stats,mahalanobis) 28 | importFrom(stats,model.frame) 29 | importFrom(stats,model.matrix) 30 | importFrom(stats,model.response) 31 | importFrom(stats,na.fail) 32 | importFrom(stats,na.omit) 33 | importFrom(stats,pnorm) 34 | importFrom(stats,printCoefmat) 35 | importFrom(stats,qnorm) 36 | importFrom(stats,quantile) 37 | importFrom(stats,rbinom) 38 | importFrom(stats,rnorm) 39 | importFrom(stats,runif) 40 | importFrom(stats,sd) 41 | importFrom(stats,terms) 42 | importFrom(stats,uniroot) 43 | importFrom(stats,var) 44 | importFrom(stats,vcov) 45 | importFrom(stats,weighted.mean) 46 | importFrom(utils,packageDescription) 47 | useDynLib(experiment) 48 | -------------------------------------------------------------------------------- /man/PAPE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PAPE.R 3 | \name{PAPE} 4 | \alias{PAPE} 5 | \title{Estimation of the Population Average Prescription Effect in Completely Randomized Experiments} 6 | \usage{ 7 | PAPE(T, That, Y, plim = NA) 8 | } 9 | \arguments{ 10 | \item{T}{The unit-level binary treatment receipt variable.} 11 | 12 | \item{That}{The unit-level binary treatment that would have been assigned by the 13 | individualized treatment rule.} 14 | 15 | \item{Y}{The outcome variable of interest.} 16 | 17 | \item{plim}{The maximum percentage of population that can be treated under the 18 | budget constraint. Should be a decimal between 0 and 1. 
Default is NA which assumes 19 | no budget constraint.} 20 | } 21 | \value{ 22 | A list that contains the following items: \item{pape}{The estimated 23 | Population Average Prescription Effect.} \item{sd}{The estimated standard deviation 24 | of PAPE.} 25 | } 26 | \description{ 27 | This function estimates the Population Average Prescription Effect with and without a budget 28 | constraint. The details of the methods for this design are given in Imai and Li (2019). 29 | } 30 | \references{ 31 | Imai and Li (2019). \dQuote{Experimental Evaluation of Individualized Treatment Rules}, 32 | } 33 | \author{ 34 | Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 35 | \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 36 | } 37 | \keyword{evaluation} 38 | -------------------------------------------------------------------------------- /man/PAPD.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PAPD.R 3 | \name{PAPD} 4 | \alias{PAPD} 5 | \title{Estimation of the Population Average Prescription Difference in Completely Randomized Experiments} 6 | \usage{ 7 | PAPD(T, Thatfp, Thatgp, Y, plim) 8 | } 9 | \arguments{ 10 | \item{T}{The unit-level binary treatment receipt variable.} 11 | 12 | \item{Thatfp}{The unit-level binary treatment that would have been assigned by the 13 | first individualized treatment rule.} 14 | 15 | \item{Thatgp}{The unit-level binary treatment that would have been assigned by the 16 | second individualized treatment rule.} 17 | 18 | \item{Y}{The outcome variable of interest.} 19 | 20 | \item{plim}{The maximum percentage of population that can be treated under the 21 | budget constraint. Should be a decimal between 0 and 1.} 22 | } 23 | \value{ 24 | A list that contains the following items: \item{papd}{The estimated 25 | Population Average Prescription Difference} \item{sd}{The estimated standard deviation 26 | of PAPD.} 27 | } 28 | \description{ 29 | This function estimates the Population Average Prescription Difference with a budget 30 | constraint. The details of the methods for this design are given in Imai and Li (2019). 31 | } 32 | \references{ 33 | Imai and Li (2019). \dQuote{Experimental Evaluation of Individualized Treatment Rules}, 34 | } 35 | \author{ 36 | Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 37 | \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 38 | } 39 | \keyword{evaluation} 40 | -------------------------------------------------------------------------------- /man/ATEnocov.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATEnocov.R 3 | \name{ATEnocov} 4 | \alias{ATEnocov} 5 | \title{Estimation of the Average Treatment Effect in Randomized Experiments} 6 | \usage{ 7 | ATEnocov(Y, Z, data = parent.frame(), match = NULL) 8 | } 9 | \arguments{ 10 | \item{Y}{The outcome variable of interest.} 11 | 12 | \item{Z}{The (randomized) treatment variable. This variable should be 13 | binary.} 14 | 15 | \item{data}{A data frame containing the relevant variables.} 16 | 17 | \item{match}{A variable indicating matched-pairs. The two units in the same 18 | matched-pair should have the same value.} 19 | } 20 | \value{ 21 | A list of class \code{ATEnocov} which contains the following items: 22 | \item{call}{ The matched call. } \item{Y}{ The outcome variable. 
} 23 | \item{Z}{ The treatment variable. } \item{match}{ The matched-pair 24 | indicator variable. } \item{ATEest}{ The estimated average treatment 25 | effect. } \item{ATE.var}{ The estimated variance of the average treatment 26 | effect estimator. } \item{diff}{ Within-pair differences if the 27 | matched-pair design is analyzed. } 28 | } 29 | \description{ 30 | This function computes the standard ``difference-in-means'' estimate of the 31 | average treatment effect in randomized experiments without using 32 | pre-treatment covariates. The treatment variable is assumed to be binary. 33 | Currently, two designs are allowed: the completely randomized design and the 34 | matched-pair design. 35 | } 36 | \references{ 37 | Imai, Kosuke (2008). \dQuote{Randomization-based Inference and 38 | Efficiency Analysis in Experiments under the Matched-Pair Design}, Statistics in Medicine. 39 | } 40 | \author{ 41 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 42 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 43 | } 44 | \keyword{design} 45 | --------------------------------------------------------------------------------
/R/summary.NoncompMAR.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | summary.NoncompMAR <- function(object, CI=c(2.5, 97.5),...){ 3 | 4 | qoi <- cbind(object$itt, object$cace, object$pc, object$base) 5 | qoi <- cbind(apply(qoi, 2, mean), apply(qoi, 2, sd), 6 | apply(qoi, 2, quantile, min(CI)/100), 7 | apply(qoi, 2, quantile, max(CI)/100)) 8 | colnames(qoi) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 9 | paste(max(CI), "%", sep="")) 10 | 11 | if (!is.null(object$coefficientsC)) { 12 | coefC <- object$coefficientsC 13 | coefC <- cbind(apply(coefC, 2, mean), apply(coefC, 2, sd), 14 | apply(coefC, 2, quantile, min(CI)/100), 15 | apply(coefC, 2, quantile, max(CI)/100)) 16 | colnames(coefC) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 17 | paste(max(CI), "%", sep="")) 18 | } 19 | else 20 | coefC <- NULL 21 | 22 | if (!is.null(object$coefficientsO)) { 23 | coefO <- object$coefficientsO 24 | coefO <- cbind(apply(coefO, 2, mean), apply(coefO, 2, sd), 25 | apply(coefO, 2, quantile, min(CI)/100), 26 | apply(coefO, 2, quantile, max(CI)/100)) 27 | colnames(coefO) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 28 | paste(max(CI), "%", sep="")) 29 | } 30 | else 31 | coefO <- NULL 32 | 33 | if (!is.null(object$coefficientsS)) { 34 | coefS <- object$coefficientsS 35 | coefS <- cbind(apply(coefS, 2, mean), apply(coefS, 2, sd), 36 | apply(coefS, 2, quantile, min(CI)/100), 37 | apply(coefS, 2, quantile, max(CI)/100)) 38 | colnames(coefS) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 39 | paste(max(CI), "%", sep="")) 40 | } 41 | else 42 | coefS <- NULL 43 | 44 | 45 | ans <- list(call = object$call, n.obs = length(object$Y), n.draws = 46 | object$n.draws, qoi.table = qoi, coefC.table = coefC, 47 | coefO.table = coefO, coefS.table = coefS) 48 | 49 | if (!is.null(object$thresholds)) { 50 | tauO <- object$thresholds 51 | tauO <- cbind(apply(tauO, 2, mean), apply(tauO, 2, sd), 52 | apply(tauO, 2, quantile, min(CI)/100), 53 | apply(tauO, 2, quantile, max(CI)/100)) 54 | colnames(tauO) <- c("mean", "std.dev.", paste(min(CI), "%", sep=""), 55 | paste(max(CI), "%", sep="")) 56 | ans$tauO.table <- tauO 57 | } 58 | 59 | class(ans) <- "summary.NoncompMAR" 60 | return(ans) 61 | } 62 | --------------------------------------------------------------------------------
/R/CACEcov.R:
-------------------------------------------------------------------------------- 1 | ### 2 | ### Calculate the CACE with covariates and optional clustering using 3 | ### two-stage least squares 4 | ### 5 | 6 | CACEcov <- function(Y, D, Z, X, grp = NULL, data = parent.frame(), 7 | robust = FALSE, fast = TRUE){ 8 | 9 | call <- match.call() 10 | Y <- matrix(eval(call$Y, data), ncol = 1) 11 | N <- nrow(Y) 12 | D <- matrix(eval(call$D, data), ncol = 1) 13 | X <- model.matrix(X, data = data) 14 | Z <- cbind(X, matrix(eval(call$Z, data), nrow = N)) 15 | X <- cbind(X, D) 16 | grp <- eval(call$grp, data) 17 | if (!is.null(grp)) { 18 | sgrp <- sort(grp, index.return = TRUE) 19 | grp <- grp[sgrp$ix] 20 | X <- X[sgrp$ix,] 21 | Z <- Z[sgrp$ix,] 22 | D <- D[sgrp$ix,] 23 | Y <- Y[sgrp$ix,] 24 | } 25 | 26 | dhat <- fitted(tmp <- lm(D ~ -1 + Z)) 27 | beta <- coef(lm(Y ~ -1 + X[,-ncol(X)] + dhat)) 28 | ZZinv <- (vcov(tmp)/(summary(tmp)$sigma^2)) 29 | XZ <- t(X) %*% Z 30 | XPzXinv <- solve(XZ %*% ZZinv %*% t(XZ)) 31 | 32 | epsilon <- c(Y - X %*% beta) 33 | est <- beta[length(beta)] 34 | 35 | if (is.null(grp)) { 36 | if (robust) { 37 | if (fast) 38 | ZOmegaZ <- t(Z) %*% diag(epsilon^2) %*% Z 39 | else { 40 | ZOmegaZ <- matrix(0, ncol = ncol(Z), nrow = ncol(Z)) 41 | for (i in 1:nrow(Z)) 42 | ZOmegaZ <- ZOmegaZ + crossprod(matrix(Z[i,], nrow = 1)) * epsilon[i]^2 43 | } 44 | var <- XPzXinv %*% XZ %*% ZZinv 45 | var <- var %*% ZOmegaZ %*% t(var) 46 | } else { 47 | sig2 <- c(crossprod(epsilon))/ N 48 | var <- sig2 * XPzXinv 49 | } 50 | } else { 51 | n.grp <- length(unique(grp)) 52 | if (fast) { 53 | Omega <- matrix(0, ncol = N, nrow = N) 54 | counter <- 1 55 | for (i in 1:n.grp) { 56 | n.grp.obs <- sum(grp == unique(grp)[i]) 57 | Omega[counter:(counter+n.grp.obs-1),counter:(counter+n.grp.obs-1)] <- 58 | epsilon[grp == unique(grp)[i]] %*% t(epsilon[grp == unique(grp)[i]]) 59 | counter <- counter + n.grp.obs 60 | } 61 | ZOmegaZ <- t(Z) %*% Omega %*% Z 62 | } else { 63 | ZOmegaZ <- matrix(0, ncol = ncol(Z), nrow = ncol(Z)) 64 | for (i in 1:n.grp) { 65 | ZOmegaZ <- ZOmegaZ + t(Z[grp == unique(grp)[i],]) %*% 66 | (epsilon[grp == unique(grp)[i]] %*% t(epsilon[grp == unique(grp)[i]])) %*% 67 | Z[grp == unique(grp)[i],] 68 | } 69 | 70 | } 71 | var <- XPzXinv %*% XZ %*% ZZinv 72 | var <- var %*% ZOmegaZ %*% t(var) 73 | } 74 | names(est) <- "CACE" 75 | 76 | return(list(est = est, var = var[nrow(var),ncol(var)])) 77 | } 78 | --------------------------------------------------------------------------------
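CACEcov() above spells out two-stage least squares by hand: the first stage lm(D ~ -1 + Z) regresses the received treatment on the instrument and covariates, and its fitted values dhat replace D in the outcome regression, with homoskedastic, heteroskedasticity-robust (robust = TRUE), or cluster-robust (grp) variance options. It is not listed among the NAMESPACE exports shown earlier, so it has to be reached with the ':::' operator. A hedged usage sketch (data frame and variable names invented here):

    ## toy data: Z is a randomized encouragement, D the actual take-up,
    ## x1 a pre-treatment covariate (all simulated placeholders)
    set.seed(1)
    df <- data.frame(Z = rbinom(200, 1, 0.5), x1 = rnorm(200))
    df$D <- rbinom(200, 1, 0.2 + 0.6 * df$Z)   # Z shifts take-up
    df$Y <- rnorm(200, mean = df$D)            # D shifts the outcome
    experiment:::CACEcov(Y, D, Z, X = ~ x1, data = df, robust = TRUE)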
/man/CADEreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CADEreg.R 3 | \name{CADEreg} 4 | \alias{CADEreg} 5 | \title{Regression-based method for the complier average direct effect} 6 | \usage{ 7 | CADEreg(data) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor.} 11 | } 12 | \value{ 13 | A list of class \code{CADEreg} which contains the following items: 14 | \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 15 | \item{var1.clu}{ The cluster-robust variance of CADE(1). } \item{var0.clu}{ The cluster-robust variance of CADE(0). } 16 | \item{var1.clu.hc2}{ The cluster-robust HC2 variance of CADE(1). } 17 | \item{var0.clu.hc2}{ The cluster-robust HC2 variance of CADE(0). } 18 | \item{var1.hc2}{ The HC2 variance of CADE(1). } 19 | \item{var0.hc2}{ The HC2 variance of CADE(0). } 20 | \item{var1.ind}{ The individual-robust variance of CADE(1). } 21 | \item{var0.ind}{ The individual-robust variance of CADE(0). } 22 | \item{var1.reg}{ The proposed variance of CADE(1). } 23 | \item{var0.reg}{ The proposed variance of CADE(0). } 24 | } 25 | \description{ 26 | This function computes the point estimates of the complier average direct effect (CADE) and four 27 | different variance estimates: the HC2 variance, the cluster-robust variance, the cluster-robust HC2 28 | variance and the variance proposed in the reference. The estimators calculated using this function 29 | are cluster-weighted, i.e., the weights are equal for each cluster. To obtain the individual-weighted 30 | estimators, please multiply the received treatment and the outcome by \code{n_jJ/N}, where 31 | \code{n_j} is the number of individuals in cluster \code{j}, \code{J} is the number of clusters and 32 | \code{N} is the total number of individuals. 33 | } 34 | \details{ 35 | For the details of the method implemented by this function, see the 36 | references. 37 | } 38 | \references{ 39 | Kosuke Imai, Zhichao Jiang and Anup Malani (2021). 40 | \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Journal of the American Statistical Association}. 41 | } 42 | \author{ 43 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 44 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 45 | Zhichao Jiang, Department of Politics, Princeton University 46 | \email{zhichaoj@princeton.edu}. 47 | } 48 | \keyword{experiments} 49 | \keyword{randomized} 50 | \keyword{two-stage} 51 | --------------------------------------------------------------------------------
/man/ATOPnoassumption.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATOPnoassumption.R 3 | \name{ATOPnoassumption} 4 | \alias{ATOPnoassumption} 5 | \title{Bounding the ATOP when some of the Outcome Data are 6 | Missing Under the Matched-Pairs Design} 7 | \usage{ 8 | ATOPnoassumption(Ya, Yb, Ra, Rb, Ta, Tb, l, u, alpha, rep) 9 | } 10 | \arguments{ 11 | \item{Ya}{A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 12 | as \code{NA}.} 13 | 14 | \item{Yb}{A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 15 | as \code{NA}.} 16 | 17 | \item{Ra}{A vector of the missing data indicators of the first unit in the matched pairs.} 18 | 19 | \item{Rb}{A vector of the missing data indicators of the second unit in the matched pairs.} 20 | 21 | \item{Ta}{A vector of the treatment conditions of the first unit in the matched pairs.} 22 | 23 | \item{Tb}{A vector of the treatment conditions of the second unit in the matched pairs.} 24 | 25 | \item{l}{The lower limit of the outcome.} 26 | 27 | \item{u}{The upper limit of the outcome.} 28 | 29 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 30 | determine the (1-\code{alpha}) level of confidence intervals.
The default is 31 | \code{0.05}.} 32 | 33 | \item{rep}{The number of repetitions for bootstrapping.} 34 | } 35 | \value{ 36 | A list of class \code{ATOPnoassumption} which contains the following items: 37 | \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP. } 38 | \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 39 | \item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 40 | } 41 | \description{ 42 | This function computes the no-assumption bounds on the average treatment effect among always-observed pairs (ATOP) when 43 | some of the outcome data are missing. The confidence intervals for the 44 | ATOP are also computed. 45 | } 46 | \details{ 47 | For the details of the method implemented by this function, see the 48 | references. 49 | } 50 | \examples{ 51 | data(seguro) 52 | attach(seguro) 53 | ATOPnoassumption(Ya,Yb,Ra,Rb,Ta,Tb,l=0,u=1,alpha=0.05,rep=100) 54 | } 55 | \references{ 56 | Kosuke Imai and Zhichao Jiang (2018). 57 | \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 58 | Truncation-by-Death under the Matched-Pairs Design}, \emph{Technical Report}. Department of Politics, Princeton 59 | University. 60 | } 61 | \author{ 62 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 63 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 64 | Zhichao Jiang, Department of Politics, Princeton University 65 | \email{zhichaoj@princeton.edu}. 66 | } 67 | \keyword{design} 68 | \keyword{matched-pairs} 69 | --------------------------------------------------------------------------------
/R/PAPE.R: -------------------------------------------------------------------------------- 1 | #' Estimation of the Population Average Prescription Effect in Completely Randomized Experiments 2 | #' 3 | #' This function estimates the Population Average Prescription Effect with and without a budget 4 | #' constraint. The details of the methods for this design are given in Imai and Li (2019). 5 | #' 6 | #' 7 | #' 8 | #' @param T The unit-level binary treatment receipt variable. 9 | #' @param That The unit-level binary treatment that would have been assigned by the 10 | #' individualized treatment rule. 11 | #' @param Y The outcome variable of interest. 12 | #' @param plim The maximum percentage of population that can be treated under the 13 | #' budget constraint. Should be a decimal between 0 and 1. Default is NA which assumes 14 | #' no budget constraint. 15 | #' @return A list that contains the following items: \item{pape}{The estimated 16 | #' Population Average Prescription Effect.} \item{sd}{The estimated standard deviation 17 | #' of PAPE.} 18 | #' @author Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 19 | #' \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 20 | #' @references Imai and Li (2019).
\dQuote{Experimental Evaluation of Individualized Treatment Rules}, 21 | #' @keywords evaluation 22 | #' @export PAPE 23 | PAPE <- function (T, That, Y, plim = NA) { 24 | if (!(identical(as.numeric(T),as.numeric(as.logical(T))))) { 25 | stop("T should be binary.") 26 | } 27 | if (!(identical(as.numeric(That),as.numeric(as.logical(That))))) { 28 | stop("That should be binary.") 29 | } 30 | T=as.numeric(T) 31 | That=as.numeric(That) 32 | Y=as.numeric(Y) 33 | if (is.na(plim)) { 34 | n=length(Y) 35 | n1=sum(T) 36 | n0=n-n1 37 | n1h=sum(That) 38 | n0h=n-n1h 39 | probs=sum(That)/n 40 | SAPE=n/(n-1)*(1/n1*sum(T*That*Y)+1/n0*sum(Y*(1-T)*(1-That))-n1h/n1/n*sum(Y*T)-n0h/n0/n*sum(Y*(1-T))) 41 | Sf1=var(((That-probs)*Y)[T==1]) 42 | Sf0=var(((That-probs)*Y)[T==0]) 43 | SATE=1/n1*sum(T*Y)-1/n0*(sum((1-T)*Y)) 44 | covarterm=1/n^2*(SAPE^2+2*(n-1)*SAPE*SATE*(2*probs-1)-(1-probs)*probs*n*SATE^2) 45 | varexp=(n/(n-1))^2*(Sf1/n1+Sf0/n0+covarterm) 46 | return(list(pape=SAPE,sd=sqrt(varexp))) 47 | } else { 48 | if ((plim<0) | (plim>1)) { 49 | stop("Budget constraint should be between 0 and 1") 50 | } 51 | n=length(Y) 52 | n1=sum(T) 53 | n0=n-n1 54 | n1h=sum(That) 55 | n0h=n-n1h 56 | SAPEfp=1/n1*sum(T*That*Y)+1/n0*sum(Y*(1-T)*(1-That))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T)) 57 | Sfp1=var(((That-plim)*Y)[T==1]) 58 | Sfp0=var(((That-plim)*Y)[T==0]) 59 | kf1=mean(Y[T==1 & That==1])-mean(Y[T==0 & That==1]) 60 | kf0=mean(Y[T==1 & That==0])-mean(Y[T==0 & That==0]) 61 | varfp=Sfp1/n1+Sfp0/n0+floor(n*plim)*(n-floor(n*plim))/(n^2*(n-1))*((2*plim-1)*kf1^2-2*plim*kf1*kf0) 62 | return(list(pape=SAPEfp,sd=sqrt(varfp))) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /R/PAPD.R: -------------------------------------------------------------------------------- 1 | #' Estimation of the Population Average Prescription Difference in Completely Randomized Experiments 2 | #' 3 | #' This function estimates the Population Average Prescription Difference with a budget 4 | #' constraint. The details of the methods for this design are given in Imai and Li (2019). 5 | #' 6 | #' 7 | #' 8 | #' @param T The unit-level binary treatment receipt variable. 9 | #' @param Thatfp The unit-level binary treatment that would have been assigned by the 10 | #' first individualized treatment rule. 11 | #' @param Thatgp The unit-level binary treatment that would have been assigned by the 12 | #' second individualized treatment rule. 13 | #' @param Y The outcome variable of interest. 14 | #' @param plim The maximum percentage of population that can be treated under the 15 | #' budget constraint. Should be a decimal between 0 and 1. 16 | #' @return A list that contains the following items: \item{papd}{The estimated 17 | #' Population Average Prescription Difference} \item{sd}{The estimated standard deviation 18 | #' of PAPD.} 19 | #' @author Michael Lingzhi Li, Operations Research Center, Massachusetts Institute of Technology 20 | #' \email{mlli@mit.edu}, \url{http://mlli.mit.edu}; 21 | #' @references Imai and Li (2019). 
\dQuote{Experimental Evaluation of Individualized Treatment Rules}, 22 | #' @keywords evaluation 23 | #' @export PAPD 24 | PAPD <- function (T, Thatfp,Thatgp , Y, plim) { 25 | if (!(identical(as.numeric(T),as.numeric(as.logical(T))))) { 26 | stop("T should be binary.") 27 | } 28 | if (!(identical(as.numeric(Thatfp),as.numeric(as.logical(Thatfp))))) { 29 | stop("Thatfp should be binary.") 30 | } 31 | if (!(identical(as.numeric(Thatgp),as.numeric(as.logical(Thatgp))))) { 32 | stop("Thatgp should be binary.") 33 | } 34 | if ((plim<0) | (plim>1)) { 35 | stop("Budget constraint should be between 0 and 1") 36 | } 37 | T=as.numeric(T) 38 | Thatfp=as.numeric(Thatfp) 39 | Thatgp=as.numeric(Thatgp) 40 | Y=as.numeric(Y) 41 | n=length(Y) 42 | n1=sum(T) 43 | n0=n-n1 44 | SAPEfp=1/n1*sum(T*Thatfp*Y)+1/n0*sum(Y*(1-T)*(1-Thatfp))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T)) 45 | SAPEgp=1/n1*sum(T*Thatgp*Y)+1/n0*sum(Y*(1-T)*(1-Thatgp))-plim/n1*sum(Y*T)-(1-plim)/n0*sum(Y*(1-T)) 46 | Sfp1=var(((Thatfp-plim)*Y)[T==1]) 47 | Sfp0=var(((Thatfp-plim)*Y)[T==0]) 48 | kf1=mean(Y[T==1 & Thatfp==1])-mean(Y[T==0 & Thatfp==1]) 49 | kf0=mean(Y[T==1 & Thatfp==0])-mean(Y[T==0 & Thatfp==0]) 50 | PAPD=SAPEfp-SAPEgp 51 | Sfgp1=var(((Thatfp-Thatgp)*Y)[T==1]) 52 | Sfgp0=var(((Thatfp-Thatgp)*Y)[T==0]) 53 | kg1=mean(Y[T==1 & Thatgp==1])-mean(Y[T==0 & Thatgp==1]) 54 | kg0=mean(Y[T==1 & Thatgp==0])-mean(Y[T==0 & Thatgp==0]) 55 | varfgp=Sfgp1/n1+Sfgp0/n0-floor(n*plim)*(n-floor(n*plim))/(n^2*(n-1))*(kf1^2+kg1^2)+ 56 | 2*floor(n*plim)*max(floor(n*plim),n-floor(n*plim))/(n^2*(n-1))*abs(kf1*kg1) 57 | if (varfgp>0) { 58 | return(list(papd=PAPD,sd=sqrt(varfgp))) 59 | } else { 60 | return(list(papd=PAPD,sd=0)) 61 | } 62 | } 63 | --------------------------------------------------------------------------------
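A usage sketch for the exported PAPE() and PAPD() defined above, on simulated data (all values invented here): T is the realized randomized assignment, while Thatf and Thatg are the assignments two competing individualized treatment rules would have made.

    library(experiment)
    set.seed(123)
    n <- 200
    T <- rbinom(n, 1, 0.5)
    Thatf <- rbinom(n, 1, 0.3)
    Thatg <- rbinom(n, 1, 0.3)
    Y <- rnorm(n)
    PAPE(T, Thatf, Y)                      # no budget constraint
    PAPE(T, Thatf, Y, plim = 0.3)          # at most 30% of units treated
    PAPD(T, Thatf, Thatg, Y, plim = 0.3)   # difference between the two rules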
/man/ATOPsens.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATOPsens.R 3 | \name{ATOPsens} 4 | \alias{ATOPsens} 5 | \title{Sensitivity analysis for the ATOP when some of the Outcome Data are 6 | Missing Under the Matched-Pairs Design} 7 | \usage{ 8 | ATOPsens(Ya, Yb, Ra, Rb, Ta, Tb, gamma, l, u, alpha, rep) 9 | } 10 | \arguments{ 11 | \item{Ya}{A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 12 | as \code{NA}.} 13 | 14 | \item{Yb}{A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 15 | as \code{NA}.} 16 | 17 | \item{Ra}{A vector of the missing data indicators of the first unit in the matched pairs.} 18 | 19 | \item{Rb}{A vector of the missing data indicators of the second unit in the matched pairs.} 20 | 21 | \item{Ta}{A vector of the treatment conditions of the first unit in the matched pairs.} 22 | 23 | \item{Tb}{A vector of the treatment conditions of the second unit in the matched pairs.} 24 | 25 | \item{gamma}{The sensitivity parameter which characterizes the degree of the within-pair similarity.} 26 | 27 | \item{l}{The lower limit of the outcome.} 28 | 29 | \item{u}{The upper limit of the outcome.} 30 | 31 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 32 | determine the (1-\code{alpha}) level of confidence intervals. The default is 33 | \code{0.05}.} 34 | 35 | \item{rep}{The number of repetitions for bootstrapping.} 36 | } 37 | \value{ 38 | A list of class \code{ATOPsens} which contains the following items: 39 | \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP. } 40 | \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 41 | \item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 42 | } 43 | \description{ 44 | This function computes the bounds on the average treatment effect among always-observed pairs (ATOP) 45 | with pre-specified sensitivity parameters when 46 | some of the outcome data are missing. The sensitivity parameter characterizes the degree of the within-pair similarity. 47 | The confidence intervals for the 48 | ATOP are also computed. 49 | } 50 | \details{ 51 | For the details of the method implemented by this function, see the 52 | references. 53 | } 54 | \examples{ 55 | data(seguro) 56 | attach(seguro) 57 | ATOPsens(Ya,Yb,Ra,Rb,Ta,Tb,gamma=0.95,l=0,u=1,alpha=0.05,rep=100) 58 | } 59 | \references{ 60 | Kosuke Imai and Zhichao Jiang (2018). 61 | \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 62 | Truncation-by-Death under the Matched-Pairs Design}, \emph{Statistics in Medicine}. 63 | } 64 | \author{ 65 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 66 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 67 | Zhichao Jiang, Department of Politics, Princeton University 68 | \email{zhichaoj@princeton.edu}. 69 | } 70 | \keyword{design} 71 | \keyword{matched-pairs} 72 | --------------------------------------------------------------------------------
/src/subroutines.c: -------------------------------------------------------------------------------- 1 | #define USE_FC_LEN_T 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include "vector.h" 10 | #include "rand.h" 11 | 12 | /* The Sweep operator */ 13 | void SWP( 14 | double **X, /* The Matrix to work on */ 15 | int k, /* The row to sweep */ 16 | int size) /* The dim.
of X */ 17 | { 18 | int i,j; 19 | 20 | if (X[k][k] < 10e-20) 21 | error("SWP: singular matrix.\n"); 22 | else 23 | X[k][k]=-1/X[k][k]; 24 | for(i=0;i0))/n 29 | Z=rbinom(1e4,n,pf) 30 | 31 | ThatfA=numeric(n) 32 | kAf1=numeric(n) 33 | kAf0=numeric(n) 34 | kAf1A=numeric(n) 35 | kAf1B=numeric(n) 36 | kAf0A=numeric(n) 37 | kAf0B=numeric(n) 38 | covarsum=0 39 | for (i in 1:n) { 40 | cutofftemp=quantile(tau,1-i/n) 41 | Thatftemp=as.numeric(tau>cutofftemp) 42 | ThatftempA=as.numeric(tau>max(cutofftemp,0)) 43 | ThatfA=ThatfA+1/n*ThatftempA 44 | cutofftemp2=quantile(tau,(i-1)/n) 45 | Thatftemp2=as.numeric(tau>cutofftemp2) 46 | tempkAf1=mean(Y[T==1 & Thatftemp2==1])-mean(Y[T==0 & Thatftemp2==1]) 47 | tempkAf0=mean(Y[T==1 & Thatftemp==0])-mean(Y[T==0 & Thatftemp==0]) 48 | if (is.nan(tempkAf1)) { 49 | kAf1[n-i+1]=kAf1[n-i+2] 50 | } else { 51 | kAf1[n-i+1]=tempkAf1 52 | } 53 | if (is.nan(tempkAf0)) { 54 | kAf0[i]=kAf0[i-1] 55 | } else { 56 | kAf0[i]=tempkAf0 57 | } 58 | kAf1A[n-i+1]=(n-i+1)*kAf1[n-i+1] 59 | kAf1B[n-i+1]=(i-1)*kAf1[n-i+1] 60 | kAf0A[i]=i*kAf0[i] 61 | kAf0B[i]=(n-i)*kAf0[i] 62 | } 63 | sumtemp1=cumsum(kAf1A*kAf0B) 64 | sumtemp2=cumsum(kAf1A) 65 | sumtemp3=cumsum(kAf1A*kAf1B) 66 | tempM=outer(kAf1A,kAf1B) 67 | tempM[lower.tri(tempM,diag=TRUE)] <- 0 68 | tempMsum=cumsum(colSums(tempM)) 69 | covarsum1=mean(-1/(n^3*(n-1))*sumtemp1[Z]-Z*(n-Z)^2/(n^3*(n-1))*kAf1[Z]*kAf0[Z]-2/(n^4*(n-1))*tempMsum[Z]-Z^2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]^2 70 | -2*(n-Z)^2/(n^4*(n-1))*kAf1[Z]*sumtemp2[Z]+1/n^4*sumtemp3[Z]) 71 | covarsum2=var(1/n*(sumtemp2[Z]/n+(n-Z)*Z/n*kAf1[Z])) 72 | ThatfA2=ThatfA-1/2 73 | SfA1=var((ThatfA2*Y)[T==1]) 74 | SfA0=var((ThatfA2*Y)[T==0]) 75 | varfA=SfA1/n1+SfA0/n0+covarsum1+covarsum2 76 | AUPEC=1/n1*sum(T*ThatfA*Y)+1/n0*sum(Y*(1-T)*(1-ThatfA))-0.5/n1*sum(T*Y)-0.5/n0*sum((1-T)*Y) 77 | return(list(aupec=AUPEC,sd=sqrt(varfA))) 78 | } 79 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include // for NULL 2 | #include 3 | 4 | /* FIXME: 5 | Check these declarations against the C/Fortran source code. 
6 | */ 7 | 8 | /* .C calls */ 9 | extern void LIbinary(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 10 | extern void LIcount(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 11 | extern void LIgaussian(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 12 | extern void LIordinal(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 13 | extern void LItwopart(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 14 | extern void MARprobit(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 15 | extern void NIbprobit(void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *, void *); 16 | 17 | static const R_CMethodDef CEntries[] = { 18 | {"LIbinary", (DL_FUNC) &LIbinary, 44}, 19 | {"LIcount", (DL_FUNC) &LIcount, 48}, 20 | {"LIgaussian", (DL_FUNC) &LIgaussian, 46}, 21 | {"LIordinal", (DL_FUNC) &LIordinal, 46}, 22 | {"LItwopart", (DL_FUNC) &LItwopart, 49}, 23 | {"MARprobit", (DL_FUNC) &MARprobit, 28}, 24 | {"NIbprobit", (DL_FUNC) &NIbprobit, 25}, 25 | {NULL, NULL, 0} 26 | }; 27 | 28 | void R_init_experiment(DllInfo *dll) 29 | { 30 | R_registerRoutines(dll, CEntries, NULL, NULL, NULL); 31 | R_useDynamicSymbols(dll, FALSE); 32 | } 33 | -------------------------------------------------------------------------------- /man/CADErand.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CADErand.R 3 | \name{CADErand} 4 | \alias{CADErand} 5 | \title{Randomization-based method for the complier average direct effect and the complier average spillover effect} 6 | \usage{ 7 | CADErand(data, individual = 
1) 8 | } 9 | \arguments{ 10 | \item{data}{A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor.} 11 | 12 | \item{individual}{A binary variable with TRUE for individual-weighted estimators and FALSE for cluster-weighted estimators.} 13 | } 14 | \value{ 15 | A list of class \code{CADErand} which contains the following items: 16 | \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 17 | \item{CASE1}{ The point estimate of CASE(1). } \item{CASE0}{ The point estimate of CASE(0). } 18 | \item{var.CADE1}{ The variance estimate of CADE(1). } 19 | \item{var.CADE0}{ The variance estimate of CADE(0). } 20 | \item{var.CASE1}{ The variance estimate of CASE(1). } 21 | \item{var.CASE0}{ The variance estimate of CASE(0). } 22 | \item{DEY1}{ The point estimate of DEY(1). } \item{DEY0}{ The point estimate of DEY(0). } 23 | \item{DED1}{ The point estimate of DED(1). } \item{DED0}{ The point estimate of DED(0). } 24 | \item{var.DEY1}{ The variance estimate of DEY(1). } 25 | \item{var.DEY0}{ The variance estimate of DEY(0). } 26 | \item{var.DED1}{ The variance estimate of DED(1). } 27 | \item{var.DED0}{ The variance estimate of DED(0). } 28 | \item{SEY1}{ The point estimate of SEY(1). } \item{SEY0}{ The point estimate of SEY(0). } 29 | \item{SED1}{ The point estimate of SED(1). } \item{SED0}{ The point estimate of SED(0). } 30 | \item{var.SEY1}{ The variance estimate of SEY(1). } 31 | \item{var.SEY0}{ The variance estimate of SEY(0). } 32 | \item{var.SED1}{ The variance estimate of SED(1). } 33 | \item{var.SED0}{ The variance estimate of SED(0). } 34 | } 35 | \description{ 36 | This function computes the point estimates and variance estimates of the complier average direct effect (CADE) and the complier average spillover effect (CASE). 37 | The estimators calculated using this function are either individual-weighted or cluster-weighted. The point estimates and variances of ITT effects are also included. 38 | } 39 | \details{ 40 | For the details of the method implemented by this function, see the 41 | references. 42 | } 43 | \references{ 44 | Kosuke Imai, Zhichao Jiang and Anup Malani (2018). 45 | \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Technical Report}. Department of Politics, Princeton 46 | University. 47 | } 48 | \author{ 49 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 50 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 51 | Zhichao Jiang, Department of Politics, Princeton University 52 | \email{zhichaoj@princeton.edu}. 53 | } 54 | \keyword{experiments} 55 | \keyword{randomized} 56 | \keyword{two-stage} 57 | --------------------------------------------------------------------------------
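A sketch of the input layout CADErand() expects, following the man page above: columns Z (assignment), D (receipt), Y (outcome), A (assignment mechanism), and a factor id. All values are simulated placeholders, and whether this toy design satisfies every internal requirement of CADErand() (e.g., enough clusters under each assignment mechanism) is not guaranteed; the point is only to illustrate the expected columns.

    library(experiment)
    set.seed(1)
    J <- 20; m <- 10                           # 20 clusters of 10 units each
    dat <- data.frame(id = factor(rep(1:J, each = m)),
                      A  = rep(rbinom(J, 1, 0.5), each = m),
                      Z  = rbinom(J * m, 1, 0.5))
    dat$D <- rbinom(J * m, 1, 0.2 + 0.6 * dat$Z)
    dat$Y <- rnorm(J * m, mean = dat$D)
    CADErand(dat, individual = 1)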
/R/Noncomp.mom.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Wald-Type Method of Moments Estimator for Randomized Experiments 3 | ### with Noncompliance and Subsequent Missing Outcomes 4 | ### 5 | 6 | Noncomp.mom <- function(Y, D, Z, data = parent.frame()) { 7 | 8 | call <- match.call() 9 | Y <- eval(call$Y, envir = data) 10 | D <- eval(call$D, envir = data) 11 | Z <- eval(call$Z, envir = data) 12 | N <- length(Y) 13 | N1 <- length(Y[Z==1]) 14 | N0 <- length(Y[Z==0]) 15 | R <- (!is.na(Y))*1 16 | if (sum(R)>0) { 17 | if (sum(D == 1 & Z == 0)>0) { 18 | ## No always-takers 19 | ## Frangakis and Rubin (1999) Biometrika 20 | U <- sum(D*Z)/sum(Z) 21 | R01 <- sum(R*(1-D)*Z)/sum((1-D)*Z) 22 | R0 <- sum(R*(1-Z))/sum(1-Z) 23 | Y0 <- mean(Y[R==1 & Z==0]) 24 | Y01 <- sum(na.omit(Y*R*(1-D)*Z))/sum(R*(1-D)*Z) 25 | Y10 <- (Y0*R0-Y01*R01*(1-U))/(R0-R01*(1-U)) 26 | Y11 <- sum(na.omit(Y*R*D*Z))/sum(R*D*Z) 27 | q <- var(Y[R==1 & D==1 & Z==1])/mean(Z*D*R) 28 | v <- delta <- rep(0, 5) 29 | v[1] <- U*(1-U)/mean(Z==1) 30 | v[2] <- var(Y[R==1 & D==0 & Z == 1])/mean(Z*(1-D)*R==1) 31 | v[3] <- R01*(1-R01)/mean(Z*(1-D)==1) 32 | v[4] <- R0*(1-R0)/mean(Z==0) 33 | v[5] <- var(Y[R==1 & Z==0])/mean(R*(1-Z)==1) 34 | w <- 1/(R0-R01*(1-U)) 35 | delta[1] <- -R0*R01*(Y0-Y01)*(w^2) 36 | delta[2] <- -R01*(1-U)*w 37 | delta[3] <- R0*(Y0-Y01)*(1-U)*(w^2) 38 | delta[4] <- -R01*(Y0-Y01)*(1-U)*(w^2) 39 | delta[5] <- R0*w 40 | 41 | CACEest <- Y11-Y10 42 | IVvar <- (q + sum(v*(delta^2)))/N 43 | ITTest <- U*(Y11-Y10) 44 | ITTvar <- 45 | ((U^2)*q+v[1]*(Y11-Y10-U*delta[1])^2+(U^2)*sum(v[2:5]*(delta[2:5]^2)))/N 46 | } else { 47 | ## Always-Takers allowed 48 | ## variance has not been computed yet.
49 | Ca <- mean(D[Z == 0]) 50 | Cn <- mean(1-D[Z == 1]) 51 | Ra0 <- mean(R[D == 1 & Z == 0]) 52 | Rn1 <- mean(R[D == 0 & Z == 1]) 53 | R1 <- mean(R[Z == 1]) 54 | R0 <- mean(R[Z == 0]) 55 | Ya <- mean(Y[R == 1 & D == 1 & Z == 0]) 56 | Yn <- mean(Y[R == 1 & D == 0 & Z == 1]) 57 | Y1.obs <- mean(Y[R == 1 & Z == 1]) 58 | Y0.obs <- mean(Y[R == 1 & Z == 0]) 59 | Yc1 <- (R1*Y1.obs-Ya*Ca*Ra0-Yn*Cn*Rn1)/(R1-Ca*Ra0-Cn*Rn1) 60 | Yc0 <- (R0*Y0.obs-Ya*Ca*Ra0-Yn*Cn*Rn1)/(R0-Ca*Ra0-Cn*Rn1) 61 | CACEest <- Yc1 - Yc0 62 | ITTest <- CACEest*(1-Ca-Cn); IVvar <- ITTvar <- NA # variances not yet derived for this case 63 | } 64 | } else { 65 | ## No missing outcomes 66 | ## Imbens and Rubin (1997) Annals of Statistics 67 | Y1bar <- mean(Y[Z==1]) 68 | Y0bar <- mean(Y[Z==0]) 69 | D1bar <- mean(D[Z==1]) 70 | D0bar <- mean(D[Z==0]) 71 | ITTest <- Y1bar - Y0bar 72 | ITTestD <- D1bar - D0bar 73 | CACEest <- ITTest/ITTestD 74 | Y1var <- sum(Z*(Y-Y1bar)^2)/(N1^2) 75 | Y0var <- sum((1-Z)*(Y-Y0bar)^2)/(N0^2) 76 | D1var <- sum(Z*(D-D1bar)^2)/(N1^2) 77 | D0var <- sum((1-Z)*(D-D0bar)^2)/(N0^2) 78 | ITTvar <- Y1var + Y0var 79 | ITTvarD <- D1var + D0var 80 | ITTcov <- sum(Z*(Y-Y1bar)*(D-D1bar))/(N1^2) + 81 | sum((1-Z)*(Y-Y0bar)*(D-D0bar))/(N0^2) 82 | IVvar <- (ITTvar*(ITTestD^2)+ITTvarD*(ITTest^2)-2*ITTcov*ITTest*ITTestD)/(ITTestD^4) 83 | } 84 | return(list(CACEest = CACEest, CACEse = sqrt(IVvar), ITTest = ITTest, 85 | ITTse = sqrt(ITTvar))) 86 | } 87 | --------------------------------------------------------------------------------
/man/ATOPobs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATOPobs.R 3 | \name{ATOPobs} 4 | \alias{ATOPobs} 5 | \title{Sensitivity analysis for the ATOP when some of the Outcome Data are 6 | Missing Under the Matched-Pairs Design in Observational Studies} 7 | \usage{ 8 | ATOPobs(Ya, Yb, Ra, Rb, Ta, Tb, gamma, kappa1, kappa0, l, u, alpha, rep) 9 | } 10 | \arguments{ 11 | \item{Ya}{A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 12 | as \code{NA}.} 13 | 14 | \item{Yb}{A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 15 | as \code{NA}.} 16 | 17 | \item{Ra}{A vector of the missing data indicators of the first unit in the matched pairs.} 18 | 19 | \item{Rb}{A vector of the missing data indicators of the second unit in the matched pairs.} 20 | 21 | \item{Ta}{A vector of the treatment conditions of the first unit in the matched pairs.} 22 | 23 | \item{Tb}{A vector of the treatment conditions of the second unit in the matched pairs.} 24 | 25 | \item{gamma}{The sensitivity parameter which characterizes the degree of the within-pair similarity.} 26 | 27 | \item{kappa1}{The sensitivity parameter which characterizes the dependence between \code{R(1)} and \code{T}.} 28 | 29 | \item{kappa0}{The sensitivity parameter which characterizes the dependence between \code{R(0)} and \code{T}.} 30 | 31 | \item{l}{The lower limit of the outcome.} 32 | 33 | \item{u}{The upper limit of the outcome.} 34 | 35 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 36 | determine the (1-\code{alpha}) level of confidence intervals. The default is 37 | \code{0.05}.} 38 | 39 | \item{rep}{The number of repetitions for bootstrapping.} 40 | } 41 | \value{ 42 | A list of class \code{ATOPobs} which contains the following items: 43 | \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP.
} 44 | \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 45 | \item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 46 | } 47 | \description{ 48 | This function computes the bounds on the average treatment effect among always-observed pairs (ATOP) 49 | with pre-specified sensitivity parameters when 50 | some of the outcome data are missing. The sensitivity parameters characterize the degree of the within-pair similarity 51 | and the dependence between the potential missing indicators and the treatment. The confidence intervals for the 52 | ATOP are also computed. 53 | } 54 | \details{ 55 | For the details of the method implemented by this function, see the 56 | references. 57 | } 58 | \examples{ 59 | data(seguro) 60 | attach(seguro) 61 | ATOPobs(Ya,Yb,Ra,Rb,Ta,Tb,gamma=0.95,kappa1=1,kappa0=1,l=0,u=1,alpha=0.05,rep=100) # kappa values illustrative 62 | } 63 | \references{ 64 | Kosuke Imai and Zhichao Jiang (2018). 65 | \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 66 | Truncation-by-Death under the Matched-Pairs Design}, \emph{Statistics in Medicine}. 67 | } 68 | \author{ 69 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 70 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 71 | Zhichao Jiang, Department of Politics, Princeton University 72 | \email{zhichaoj@princeton.edu}. 73 | } 74 | \keyword{design} 75 | \keyword{matched-pairs} 76 | -------------------------------------------------------------------------------- /src/models.h: -------------------------------------------------------------------------------- 1 | /* normal regression */ 2 | void bNormalReg(double **D, double *beta, double *sig2, 3 | int n_samp, int n_cov, int addprior, int pbeta, 4 | double *beta0, double **A0, int psig2, double s0, 5 | int nu0, int sig2fixed); 6 | 7 | /* binomial probit regression */ 8 | void bprobitGibbs(int *Y, double **X, double *beta, int n_samp, 9 | int n_cov, int prior, double *beta0, double **A0, 10 | int mda, int n_gen); 11 | 12 | /* ordinal probit regression */ 13 | void boprobitMCMC(int *Y, double **X, double *beta, 14 | double *tau, int n_samp, int n_cov, int n_cat, 15 | int prior, double *beta0, double **A0, int mda, 16 | int mh, double *prop, int *accept, int n_gen); 17 | 18 | /* binomial and multinomial logistic regression */ 19 | void logitMetro(int *Y, double **X, double *beta, int n_samp, 20 | int n_dim, int n_cov, double *beta0, double **A0, 21 | double *Var, int n_gen, int *counter); 22 | 23 | /* Normal mixed effects regression */ 24 | void bNormalMixedGibbs(double *Y, double **X, double ***Zgrp, 25 | int *grp, double *beta, double **gamma, double *sig2, 26 | double **Psi, int n_samp, int n_fixed, int n_random, 27 | int n_grp, int prior, double *beta0, double **A0, 28 | int imp, int nu0, double s0, int tau0, double **T0, 29 | int n_gen0); 30 | 31 | /* binomial mixed effects probit regression */ 32 | void bprobitMixedGibbs(int *Y, double **X, double ***Zgrp, 33 | int *grp, double *beta, double **gamma, 34 | double **Psi, int n_samp, int n_fixed, 35 | int n_random, int n_grp, 36 | int prior, double *beta0, double **A0, 37 | int tau0, double **T0, int n_gen); 38 | 39 | /* (binomial/multinomial) logistic mixed effects regression */ 40 | void logitMixedMetro(int *Y, double **X, double ***Z, int *grp, 41 | double *beta, double ***gamma, double ***Psi, 42 | int n_samp, int n_dim, int n_fixed, 43 | int n_random, int n_grp, double *beta0, 44 | double **A0, int tau0, double **T0, 45 | double *tune_fixed, double *tune_random, 46 | int
n_gen, int *acc_fixed, int *acc_random); 47 | 48 | /* ordinal probit mixed effects regression */ 49 | void boprobitMixedMCMC(int *Y, double **X, double ***Zgrp, int *grp, 50 | double *beta, double **gamma, double *tau, 51 | double **Psi, int n_samp, int n_cat, 52 | int n_fixed, int n_random, int n_grp, 53 | int prior, double *beta0, double **A0, int tau0, 54 | double **T0, int mh, double *prop, int *accept, 55 | int n_gen); 56 | 57 | /* negative binomial regression */ 58 | void negbinMetro(int *Y, double **X, double *beta, double *sig2, 59 | int n_samp, int n_cov, double *beta0, double **A0, 60 | double a0, double b0, double *varb, double vars, 61 | double *cont, int n_gen, int *counter, int sig2fixed); 62 | 63 | /* mixed effects negative binomial regression */ 64 | void bnegbinMixedMCMC(int *Y, int **Ygrp, double **X, double ***Zgrp, 65 | int *grp, double *beta, double **gamma, 66 | double *sig2, double **Psi, int n_samp, 67 | int n_fixed, int n_random, int n_grp, 68 | int max_samp_grp, double *beta0, 69 | double **A0, double a0, double b0, 70 | int tau0, double **T0, double *varb, double vars, 71 | double *varg, int *counter, int **counterg, 72 | int n_gen); 73 | -------------------------------------------------------------------------------- /src/vector.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | int* intArray(int num) { 8 | int *iArray = (int *)malloc(num * sizeof(int)); 9 | if (!iArray) 10 | error("Out of memory error in intArray\n"); 11 | return iArray; 12 | } 13 | 14 | void PintArray(int *ivector, int length) { 15 | int i; 16 | for (i = 0; i < length; i++) 17 | Rprintf("%5d\n", ivector[i]); 18 | } 19 | 20 | int** intMatrix(int row, int col) { 21 | int i; 22 | int **iMatrix = (int **)malloc(row * sizeof(int *)); 23 | if (!iMatrix) 24 | error("Out of memory error in intMatrix\n"); 25 | for (i = 0; i < row; i++) { 26 | iMatrix[i] = (int *)malloc(col * sizeof(int)); 27 | if (!iMatrix[i]) 28 | error("Out of memory error in intMatrix\n"); 29 | } 30 | return iMatrix; 31 | } 32 | 33 | void PintMatrix(int **imatrix, int row, int col) { 34 | int i, j; 35 | for (i = 0; i < row; i++) { 36 | for (j = 0; j < col; j++) 37 | Rprintf("%5d", imatrix[i][j]); 38 | Rprintf("\n"); 39 | } 40 | } 41 | 42 | 43 | double* doubleArray(int num) { 44 | double *dArray = (double *)malloc(num * sizeof(double)); 45 | if (!dArray) 46 | error("Out of memory error in doubleArray\n"); 47 | return dArray; 48 | } 49 | 50 | void PdoubleArray(double *dvector, int length) { 51 | int i; 52 | for (i = 0; i < length; i++) 53 | Rprintf("%14g\n", dvector[i]); 54 | } 55 | 56 | double** doubleMatrix(int row, int col) { 57 | int i; 58 | double **dMatrix = (double **)malloc((size_t)(row * sizeof(double *))); 59 | if (!dMatrix) 60 | error("Out of memory error in doubleMatrix\n"); 61 | for (i = 0; i < row; i++) { 62 | dMatrix[i] = (double *)malloc((size_t)(col * sizeof(double))); 63 | if (!dMatrix[i]) 64 | error("Out of memory error in doubleMatrix\n"); 65 | } 66 | return dMatrix; 67 | } 68 | 69 | void PdoubleMatrix(double **dmatrix, int row, int col) { 70 | int i, j; 71 | for (i = 0; i < row; i++) { 72 | for (j = 0; j < col; j++) 73 | Rprintf("%14g", dmatrix[i][j]); 74 | Rprintf("\n"); 75 | } 76 | } 77 | 78 | double*** doubleMatrix3D(int x, int y, int z) { 79 | int i; 80 | double ***dM3 = (double ***)malloc(x * sizeof(double **)); 81 | if (!dM3) 82 | error("Out of memory error in doubleMatrix3D\n"); 83 | for (i = 0; i < x; 
i++) 84 | dM3[i] = doubleMatrix(y, z); 85 | return dM3; 86 | } 87 | 88 | void PdoubleMatrix3D(double ***dmatrix3D, int x, int y, int z) { 89 | int i, j, k; 90 | for (i = 0; i < x; i++) { 91 | Rprintf("First dimension = %5d\n", i); 92 | for (j = 0; j < y; j++) { 93 | for (k = 0; k < z; k++) 94 | Rprintf("%14g", dmatrix3D[i][j][k]); 95 | Rprintf("\n"); 96 | } 97 | } 98 | } 99 | 100 | long* longArray(int num) { 101 | long *lArray = (long *)malloc(num * sizeof(long)); 102 | if (!lArray) 103 | error("Out of memory error in longArray\n"); 104 | return lArray; 105 | } 106 | 107 | void FreeMatrix(double **Matrix, int row) { 108 | int i; 109 | for (i = 0; i < row; i++) 110 | free(Matrix[i]); 111 | free(Matrix); 112 | } 113 | 114 | void FreeintMatrix(int **Matrix, int row) { 115 | int i; 116 | for (i = 0; i < row; i++) 117 | free(Matrix[i]); 118 | free(Matrix); 119 | } 120 | 121 | void Free3DMatrix(double ***Matrix, int index, int row) { 122 | int i; 123 | for (i = 0; i < index; i++) 124 | FreeMatrix(Matrix[i], row); 125 | free(Matrix); 126 | } 127 | 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /man/CACEcluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CACEcluster.R 3 | \name{CACEcluster} 4 | \alias{CACEcluster} 5 | \title{Estimation of the Complier Average Causal Effects in Cluster-Randomized 6 | Experiments with Unit-level Noncompliance} 7 | \usage{ 8 | CACEcluster( 9 | Y, 10 | D, 11 | Z, 12 | grp, 13 | data = parent.frame(), 14 | match = NULL, 15 | weights = NULL, 16 | ... 17 | ) 18 | } 19 | \arguments{ 20 | \item{Y}{The outcome variable of interest.} 21 | 22 | \item{D}{The unit-level treatment receipt variable. This variable should be 23 | binary but can differ across units within each cluster.} 24 | 25 | \item{Z}{The (randomized) cluster-level encouragement variable. This 26 | variable should be binary. Two units in the same cluster should have the 27 | same value.} 28 | 29 | \item{grp}{A variable indicating clusters of units. Two units in the same 30 | cluster should have the same value.} 31 | 32 | \item{data}{A data frame containing the relevant variables.} 33 | 34 | \item{match}{A variable indicating matched-pairs of clusters. Two units in 35 | the same matched-pair of clusters should have the same value. The default is 36 | \code{NULL} (i.e., no matching).} 37 | 38 | \item{weights}{A variable indicating the population cluster sizes, which 39 | will be used to construct weights for each pair of clusters. Two units in 40 | the same cluster should have the same value. The default is \code{NULL}, in 41 | which case sample cluster sizes will be used for constructing weights.} 42 | 43 | \item{...}{Optional arguments passed to \code{ATEcluster}, which is called 44 | internally.} 45 | } 46 | \value{ 47 | A list of class \code{CACEcluster} which contains the following 48 | items: \item{call}{ The matched call. } \item{ITTY}{ The output object from 49 | \code{ATEcluster} which is used to estimate the ITT effect of the 50 | encouragement on the outcome variable. } \item{ITTD}{ The output object 51 | from \code{ATEcluster} which is used to estimate the ITT effect of the 52 | encouragement on the treatment receipt variable. } \item{n1}{ The total 53 | number of units in the treatment group. } \item{n0}{ The total number of 54 | units in the control group. } \item{Z}{ The treatment variable.
} 55 | \item{est}{ The estimated complier average causal effect. } \item{var}{ The 56 | estimated variance of the complier average causal effect estimator. } 57 | \item{cov}{ The estimated covariance between the two ITT estimators. } \item{m}{ 58 | The number of pairs in the matched-pair design. } \item{N1}{ The population 59 | cluster sizes for the treatment group. } \item{N0}{ The population cluster 60 | sizes for the control group. } \item{w}{ Pair-specific normalized 61 | arithmetic mean weights. These weights sum up to the total number of units 62 | in the sample, i.e., \code{n}. } 63 | } 64 | \description{ 65 | This function estimates various complier average causal effects in 66 | cluster-randomized experiments without using pre-treatment covariates when 67 | unit-level noncompliance exists. Both the encouragement and treatment 68 | variables are assumed to be binary. Currently, only the matched-pair design 69 | is allowed. The details of the methods for this design are given in Imai, 70 | King, and Nall (2007). 71 | } 72 | \references{ 73 | Imai, Kosuke, Gary King, and Clayton Nall (2007). \dQuote{The 74 | Essential Role of Pair Matching in Cluster-Randomized Experiments, with 75 | Application to the Mexican Universal Health Insurance Evaluation}, Technical 76 | Report. Department of Politics, Princeton University. 77 | } 78 | \author{ 79 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 80 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 81 | } 82 | \keyword{design} 83 | -------------------------------------------------------------------------------- /R/ATEnocov.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Calculate the ATE without covariates 3 | ### 4 | #' Estimation of the Average Treatment Effect in Randomized Experiments 5 | #' 6 | #' This function computes the standard ``difference-in-means'' estimate of the 7 | #' average treatment effect in randomized experiments without using 8 | #' pre-treatment covariates. The treatment variable is assumed to be binary. 9 | #' Currently, two designs are allowed: the completely randomized design and 10 | #' the matched-pair design. 11 | #' 12 | #' @useDynLib experiment 13 | #' @importFrom stats coef complete.cases cov fitted ftable lm mahalanobis model.frame model.matrix model.response na.fail na.omit printCoefmat qnorm quantile rbinom rnorm runif pnorm uniroot sd terms var vcov weighted.mean 14 | #' @importFrom utils packageDescription 15 | #' @importFrom MASS mvrnorm 16 | #' @importFrom boot boot 17 | #' 18 | #' @param Y The outcome variable of interest. 19 | #' @param Z The (randomized) treatment variable. This variable should be 20 | #' binary. 21 | #' @param data A data frame containing the relevant variables. 22 | #' @param match A variable indicating matched-pairs. The two units in the same 23 | #' matched-pair should have the same value. 24 | #' @return A list of class \code{ATEnocov} which contains the following items: 25 | #' \item{call}{ The matched call. } \item{Y}{ The outcome variable. } 26 | #' \item{Z}{ The treatment variable. } \item{match}{ The matched-pair 27 | #' indicator variable. } \item{ATE.est}{ The estimated average treatment 28 | #' effect. } \item{ATE.var}{ The estimated variance of the average treatment 29 | #' effect estimator. } \item{diff}{ Within-pair differences if the 30 | #' matched-pair design is analyzed.
} 31 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 32 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 33 | #' @references Imai, Kosuke (2008). \dQuote{Randomization-based Inference and 34 | #' Efficiency Analysis in Experiments under the Matched-Pair Design}, \emph{Statistics in Medicine}. 35 | #' @keywords design 36 | #' @export ATEnocov 37 | ATEnocov <- function(Y, Z, data = parent.frame(), match = NULL){ 38 | 39 | ## an internal function that checks match and returns diff 40 | match.check <- function(Y, Z, match) { 41 | n <- length(Y) 42 | if ((n %% 2) != 0) 43 | stop("pair randomization requires an even number of observations") 44 | if (length(unique(table(match))) > 1) 45 | stop("invalid input for `match'") 46 | if (unique(table(match)) != 2) 47 | stop("invalid input for `match'") 48 | umatch <- sort(unique(match)) 49 | diff <- rep(NA, n/2) 50 | for (i in 1:length(umatch)) 51 | diff[i] <- Y[(Z == 1) & (match == umatch[i])] - 52 | Y[(Z == 0) & (match == umatch[i])] 53 | return(diff) 54 | } 55 | 56 | ## getting the data 57 | call <- match.call() 58 | Y <- eval(call$Y, envir = data) 59 | Z <- eval(call$Z, envir = data) 60 | match <- eval(call$match, envir = data) 61 | 62 | ## checking data 63 | if (sum(sort(unique(Z)) == c(0,1)) != 2) 64 | stop("`Z' should be binary taking the value of 0 or 1") 65 | if (length(Y) != length(Z)) 66 | stop("`Y' and `Z' have different numbers of observations") 67 | if (!is.null(match)) 68 | if (length(match) != length(Y)) 69 | stop("`match' and `Y' have different numbers of observations") 70 | 71 | res <- list(call = call, Y = Y, Z = Z, match = match) 72 | ## ATE for unit randomization 73 | res$ATE.est <- mean(Y[Z==1])-mean(Y[Z==0]) 74 | if (is.null(match)) { # without matching 75 | res$ATE.var <- var(Y[Z==1])/sum(Z==1)+var(Y[Z==0])/sum(Z==0) 76 | } else { # with matching 77 | res$diff <- diff <- match.check(Y, Z, match) 78 | res$ATE.var <- var(diff)/length(diff) 79 | } 80 | class(res) <- "ATEnocov" 81 | return(res) 82 | } 83 | 84 | -------------------------------------------------------------------------------- /R/NIbprobit.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Bayesian probit with nonignorable missing outcomes and 3 | ### multi-valued treatments 4 | ### 5 | 6 | NIbprobit <- function(formula, Xo, Xr, data = parent.frame(), 7 | n.draws = 5000, insample = FALSE, 8 | param = TRUE, mda = TRUE, 9 | p.mean.o = 0, p.prec.o = 0.01, 10 | p.mean.r = 0, p.prec.r = 0.01, 11 | coef.start.o = 0, coef.start.r = 0, 12 | burnin = 0, thin = 0, verbose = TRUE) { 13 | 14 | ## getting Y and D 15 | call <- match.call() 16 | tm <- terms(formula) 17 | attr(tm, "intercept") <- 0 18 | mf <- model.frame(tm, data = data, na.action = 'na.pass') 19 | D <- model.matrix(tm, data = mf) 20 | if (max(D) > 1 || min(D) < 0) 21 | stop("the treatment variable should be a factor variable.") 22 | Y <- model.response(mf) 23 | m <- ncol(D) # number of treatment levels including control 24 | ## getting Xo and Xr 25 | tm <- terms(Xo) 26 | attr(tm, "intercept") <- 1 27 | Xo <- model.matrix(tm, data = data, na.action = 'na.pass') 28 | Xo <- Xo[,(colnames(Xo) != "(Intercept)")] 29 | tm <- terms(Xr) 30 | attr(tm, "intercept") <- 1 31 | Xr <- model.matrix(tm, data = data, na.action = 'na.pass') 32 | Xr <- Xr[,(colnames(Xr) != "(Intercept)")] 33 | ## taking care of NA's in D and X 34 | ind <- complete.cases(cbind(D, Xo, Xr)) 35 | Y <- Y[ind] 36 | D <- D[ind,] 37 | Xo <- Xo[ind,]
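## Note on missing data handling: `ind` above keeps only the rows with fully
## observed D, Xo, and Xr; missing outcomes are retained, flagged through the
## response indicator R below, and the missing Y values are initialized with
## Bernoulli(0.5) draws before the MCMC run.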
38 | Xr <- Xr[ind,] 39 | R <- (!is.na(Y))*1 40 | Y[is.na(Y)] <- rbinom(sum(is.na(Y)), size = 1, prob = 0.5) 41 | cnameso <- c(colnames(D), colnames(Xo)) 42 | cnamesr <- c("1-Y", "Y", colnames(Xr)) 43 | Xo <- cbind(D, Xo) 44 | colnames(Xo) <- cnameso 45 | Xr <- cbind(1-Y, Y, Xr) 46 | colnames(Xr) <- cnamesr 47 | 48 | res <- list(call = call, Y = Y, Xo = Xo, Xr = Xr, n.draws = n.draws) 49 | 50 | n <- length(Y) 51 | ncovo <- ncol(Xo) 52 | ncovr <- ncol(Xr) 53 | ## starting values 54 | if(length(coef.start.o) != ncovo) 55 | coef.start.o <- rep(coef.start.o, ncovo) 56 | if(length(coef.start.r) != ncovr) 57 | coef.start.r <- rep(coef.start.r, ncovr) 58 | 59 | ## prior 60 | if(length(p.mean.o) != ncovo) 61 | p.mean.o <- rep(p.mean.o, ncovo) 62 | if(length(p.mean.r) != ncovr) 63 | p.mean.r <- rep(p.mean.r, ncovr) 64 | if(!is.matrix(p.prec.o)) 65 | p.prec.o <- diag(p.prec.o, ncovo) 66 | if(!is.matrix(p.prec.r)) 67 | p.prec.r <- diag(p.prec.r, ncovr) 68 | 69 | ## checking thinning and burnin intervals 70 | if (n.draws <= 0) 71 | stop("`n.draws' should be a positive integer.") 72 | if (burnin < 0 || burnin >= n.draws) 73 | stop("`burnin' should be a non-negative integer less than `n.draws'.") 74 | if (thin < 0 || thin >= n.draws) 75 | stop("`thin' should be a non-negative integer less than `n.draws'.") 76 | keep <- thin + 1 77 | 78 | ## calling C function to do MCMC 79 | par <- .C("NIbprobit", 80 | as.integer(Y), as.integer(R), 81 | as.double(Xo), as.double(Xr), 82 | as.double(coef.start.o), as.double(coef.start.r), 83 | as.integer(n), as.integer(ncovo), as.integer(ncovr), 84 | as.integer(m), as.double(p.mean.o), as.double(p.mean.r), 85 | as.double(p.prec.o), as.double(p.prec.r), 86 | as.integer(insample), as.integer(param), as.integer(mda), 87 | as.integer(n.draws), as.integer(burnin), 88 | as.integer(keep), as.integer(verbose), 89 | coef.o = double(ncovo*(ceiling((n.draws-burnin)/keep))), 90 | coef.r = double(ncovr*(ceiling((n.draws-burnin)/keep))), 91 | ATE = double((m-1)*(ceiling((n.draws-burnin)/keep))), 92 | BASE = double(m*(ceiling((n.draws-burnin)/keep))), 93 | PACKAGE="experiment") 94 | if (param) { 95 | res$coef.o <- matrix(par$coef.o, byrow = TRUE, ncol = ncovo) 96 | colnames(res$coef.o) <- colnames(Xo) 97 | res$coef.r <- matrix(par$coef.r, byrow = TRUE, ncol = ncovr) 98 | colnames(res$coef.r) <- colnames(Xr) 99 | } 100 | res$ATE <- matrix(par$ATE, byrow = TRUE, ncol = m-1) 101 | res$base <- matrix(par$BASE, byrow = TRUE, ncol = m) 102 | colnames(res$base) <- colnames(D) 103 | 104 | class(res) <- "NIbprobit" 105 | return(res) 106 | } 107 | -------------------------------------------------------------------------------- /man/randomize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/randomize.R 3 | \name{randomize} 4 | \alias{randomize} 5 | \alias{Randomize} 6 | \title{Randomization of the Treatment Assignment for Conducting Experiments} 7 | \usage{ 8 | randomize( 9 | data, 10 | group = c("Treat", "Control"), 11 | ratio = NULL, 12 | indx = NULL, 13 | block = NULL, 14 | n.block = NULL, 15 | match = NULL, 16 | complete = TRUE 17 | ) 18 | } 19 | \arguments{ 20 | \item{data}{A data frame containing the observations to which the treatments 21 | are randomly assigned.} 22 | 23 | \item{group}{A numerical or character vector indicating the 24 | treatment/control groups. The length of the vector equals the total number 25 | of such groups.
The default specifies two groups called \dQuote{Treat} and 26 | \dQuote{Control}.} 27 | 28 | \item{ratio}{An optional numerical vector which specifies the proportion of 29 | the treatment/control groups within the sample. The length of the vector 30 | should equal the number of groups. The default is the equal allocation.} 31 | 32 | \item{indx}{An optional variable name in the data frame to be used as the 33 | names of the observations. If not specified, the row names of the data frame 34 | will be used so long as they are available. If the row names are not 35 | available, the integer sequence starting from 1 will be used.} 36 | 37 | \item{block}{An optional variable name in the data frame or a formula to be 38 | used as the blocking variables for randomized-block designs. If a variable 39 | name is specified, then the unique values of that variable will form blocks 40 | unless \code{n.block} is specified (see below). If a formula is specified, 41 | it will be evaluated using \code{data} and then blocking will be based on 42 | the \code{mahalanobis} distance of the resulting model matrix. In this case, 43 | users may want to specify \code{n.block} to avoid creating blocks that have 44 | too few observations.} 45 | 46 | \item{n.block}{An optional scalar specifying the number of blocks to be 47 | created for randomized block designs. If unspecified, the unique values of 48 | the blocking variable will define blocks. If specified, the blocks of 49 | roughly equal size will be created based on the \code{quantile} of the 50 | blocking variable.} 51 | 52 | \item{match}{An optional variable name in the data frame or a formula to be 53 | used as the matching variables for matched-pair designs. This input is 54 | applicable only to the case where there are two groups. Pairs of 55 | observations will be formed based on the similar values of the matching 56 | variable. If a formula is specified, the \code{mahalanobis} distance of the 57 | resulting model matrix will be used.} 58 | 59 | \item{complete}{logical. If it equals \code{TRUE} (default), then complete 60 | randomization will be performed (within each block if randomized block 61 | designs are used). Otherwise, simple randomization will be implemented. For 62 | matched-pair designs, \code{complete} has to equal \code{TRUE}.} 63 | } 64 | \value{ 65 | A list of class \code{randomize} which contains the following items: 66 | \item{call}{ the matched call. } \item{treatment}{ The vector of randomized 67 | treatments. } \item{data}{ The data frame that was used to conduct the 68 | randomization. } \item{block}{ The blocking variable that was used to 69 | implement randomized-block designs. } \item{match}{ The matching variable 70 | that was used to implement matched-pair designs. } \item{block.id}{ The 71 | variable indicating which observations belong to which blocks in 72 | randomized-block designs. } \item{match.id}{ The variable indicating which 73 | observations belong to which pairs in matched-pair designs. } 74 | } 75 | \description{ 76 | This function can be used to randomize the treatment assignment for 77 | randomized experiments. In addition to the complete randomization, it 78 | implements randomized-block and matched-pair designs. 79 | } 80 | \details{ 81 | Randomized-block designs refer to the complete randomization of the 82 | treatment within the pre-specified blocks which contain multiple 83 | observations. 
Matched-pair designs refer to the randomization of the binary 84 | treatment variable within the pre-specified pair of observations. 85 | } 86 | \author{ 87 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 88 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 89 | } 90 | \keyword{design} 91 | -------------------------------------------------------------------------------- /man/ATEbounds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ATEbounds.R 3 | \name{ATEbounds} 4 | \alias{ATEbounds} 5 | \title{Bounding the Average Treatment Effect when some of the Outcome Data are 6 | Missing} 7 | \usage{ 8 | ATEbounds( 9 | formula, 10 | data = parent.frame(), 11 | maxY = NULL, 12 | minY = NULL, 13 | alpha = 0.05, 14 | n.reps = 0, 15 | strata = NULL, 16 | ratio = NULL, 17 | survey = NULL, 18 | ... 19 | ) 20 | } 21 | \arguments{ 22 | \item{formula}{A formula of the form \code{Y ~ X} where \code{Y} is the name 23 | of the outcome variable and \code{X} is the name of the (randomized) 24 | treatment variable. \code{X} should be a factor variable but its value can 25 | take more than two levels. The missing values for \code{Y} should be coded 26 | as \code{NA}.} 27 | 28 | \item{data}{A data frame containing the relevant variables.} 29 | 30 | \item{maxY}{A scalar. The maximum value of the outcome variable. The default 31 | is the maximum sample value.} 32 | 33 | \item{minY}{A scalar. The minimum value of the outcome variable. The default 34 | is the minimum sample value.} 35 | 36 | \item{alpha}{A positive scalar that is less than or equal to 0.5. This will 37 | determine the (1-\code{alpha}) level of confidence intervals. The default is 38 | \code{0.05}.} 39 | 40 | \item{n.reps}{A positive integer. The number of bootstrap replicates used 41 | for the construction of confidence intervals via the B-method of Beran (1988). 42 | If it equals zero, the confidence intervals will not be constructed.} 43 | 44 | \item{strata}{The variable name indicating strata. If this is specified, the 45 | quantities of interest will be first calculated within each stratum and then 46 | aggregated. The default is \code{NULL}.} 47 | 48 | \item{ratio}{A \eqn{J \times M} matrix of probabilities where \eqn{J} is the 49 | number of strata and \eqn{M} is the number of treatment and control groups. 50 | Each element of the matrix specifies the probability of a unit falling into 51 | that category. The default is \code{NULL} in which case the sample estimates 52 | of these probabilities are used for computation.} 53 | 54 | \item{survey}{The variable name for survey weights. The default is 55 | \code{NULL}.} 56 | 57 | \item{...}{The arguments passed to other functions.} 58 | } 59 | \value{ 60 | A list of class \code{ATEbounds} which contains the following items: 61 | \item{call}{ The matched call. } \item{Y}{ The outcome variable. } 62 | \item{D}{ The treatment variable. } \item{bounds}{ The point estimates of 63 | the sharp bounds on the average treatment effect. } \item{bounds.Y}{ The 64 | point estimates of the sharp bounds on the outcome variable within each 65 | treatment/control group. } \item{bmethod.ci}{ The B-method confidence 66 | interval of the bounds on the average treatment effect. } \item{bonf.ci}{ 67 | The Bonferroni confidence interval of the bounds on the average treatment 68 | effect.
} \item{bonf.ci.Y}{ The Bonferroni confidence interval of the 69 | bounds on the outcome variable within each treatment/control group. } 70 | \item{bmethod.ci.Y}{ The B-method confidence interval of the bounds on the 71 | outcome variable within each treatment/control group. } \item{maxY}{ The 72 | maximum value of the outcome variable used in the computation. } 73 | \item{minY}{ The minimum value of the outcome variable used in the 74 | computation. } \item{nobs}{ The number of observations. } \item{nobs.Y}{ 75 | The number of observations within each treatment/control group. } 76 | \item{ratio}{ The probability of treatment assignment (within each stratum if 77 | \code{strata} is specified) used in the computation. } 78 | } 79 | \description{ 80 | This function computes the sharp bounds on the average treatment effect when 81 | some of the outcome data are missing. The confidence intervals for the 82 | bounds are also computed. 83 | } 84 | \details{ 85 | For the details of the method implemented by this function, see the 86 | references. 87 | } 88 | \references{ 89 | Horowitz, Joel L. and Charles F. Manski. (1998). 90 | \dQuote{Censoring of Outcomes and Regressors due to Survey Nonresponse: 91 | Identification and Estimation Using Weights and Imputations.} \emph{Journal 92 | of Econometrics}, Vol. 84, pp.37-58. 93 | 94 | Horowitz, Joel L. and Charles F. Manski. (2000). \dQuote{Nonparametric 95 | Analysis of Randomized Experiments With Missing Covariate and Outcome Data.} 96 | \emph{Journal of the American Statistical Association}, Vol. 95, No. 449, 97 | pp.77-84. 98 | 99 | Harris-Lacewell, Melissa, Kosuke Imai, and Teppei Yamamoto. (2007). 100 | \dQuote{Racial Gaps in the Responses to Hurricane Katrina: An Experimental 101 | Study}, \emph{Technical Report}. Department of Politics, Princeton 102 | University. 103 | } 104 | \author{ 105 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 106 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 107 | } 108 | \keyword{design} 109 | -------------------------------------------------------------------------------- /R/ATOPnoassumption.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Calculates the no-assumption bounds for the ATOP in the presence of missing 3 | ### response under matched-pairs design 4 | ### 5 | 6 | 7 | 8 | #' Bounding the ATOP when some of the Outcome Data are 9 | #' Missing Under the Matched-Pairs Design 10 | #' 11 | #' This function computes the no-assumption bounds on the average treatment effect among always-observed pairs (ATOP) when 12 | #' some of the outcome data are missing. The confidence intervals for the 13 | #' ATOP are also computed. 14 | #' 15 | #' For the details of the method implemented by this function, see the 16 | #' references. 17 | #' 18 | #' @param Ya A vector of the outcomes of the first unit in the matched pairs. The missing values for \code{Ya} should be coded 19 | #' as \code{NA}. 20 | #' @param Yb A vector of the outcomes of the second unit in the matched pairs. The missing values for \code{Yb} should be coded 21 | #' as \code{NA}. 22 | #' @param Ra A vector of the missing data indicators of the first unit in the matched pairs. 23 | #' @param Rb A vector of the missing data indicators of the second unit in the matched pairs. 24 | #' @param Ta A vector of the treatment conditions of the first unit in the matched pairs.
25 | #' @param Tb A vector of the treatment conditions of the second unit in the matched pairs. 26 | #' @param l The lower limit of the outcome. 27 | #' @param u The upper limit of the outcome. 28 | #' @param alpha A positive scalar that is less than or equal to 0.5. This will 29 | #' determine the (1-\code{alpha}) level of confidence intervals. The default is 30 | #' \code{0.05}. 31 | #' @param rep The number of repetitions for bootstrapping. 32 | #' @return A list of class \code{ATOPnoassumption} which contains the following items: 33 | #' \item{LB}{ The lower bound for the ATOP. } \item{UB}{ The upper bound for the ATOP. } 34 | #' \item{LB.CI}{ The lower limit of the confidence interval for the ATOP. } 35 | #'\item{UB.CI}{ The upper limit of the confidence interval for the ATOP. } 36 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 37 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 38 | #' Zhichao Jiang, Department of Politics, Princeton University 39 | #' \email{zhichaoj@@princeton.edu}. 40 | #' @references Kosuke Imai and Zhichao Jiang (2018). 41 | #' \dQuote{A Sensitivity Analysis for Missing Outcomes Due to 42 | #' Truncation-by-Death under the Matched-Pairs Design}, \emph{Technical Report}. Department of Politics, Princeton 43 | #' University. 44 | #' @keywords matched-pairs design 45 | #' @examples 46 | #' data(seguro) 47 | #' attach(seguro) 48 | #' ATOPnoassumption(Ya,Yb,Ra,Rb,Ta,Tb,l=0,u=1,alpha=0.05,rep=100) 49 | #' @export ATOPnoassumption 50 | 51 | ATOPnoassumption = function(Ya,Yb,Ra,Rb,Ta,Tb,l,u,alpha,rep){ 52 | if(!(is.vector(Ya)&is.vector(Yb)&is.vector(Ra)&is.vector(Rb)&is.vector(Ta)&is.vector(Tb))) 53 | stop('Data should be input as vectors') 54 | if (length(unique(apply(cbind(Ya,Yb,Ra,Rb,Ta,Tb),2,length)))!=1) 55 | stop('Vectors should have the same length') 56 | 57 | omega1 <- mean(c(Ya[Ta==1&Ra==1&Rb==1],Yb[Tb==1&Rb==1&Ra==1])) 58 | omega0 <- mean(c(Ya[Ta==0&Ra==1&Rb==1],Yb[Tb==0&Rb==1&Ra==1])) 59 | 60 | pi <- mean(Ra==1&Rb==1) 61 | N <- length(Ya) 62 | 63 | ##### 64 | 65 | indL1 <- 1 66 | indL2 <- 1 67 | indU1 <- 1 68 | indU2 <- 1 69 | if (pi<1/2 | f1(omega1,2-1/pi,u)<l){indL1=2} 70 | if (pi<1/2 | f2(omega0,2-1/pi,l)>u){indL2=2} 71 | if (pi<1/2| f2(omega1,2-1/pi,l)>u){indU1=2} 72 | if (pi<1/2 | f1(omega0,2-1/pi,u)<l){indU2=2} 80 | if (Delta1<0 | f1(omega1,Delta1,u)<l){indL1=0} 81 | if (Delta1<0 | f2(omega0,Delta1,l)>u){indL2=0} 82 | if (Delta1<0| f2(omega1,Delta1,l)>u){indU1=0} 83 | if (Delta1<0 | f1(omega0,Delta1,u)<l){indU2=0} 107 | LB.boots[i] <- max(ifelse(indL1!=0&Delta1.boots>0,f1(omega1.boots,Delta1.boots,u),l)-ifelse(indL2!=0&Delta1.boots>0,f2(omega0.boots,Delta1.boots,l),u),l-u) 108 | UB.boots[i] <- min(ifelse(indU1!=0&Delta1.boots>0,f2(omega1.boots, Delta1.boots,l),u)-ifelse(indU2!=0&Delta1.boots>0,f1(omega0.boots,Delta1.boots,u),l),u-l) 109 | } 110 | #### Calculate CI 111 | sd.LB <- sd(LB.boots) 112 | sd.UB <- sd(UB.boots) 113 | C <- uniroot(CalC,hat.LB=LB,hat.UB=UB,sigma.LB=sd.LB,sigma.UB=sd.UB,alpha=alpha,lower=0,upper=100)$root 114 | LB.CI <- max(LB-C*sd.LB,l-u) 115 | UB.CI <- min(UB+C*sd.UB,u-l) 116 | return(list(LB=LB,UB=UB,LB.CI=LB.CI,UB.CI=UB.CI)) 117 | } 118 | -------------------------------------------------------------------------------- /src/rand.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "vector.h" 8 | #include "subroutines.h" 9 | #include "rand.h" 10 | 11 | /* Multivariate Normal density */ 12 | double dMVN( 13 | double *Y, /* The data */ 14 | double *MEAN, /* The parameters */ 15 | double **SIG_INV, /* inverse of the covariance matrix */ 16 | int dim, /*
dimension */ 17 | int give_log){ /* 1 if log_scale 0 otherwise */ 18 | 19 | int j,k; 20 | double value=0.0; 21 | 22 | for(j=0;j= stub) 56 | error("TruncNorm: lower bound is greater than upper bound\n"); 57 | if (invcdf) { /* inverse cdf method */ 58 | z = qnorm(runif(pnorm(stlb, 0, 1, 1, 0), pnorm(stub, 0, 1, 1, 0)), 59 | 0, 1, 1, 0); 60 | } 61 | else { /* rejection sampling method */ 62 | double tol=2.0; 63 | double temp, M, u, exp_par; 64 | int flag=0; /* 1 if stlb, stub <-tol */ 65 | if(stub<=-tol){ 66 | flag=1; 67 | temp=stub; 68 | stub=-stlb; 69 | stlb=-temp; 70 | } 71 | if(stlb>=tol){ 72 | exp_par=stlb; 73 | while(pexp(stub,1/exp_par,1,0) - pexp(stlb,1/exp_par,1,0) < 0.000001) 74 | exp_par/=2.0; 75 | if(dnorm(stlb,0,1,1) - dexp(stlb,1/exp_par,1) >= 76 | dnorm(stub,0,1,1) - dexp(stub,1/exp_par,1)) 77 | M=exp(dnorm(stlb,0,1,1) - dexp(stlb,1/exp_par,1)); 78 | else 79 | M=exp(dnorm(stub,0,1,1) - dexp(stub,1/exp_par,1)); 80 | do{ 81 | u=unif_rand(); 82 | z=-log(1-u*(pexp(stub,1/exp_par,1,0)-pexp(stlb,1/exp_par,1,0)) 83 | -pexp(stlb,1/exp_par,1,0))/exp_par; 84 | }while(unif_rand() > exp(dnorm(z,0,1,1)-dexp(z,1/exp_par,1))/M ); 85 | if(flag==1) z=-z; 86 | } 87 | else{ 88 | do z=norm_rand(); 89 | while( zstub ); 90 | } 91 | } 92 | return(z*sigma + mu); 93 | } 94 | 95 | 96 | /* Sample from the MVN dist */ 97 | void rMVN( 98 | double *Sample, /* Vector for the sample */ 99 | double *mean, /* The vector of means */ 100 | double **Var, /* The matrix Variance */ 101 | int size) /* The dimension */ 102 | { 103 | int j,k; 104 | double **Model = doubleMatrix(size+1, size+1); 105 | double cond_mean; 106 | 107 | /* draw from mult. normal using SWP */ 108 | for(j=1;j<=size;j++){ 109 | for(k=1;k<=size;k++) 110 | Model[j][k]=Var[j-1][k-1]; 111 | Model[0][j]=mean[j-1]; 112 | Model[j][0]=mean[j-1]; 113 | } 114 | Model[0][0]=-1; 115 | Sample[0]=(double)norm_rand()*sqrt(Model[1][1])+Model[0][1]; 116 | for(j=2;j<=size;j++){ 117 | SWP(Model,j-1,size+1); 118 | cond_mean=Model[j][0]; 119 | for(k=1;k0) 160 | for(k=0;k0) 166 | for(k=0;ku){indL2=0} 82 | if (Delta1<0| f2(omega1,Delta1,l)>u){indU1=0} 83 | if (Delta1<0 | f1(omega0,Delta1,u)0,f1(omega1.boots,Delta1.boots,u),l)-ifelse(indL2!=0&Delta1.boots>0,f2(omega0.boots,Delta1.boots,l),u),l-u) 108 | UB.boots[i] <- min(ifelse(indU1!=0&Delta1.boots>0,f2(omega1.boots, Delta1.boots,l),u)-ifelse(indU2!=0&Delta1.boots>0,f1(omega0.boots,Delta1.boots,u),l),u-l) 109 | 110 | } 111 | #### Calculate CI 112 | sd.LB <- sd(LB.boots) 113 | sd.UB <- sd(UB.boots) 114 | C <- uniroot(CalC,hat.LB=LB,hat.UB=UB,sigma.LB=sd.LB,sigma.UB=sd.UB,alpha=alpha,lower=0,upper=100)$root 115 | LB.CI <- max(LB-C*sd.LB,l-u) 116 | UB.CI <- min(UB+C*sd.UB,u-l) 117 | return(list(LB=LB,UB=UB,LB.CI=LB.CI,UB.CI=UB.CI)) 118 | } 119 | -------------------------------------------------------------------------------- /R/CADEreg.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Regression-based method for the complier average direct effect 3 | ### 4 | ### 5 | 6 | 7 | 8 | #' Regression-based method for the complier average direct effect 9 | #' 10 | #' 11 | #' This function computes the point estimates of the complier average direct effect (CADE) and four 12 | #' different variance estimates: the HC2 variance, the cluster-robust variance, the cluster-robust HC2 13 | #' variance and the variance proposed in the reference. The estimators calculated using this function 14 | #' are cluster-weighted, i.e., the weights are equal for each cluster. 
To obtain the indivudal-weighted 15 | #' estimators, please multiply the recieved treatment and the outcome by \code{n_jJ/N}, where 16 | #' \code{n_j} is the number of individuals in cluster \code{j}, \code{J} is the number of clusters and 17 | #' \code{N} is the total number of individuals. 18 | #' 19 | #' 20 | #' For the details of the method implemented by this function, see the 21 | #' references. 22 | #' 23 | #' @param data A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor. 24 | #' @return A list of class \code{CADEreg} which contains the following items: 25 | #' \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 26 | #' \item{var1.clu}{ The cluster-robust variance of CADE(1). } \item{var0.clu}{ The cluster-robust variance of CADE(0). } 27 | #'\item{var1.clu.hc2}{ The cluster-robust HC2 variance of CADE(1). } 28 | #'\item{var0.clu.hc2}{ The cluster-robust HC2 variance of CADE(0). } 29 | #'\item{var1.hc2}{ The HC2 variance of CADE(1). } 30 | #'\item{var0.hc2}{ The HC2 variance of CADE(0). } 31 | #'\item{var1.ind}{ The individual-robust variance of CADE(1). } 32 | #'\item{var0.ind}{ The individual-robust variance of CADE(0). } 33 | #'\item{var1.reg}{ The proposed variance of CADE(1). } 34 | #'\item{var0.reg}{ The proposed variance of CADE(0). } 35 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 36 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 37 | #' Zhichao Jiang, Department of Politics, Princeton University 38 | #' \email{zhichaoj@@princeton.edu}. 39 | #' @references Kosuke Imai, Zhichao Jiang and Anup Malani (2021). 40 | #' \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Journal of the American Statistical Association}. 
41 | #' @keywords two-stage randomized experiments 42 | #' @export CADEreg 43 | 44 | 45 | CADEreg=function(data){ 46 | ## transform the data into a list 47 | if(!is.factor(data$id)){stop('The cluster id should be a factor variable.')} 48 | cluster.id=unique(data$id) 49 | n.cluster=length(cluster.id) 50 | Z=vector("list", n.cluster) 51 | D=vector("list", n.cluster) 52 | Y=vector("list", n.cluster) 53 | A=rep(0,n.cluster) 54 | for (i in 1:n.cluster){ 55 | Z[[i]]=as.numeric(data$Z[data$id==cluster.id[i]]) 56 | D[[i]]=as.numeric(data$D[data$id==cluster.id[i]]) 57 | Y[[i]]=data$Y[data$id==cluster.id[i]] 58 | if (length(unique(data$A[data$id==cluster.id[i]]))!=1){ 59 | stop( paste0('The assignment mechanism in cluster ',i,' should be the same.')) 60 | } 61 | A[i]=data$A[data$id==cluster.id[i]][1] 62 | } 63 | 64 | 65 | n=sapply(Z,length) 66 | J=length(n) 67 | # weights 68 | W=sum(n) 69 | J1=sum(A) 70 | J0=J-J1 71 | n1=sapply(Z,sum) 72 | n0=n-n1 73 | 74 | index.l=rep(1,J) 75 | index.r=rep(1,J) 76 | for(j in 2:J){ 77 | index.l[j]=1+sum(n[1:(j-1)]) 78 | } 79 | for(j in 1:J){ 80 | index.r[j]=sum(n[1:j]) 81 | } 82 | for (j in 1:J){ 83 | index=index.l[j]:index.r[j] 84 | W[index]=ifelse(A[j]==1, 1/J1,1/(J-J1)) * ifelse(Z[[j]]==1, 1/n1[j],1/(n0[j])) 85 | } 86 | ## Design matrix in the first stage 87 | A.reg= rep(0,sum(n)) 88 | Z.reg=rep(0,sum(n)) 89 | D.reg=rep(0,sum(n)) 90 | Y.reg=rep(0,sum(n)) 91 | for (j in 1:J){ 92 | index=index.l[j]:index.r[j] 93 | A.reg[index]=A[j] 94 | Z.reg[index]=Z[[j]] 95 | D.reg[index]=D[[j]] 96 | Y.reg[index]=Y[[j]] 97 | } 98 | 99 | X= cbind(A.reg, 1-A.reg, Z.reg*A.reg, Z.reg*(1-A.reg) ) 100 | 101 | reg1s=lm(D.reg~0+X,weights=W) 102 | D.hat=X%*%reg1s$coefficients 103 | 104 | M= cbind(A.reg, 1-A.reg, D.hat*A.reg, D.hat*(1-A.reg) ) 105 | reg2s=lm(Y.reg~0+M,weights=as.vector(W)) 106 | res= Y.reg-cbind(A.reg, 1-A.reg, D.reg*A.reg, D.reg*(1-A.reg) )%*%reg2s$coefficients 107 | ### variance 108 | 109 | ## cluster robust variance 110 | MM=t(M)%*%diag(W)%*%M 111 | 112 | var.cluster.med=array(0,dim=c(4,4)) 113 | for( j in 1:J){ 114 | index= index.l[j]:index.r[j] 115 | Mj= M[index,] 116 | if (A[j]==1){ 117 | Sj=cbind(W[index], 0, W[index]*D.hat[index],0) 118 | 119 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 120 | }else{ 121 | Sj=cbind(0,W[index], 0, W[index]*D.hat[index]) 122 | 123 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 124 | 125 | } 126 | } 127 | var.cluster=solve(MM)%*%var.cluster.med%*%solve(MM) 128 | 129 | 130 | ## cluster robust hc2 variance 131 | MM=t(M)%*%diag(W)%*%M 132 | 133 | var.cluster.med=array(0,dim=c(4,4)) 134 | for( j in 1:J){ 135 | index= index.l[j]:index.r[j] 136 | Mj= M[index,] 137 | if (A[j]==1){ 138 | Sj=cbind(W[index], 0, W[index]*D.hat[index],0)*sqrt(J1/(J1-1)) 139 | 140 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 141 | }else{ 142 | Sj=cbind(0,W[index], 0, W[index]*D.hat[index])*sqrt((J-J1)/(J-J1-1)) 143 | 144 | var.cluster.med=var.cluster.med+t(Sj)%*%res[index] %*%t(t(Sj)%*%res[index]) 145 | 146 | } 147 | } 148 | 149 | var.cluster.hc2=solve(MM)%*%var.cluster.med%*%solve(MM) 150 | ### individual robust hc2 151 | res.ind=rep(0,sum(n)) 152 | var.ind.med=array(0,dim=c(4,4)) 153 | for (j in 1:J){ 154 | index= index.l[j]:index.r[j] 155 | adj1= sum(res[index]*Z[[j]]/sum(Z[[j]])) 156 | adj0= sum(res[index]*(1-Z[[j]])/sum(1-Z[[j]])) 157 | 158 | res.ind[index]=res[index] - ifelse(Z[[j]]==1,adj1,adj0) 159 | } 160 | 161 | for (j in 1:J){ 162 | for (i in 1:n[j]){ 163 |
index=index.l[j]-1+i 164 | var.ind.med=var.ind.med+(M[index,])%*% t( M[index,]) *W[index]^2 * ifelse(Z.reg[index]==1, n1[j]/(n1[j]-1),(n0[j])/(n0[j]-1))*res.ind[index]^2 165 | } 166 | } 167 | var.ind=solve(MM)%*%var.ind.med%*%solve(MM) 168 | 169 | 170 | ### traditional hc2 variance 171 | var.hc2.med=array(0,dim=c(4,4)) 172 | for (j in 1:J){ 173 | for (i in 1:n[j]){ 174 | index=index.l[j]-1+i 175 | if (A[j]==1){ 176 | constant=ifelse(Z.reg[index]==1, J1*n1[j]/(J1*n1[j]-1),J1*n0[j]/(J1*n0[j]-1)) 177 | }else{ 178 | constant=ifelse(Z.reg[index]==1, J0*n1[j]/(J1*n1[j]-1),J0*n0[j]/(J1*n0[j]-1)) 179 | } 180 | 181 | var.hc2.med=var.hc2.med+(M[index,])%*% t( M[index,]) *W[index]^2 * constant*res[index]^2 182 | } 183 | } 184 | var.hc2=solve(MM)%*%var.hc2.med%*%solve(MM) 185 | 186 | ## results 187 | est.CADE1=reg2s$coefficients[3] 188 | est.CADE0=reg2s$coefficients[4] 189 | var1.cluster=var.cluster[3,3] 190 | var0.cluster=var.cluster[4,4] 191 | var1.cluster.hc2=var.cluster.hc2[3,3] 192 | var0.cluster.hc2=var.cluster.hc2[4,4] 193 | var1.ind=var.ind[3,3] 194 | var0.ind=var.ind[4,4] 195 | var1.reg=(1-J1/J)*var.cluster.hc2[3,3]+(J1/J)*var.ind[3,3] 196 | var0.reg=(J1/J)*var.cluster.hc2[4,4]+(1-J1/J)*var.ind[4,4] 197 | var1.hc2=var.hc2[3,3] 198 | var0.hc2=var.hc2[4,4] 199 | return(list(CADE1=est.CADE1,CADE0=est.CADE0, var1.clu=var1.cluster,var0.clu=var0.cluster,var1.clu.hc2=var1.cluster.hc2,var0.clu.hc2=var0.cluster.hc2, var1.ind=var1.ind,var0.ind=var0.ind,var1.reg=var1.reg,var0.reg=var0.reg,var1.hc2=var1.hc2,var0.hc2=var0.hc2)) 200 | } 201 | -------------------------------------------------------------------------------- /R/NoncompMAR.R: -------------------------------------------------------------------------------- 1 | NoncompMAR <- function(formula, Z, D, treat = NULL, data = parent.frame(), 2 | n.draws = 5000, insample = TRUE, param = TRUE, 3 | p.mean.c = 0, p.var.c = 100, p.mean.o = 0, 4 | p.var.o = 100, smooth = 1, tie = 0.0001, mda = TRUE, 5 | coef.start.c = 0, coef.start.o = 0, burnin = 0, 6 | thin = 0, verbose = TRUE) { 7 | 8 | ## getting the data 9 | call <- match.call() 10 | mf <- model.frame(formula, data=data, na.action='na.pass') 11 | X <- model.matrix(formula, data=mf) 12 | Y <- as.integer(model.response(mf)) 13 | N <- length(Y) 14 | Z <- eval(call$Z, envir = data) 15 | D <- eval(call$D, envir = data) 16 | 17 | ## first sort by NA and then sort by treat 18 | treat <- eval(call$treat, envir = data) 19 | if (!is.null(treat)) { 20 | varT <- TRUE 21 | N11 <- length(na.omit(treat)) 22 | indx <- 1:N 23 | indx <- c(indx[!is.na(treat)], indx[is.na(treat)]) 24 | X <- X[indx,] 25 | Y <- Y[indx] 26 | Z <- Z[indx] 27 | D <- D[indx] 28 | tmp <- sort(treat, index.return = TRUE) 29 | difftreat <- diff(tmp$x) 30 | difftreat[difftreat==0] <- tie 31 | treat <- c(difftreat, rep(NA, N-N11)) 32 | indx <- c(tmp$ix, (N11+1):N) 33 | X <- X[indx,] 34 | Y <- Y[indx] 35 | Z <- Z[indx] 36 | D <- D[indx] 37 | } 38 | else { 39 | varT <- FALSE 40 | N11 <- 0 41 | } 42 | Ymax <- max(na.omit(Y)) 43 | Ymiss <- is.na(Y) 44 | Y[Ymiss] <- 1 45 | 46 | ## compliance status 47 | C <- rep(NA, N) 48 | C[Z==1 & D==1] <- 1 49 | C[Z==1 & D==0] <- 0 50 | C[Z==0] <- rbinom(sum(Z==0), 1, 1/2) 51 | res <- list(call = call, Y = Y, X = X, C = C, D = D, Z = Z, 52 | n.draws = n.draws) 53 | 54 | if (varT) { 55 | ## Xo = [a10 1 X Xt] where a10 for compliers without treatment 56 | ## The default category is the never-taker without treatment 57 | Xo <- cbind(0,X) 58 | colnames(Xo) <- c("Complier without treatment", colnames(X)) 59 | Xo[C==1 & 
D==0, 1] <- 1 60 | 61 | ## difference matrix 62 | R <- rbind(c(1, rep(0, N11-2)), c(-1,1,rep(0, N11-3))) 63 | for (i in 1:(N11-4)) 64 | R <- rbind(R, c(rep(0,i),-1,1,rep(0,N11-3-i))) 65 | R <- rbind(R, c(rep(0, N11-3), -1, 1)) 66 | Q <- rbind(0, diag(N11-1)) 67 | Xt <- cbind(c(1,1,rep(0, N11-2)), Q%*%solve(R)) 68 | Xt <- rbind(Xt, matrix(0, ncol=N11, nrow=N-N11)) 69 | tmp <- NULL 70 | for (i in 1:N11) 71 | tmp <- c(tmp, paste("delta", i, sep="")) 72 | colnames(Xt) <- tmp 73 | } 74 | else { 75 | ## Xo = [a11 a10 1 X] where a10 for compliers without treatment 76 | ## a11 for compliers with treatment 77 | ## The default category is the never-taker without treatment 78 | Xo <- cbind(0,0,X) 79 | colnames(Xo) <- c("Complier with treatment", "Complier without treatment", 80 | colnames(X)) 81 | Xo[C==1 & D==1, 1] <- 1 82 | Xo[C==1 & D==0, 2] <- 1 83 | } 84 | ## dimension 85 | res$Xo <- Xo 86 | ncov <- ncol(X) 87 | ncovo <- ncovX <- ncol(Xo) 88 | 89 | ## starting values 90 | if(length(coef.start.c) != ncov) 91 | coef.start.c <- rep(coef.start.c, ncov) 92 | if(length(coef.start.o) != ncovo) 93 | coef.start.o <- rep(coef.start.o, ncovo) 94 | 95 | ## prior 96 | if(length(p.mean.c) != ncov) 97 | p.mean.c <- rep(p.mean.c, ncov) 98 | if(length(p.mean.o) != ncovo) 99 | p.mean.o <- rep(p.mean.o, ncovo) 100 | if(!is.matrix(p.var.c)) 101 | p.var.c <- diag(p.var.c, ncov) 102 | if(!is.matrix(p.var.o)) 103 | p.var.o <- diag(p.var.o, ncovo) 104 | ## prior for smooth terms 105 | ## putting the data and smooth stuff together 106 | if(varT) { 107 | smooth <- diag(smooth*difftreat) 108 | Xo <- cbind(Xo, Xt) 109 | ncovo <- ncol(Xo) 110 | p.mean.o <- c(p.mean.o, rep(0, ncovo-length(p.mean.o))) 111 | coef.start.o <- c(coef.start.o, rep(0, ncovo-length(coef.start.o))) 112 | p.var.o <- rbind(cbind(p.var.o, matrix(0, ncol = ncol(smooth), 113 | nrow = nrow(p.var.o))), 114 | cbind(matrix(0, ncol = ncol(p.var.o), nrow = 115 | nrow(smooth)), smooth)) 116 | } 117 | 118 | ## checking thinning and burnin intervals 119 | if (n.draws <= 0) 120 | stop("`n.draws' should be a positive integer.") 121 | if (burnin < 0 || burnin >= n.draws) 122 | stop("`burnin' should be a non-negative integer less than `n.draws'.") 123 | if (thin < 0 || thin >= n.draws) 124 | stop("`thin' should be a non-negative integer less than `n.draws'.") 125 | keep <- thin + 1 126 | 127 | ## calling C function 128 | if (Ymax == 1) # binary probit 129 | if (param) 130 | allpar <- 6+ncov+ncovo 131 | else 132 | allpar <- 6 133 | else # ordered probit 134 | if (param) 135 | allpar <- 2*(Ymax+1)+1+ncov+ncovo+Ymax 136 | else 137 | allpar <- 2*(Ymax+1)+1 138 | par <- .C("MARprobit", 139 | as.integer(Y), as.integer(Ymiss), as.integer(Ymax), 140 | as.integer(Z), as.integer(D), as.integer(C), 141 | as.double(X), as.double(Xo), 142 | as.double(coef.start.c), as.double(coef.start.o), 143 | as.integer(N), as.integer(n.draws), 144 | as.integer(ncov), as.integer(ncovo), as.integer(ncovX), 145 | as.integer(N11), 146 | as.double(p.mean.c), as.double(p.mean.o), 147 | as.double(solve(p.var.c)), as.double(solve(p.var.o)), 148 | as.integer(insample), as.integer(varT), 149 | as.integer(param), as.integer(mda), as.integer(burnin), 150 | as.integer(keep), as.integer(verbose), 151 | pdStore = double(allpar*(ceiling((n.draws-burnin)/keep))), 152 | PACKAGE="experiment")$pdStore 153 | 154 | ## results 155 | par <- matrix(par, ncol = allpar, byrow=TRUE) 156 | if (Ymax == 1) { # binary probit 157 | if (param) { 158 | res$coefficientsC <- par[,7:(6+ncov)] 159 | if (varT){
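## Column layout of the posterior draws in the binary outcome case: columns
## 1-6 hold the quantities of interest, columns 7 to 6+ncov the compliance
## model coefficients. With varying treatment intensities (varT), the outcome
## coefficients exclude the last N11 columns, which hold the smoothing deltas
## and are extracted separately as coefficientsS.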
160 | res$coefficientsO <- par[,(7+ncov):(6+ncov+ncovo-N11)] 161 | res$coefficientsS <- par[,(7+ncov+ncovo-N11):(6+ncov+ncovo)] 162 | } 163 | else 164 | res$coefficientsO <- par[,(7+ncov):(6+ncov+ncovo)] 165 | } 166 | res$pc <- as.matrix(par[,1]) 167 | res$cace <- as.matrix(par[,2]) 168 | res$itt <- as.matrix(par[,3]) 169 | res$base <- as.matrix(par[,4:6]) 170 | colnames(res$pc) <- "Compliance Prob." 171 | colnames(res$itt) <- "ITT" 172 | colnames(res$cace) <- "CACE" 173 | colnames(res$base) <- c("Baseline for compliers", 174 | "Baseline for noncompliers", 175 | "Baseline for all") 176 | } 177 | else { # ordered probit 178 | if (param) { 179 | res$coefficientsC <- par[,(2*(Ymax+1)+2):(2*(Ymax+1)+1+ncov)] 180 | res$coefficientsO <- par[,(2*(Ymax+1)+2+ncov):(2*(Ymax+1)+1+ncov+ncovo)] 181 | res$thresholds <- 182 | par[,(2*(Ymax+1)+2+ncov+ncovo):(2*(Ymax+1)+1+ncov+ncovo+Ymax)] 183 | tmp <- rep("tau", ncol(res$thresholds)) 184 | for (i in 1:ncol(res$thresholds)) 185 | tmp[i] <- paste("tau", i-1, sep="") 186 | colnames(res$thresholds) <- tmp 187 | } 188 | res$pc <- as.matrix(par[,1]) 189 | res$cace <- as.matrix(par[,2:(Ymax+2)]) 190 | res$itt <- as.matrix(par[,(Ymax+3):(2*(Ymax+1)+1)]) 191 | tmp1 <- rep("ITT", ncol(res$itt)) 192 | tmp2 <- rep("CACE", ncol(res$cace)) 193 | for (i in 1:(Ymax+1)) { 194 | tmp1[i] <- paste("ITT(Y=", i-1, ")", sep="") 195 | tmp2[i] <- paste("CACE(Y=", i-1, ")", sep="") 196 | } 197 | colnames(res$itt) <- tmp1 198 | colnames(res$cace) <- tmp2 199 | colnames(res$pc) <- "Compliance Prob." 200 | } 201 | if (param) { 202 | colnames(res$coefficientsC) <- colnames(X) 203 | if (varT) { 204 | colnames(res$coefficientsO) <- colnames(Xo)[1:(ncovo-N11)] 205 | colnames(res$coefficientsS) <- colnames(Xt) 206 | } 207 | else 208 | colnames(res$coefficientsO) <- colnames(Xo) 209 | } 210 | 211 | class(res) <- "NoncompMAR" 212 | return(res) 213 | } 214 | -------------------------------------------------------------------------------- /R/ATEcluster.R: -------------------------------------------------------------------------------- 1 | #' Estimation of the Average Treatment Effects in Cluster-Randomized 2 | #' Experiments 3 | #' 4 | #' This function estimates various average treatment effects in 5 | #' cluster-randomized experiments without using pre-treatment covariates. The 6 | #' treatment variable is assumed to be binary. Currently, only the matched-pair 7 | #' design is allowed. The details of the methods for this design are given in 8 | #' Imai, King, and Nall (2007). 9 | #' 10 | #' 11 | #' @param Y The outcome variable of interest. 12 | #' @param Z The (randomized) cluster-level treatment variable. This variable 13 | #' should be binary. Two units in the same cluster should have the same value. 14 | #' @param grp A variable indicating clusters of units. Two units in the same 15 | #' cluster should have the same value. 16 | #' @param data A data frame containing the relevant variables. 17 | #' @param match A variable indicating matched-pairs of clusters. Two units in 18 | #' the same matched-pair of clusters should have the same value. The default is 19 | #' \code{NULL} (i.e., no matching). 20 | #' @param weights A variable indicating the population cluster sizes, which 21 | #' will be used to construct weights for each pair of clusters. Two units in 22 | #' the same cluster should have the same value. The default is \code{NULL}, in 23 | #' which case sample cluster sizes will be used for constructing weights.
24 | #' @param fpc A logical variable indicating whether or not finite population 25 | #' correction should be used for estimating the lower bound of CACE variance. 26 | #' This is relevant only when \code{weights} are specified. 27 | #' @return A list of class \code{ATEcluster} which contains the following 28 | #' items: \item{call}{ The matched call. } \item{n}{ The total number of 29 | #' units. } \item{n1}{ The total number of units in the treatment group. } 30 | #' \item{n0}{ The total number of units in the control group. } \item{Y}{ The 31 | #' outcome variable. } \item{Y1bar}{ The cluster-specific (unweighted) average 32 | #' value of the observed outcome for the treatment group. } \item{Y0bar}{ The 33 | #' cluster-specific (unweighted) average value of the observed outcome for the 34 | #' control group. } \item{Y1var}{ The cluster-specific sample variance of 35 | #' the observed outcome for the treatment group. } \item{Y0var}{ The 36 | #' cluster-specific sample variance of the observed outcome for the control 37 | #' group. } \item{Z}{ The treatment variable. } \item{grp}{ The 38 | #' cluster-indicator variable. } \item{match}{ The matched-pair indicator 39 | #' variable. } \item{weights}{ The weight variable in its original form. } 40 | #' \item{est}{ The estimated average treatment effect based on the arithmetic 41 | #' mean weights. } \item{var}{ The estimated variance of the average treatment 42 | #' effect estimator based on the arithmetic mean weights. This uses the 43 | #' variance formula provided in Imai, King, and Nall (2007). } \item{var.lb}{ 44 | #' The estimated sharp lower bound of the cluster average treatment effect 45 | #' estimator using the arithmetic mean weights. } \item{est.dk}{ The estimated 46 | #' average treatment effect based on the harmonic mean weights. } 47 | #' \item{var.dk}{ The estimated variance of the average treatment effect 48 | #' estimator based on the harmonic mean weights. This uses the variance formula 49 | #' provided in Donner and Klar (1993). } \item{dkvar}{ The estimated variance 50 | #' of the average treatment effect estimator based on the harmonic mean 51 | #' weights. This uses the variance formula provided in Imai, King, and Nall 52 | #' (2007). } \item{eff}{ The estimated relative efficiency of the matched-pair 53 | #' design over the completely randomized design (the ratio of two estimated 54 | #' variances). } \item{m}{ The number of pairs in the matched-pair design. } 55 | #' \item{N1}{ The population cluster sizes for the treatment group. } 56 | #' \item{N0}{ The population cluster sizes for the control group. } \item{w1}{ 57 | #' Cluster-specific weights for the treatment group. } \item{w0}{ 58 | #' Cluster-specific weights for the control group. } \item{w}{ Pair-specific 59 | #' normalized arithmetic mean weights. These weights sum up to the total number 60 | #' of units in the sample, i.e., \code{n}. } \item{w.dk}{ Pair-specific 61 | #' normalized harmonic mean weights. These weights sum up to the total number 62 | #' of units in the sample, i.e., \code{n}. } \item{diff}{ Within-pair 63 | #' differences if the matched-pair design is analyzed. This equals the 64 | #' difference between \code{Y1bar} and \code{Y0bar}. } 65 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 66 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 67 | #' @references Donner, A. and N. Klar (1993).
\dQuote{Confidence interval 68 | #' construction for effect measures arising from cluster randomized trials.} 69 | #' \emph{Journal of Clinical Epidemiology}, Vol. 46, No. 2, pp. 123-131. 70 | #' 71 | #' Imai, Kosuke, Gary King, and Clayton Nall (2007). \dQuote{The Essential Role 72 | #' of Pair Matching in Cluster-Randomized Experiments, with Application to the 73 | #' Mexican Universal Health Insurance Evaluation}, Technical Report. Department 74 | #' of Politics, Princeton University. 75 | #' @keywords design 76 | #' @export ATEcluster 77 | ATEcluster <- function(Y, Z, grp, data = parent.frame(), 78 | match = NULL, weights = NULL, fpc = TRUE) { 79 | 80 | call <- match.call() 81 | Y <- eval(call$Y, envir = data) 82 | Z <- eval(call$Z, envir = data) 83 | grp <- eval(call$grp, envir = data) 84 | match <- eval(call$match, envir = data) 85 | weights <- eval(call$weights, envir = data) 86 | 87 | n <- length(Y) 88 | res <- list(call = call, n = n, Y = Y, Z = Z, grp = grp, 89 | match = match, weights = weights) 90 | if (is.null(match)) 91 | stop("This option is not yet available.") 92 | else { 93 | res$m <- m <- length(unique(match)) 94 | res$Y1bar <- Y1bar <- tapply(Y[Z==1], match[Z==1], mean) 95 | res$Y0bar <- Y0bar <- tapply(Y[Z==0], match[Z==0], mean) 96 | res$diff <- diff <- Y1bar-Y0bar 97 | res$n1 <- n1 <- tapply(rep(1, sum(Z==1)), match[Z==1], sum) 98 | res$n0 <- n0 <- tapply(rep(1, sum(Z==0)), match[Z==0], sum) 99 | } 100 | 101 | if (is.null(weights)) { 102 | ## variance for PATE1 (sampling of clusters) 103 | N1 <- w1 <- n1 104 | N0 <- w0 <- n0 105 | } else { 106 | ## variance for PATE2 (double sampling) 107 | w1 <- N1 <- tapply(weights[Z==1], match[Z==1], mean) 108 | w0 <- N0 <- tapply(weights[Z==0], match[Z==0], mean) 109 | } 110 | w <- w1 + w0 111 | w <- n*w/sum(w) 112 | ## estimates 113 | ATE.est <- weighted.mean(diff, w) 114 | ATE.var <- m*sum((w*diff-n*ATE.est/m)^2)/((m-1)*(n^2)) 115 | ## Donner & Klar method: 116 | w.dk <- w1*w0/(w1 + w0) 117 | w.dk <- n*w.dk/sum(w.dk) 118 | ATEdk.est <- weighted.mean(diff, w.dk) 119 | ATEdk.var <- sum(w.dk^2)*sum(w.dk*(diff-ATEdk.est)^2)/(n^3) 120 | ATE.dkvar <- sum(w^2)*sum(w*(diff-ATE.est)^2)/(n^3) 121 | ## lower bound for CATE variance 122 | if (!is.null(weights)) { 123 | Y1var <- tapply(Y[Z==1], match[Z==1], var)/n1 124 | Y0var <- tapply(Y[Z==0], match[Z==0], var)/n0 125 | if (fpc) { 126 | Y1var <- (1-n1/N1)*Y1var 127 | Y0var <- (1-n0/N0)*Y0var 128 | if ((sum(n0 > N0)+sum(n1 > N1))>0) 129 | stop("population size is smaller than sample size") 130 | } 131 | res$Y1var <- Y1var 132 | res$Y0var <- Y0var 133 | res$var.lb <- sum((w/n)^2*(Y1var+Y0var)) 134 | } 135 | ## unbiased estimation 136 | ##Y1sum <- tapply(Y[Z==1], match[Z==1], sum) 137 | ##Y0sum <- tapply(Y[Z==0], match[Z==0], sum) 138 | ##ATE.estU <- 2*(sum(Y1sum)-sum(Y0sum))/n 139 | ##ATE.varU <- 4*m*var(Y1sum-Y0sum)/(n^2) 140 | 141 | ## return the results 142 | res$est <- ATE.est 143 | res$est.dk <- ATEdk.est 144 | res$var <- ATE.var 145 | res$dkvar <- ATE.dkvar 146 | res$var.dk <- ATEdk.var 147 | res$eff <- 1/(1-2*cov(w*Y1bar, w*Y0bar)/(var(w*Y1bar)+var(w*Y0bar))) 148 | res$w <- w 149 | res$w.dk <- w.dk 150 | if (!is.null(match)) { 151 | res$w1 <- w1 152 | res$w0 <- w0 153 | res$N0 <- N0 154 | res$N1 <- N1 155 | } 156 | class(res) <- "ATEcluster" 157 | return(res) 158 | } 159 | -------------------------------------------------------------------------------- /R/randomize.R: -------------------------------------------------------------------------------- 1 | #' Randomization of the Treatment
Assignment for Conducting Experiments 2 | #' 3 | #' This function can be used to randomize the treatment assignment for 4 | #' randomized experiments. In addition to the complete randomization, it 5 | #' implements randomized-block and matched-pair designs. 6 | #' 7 | #' Randomized-block designs refer to the complete randomization of the 8 | #' treatment within the pre-specified blocks which contain multiple 9 | #' observations. Matched-pair designs refer to the randomization of the binary 10 | #' treatment variable within the pre-specified pair of observations. 11 | #' 12 | #' @aliases randomize Randomize 13 | #' @param data A data frame containing the observations to which the treatments 14 | #' are randomly assigned. 15 | #' @param group A numerical or character vector indicating the 16 | #' treatment/control groups. The length of the vector equals the total number 17 | #' of such groups. The default specifies two groups called \dQuote{Treat} and 18 | #' \dQuote{Control}. 19 | #' @param ratio An optional numerical vector which specifies the proportion of 20 | #' the treatment/control groups within the sample. The length of the vector 21 | #' should equal the number of groups. The default is the equal allocation. 22 | #' @param indx An optional variable name in the data frame to be used as the 23 | #' names of the observations. If not specified, the row names of the data frame 24 | #' will be used so long as they are available. If the row names are not 25 | #' available, the integer sequence starting from 1 will be used. 26 | #' @param block An optional variable name in the data frame or a formula to be 27 | #' used as the blocking variables for randomized-block designs. If a variable 28 | #' name is specified, then the unique values of that variable will form blocks 29 | #' unless \code{n.block} is specified (see below). If a formula is specified, 30 | #' it will be evaluated using \code{data} and then blocking will be based on 31 | #' the \code{mahalanobis} distance of the resulting model matrix. In this case, 32 | #' users may want to specify \code{n.block} to avoid creating blocks that have 33 | #' too few observations. 34 | #' @param n.block An optional scalar specifying the number of blocks to be 35 | #' created for randomized block designs. If unspecified, the unique values of 36 | #' the blocking variable will define blocks. If specified, the blocks of 37 | #' roughly equal size will be created based on the \code{quantile} of the 38 | #' blocking variable. 39 | #' @param match An optional variable name in the data frame or a formula to be 40 | #' used as the matching variables for matched-pair designs. This input is 41 | #' applicable only to the case where there are two groups. Pairs of 42 | #' observations will be formed based on the similar values of the matching 43 | #' variable. If a formula is specified, the \code{mahalanobis} distance of the 44 | #' resulting model matrix will be used. 45 | #' @param complete logical. If it equals \code{TRUE} (default), then complete 46 | #' randomization will be performed (within each block if randomized block 47 | #' designs are used). Otherwise, simple randomization will be implemented. For 48 | #' matched-pair designs, \code{complete} has to equal \code{TRUE}. 49 | #' @return A list of class \code{randomize} which contains the following items: 50 | #' \item{call}{ the matched call. } \item{treatment}{ The vector of randomized 51 | #' treatments. } \item{data}{ The data frame that was used to conduct the 52 | #' randomization. 
} \item{block}{ The blocking variable that was used to 53 | #' implement randomized-block designs. } \item{match}{ The matching variable 54 | #' that was used to implement matched-pair designs. } \item{block.id}{ The 55 | #' variable indicating which observations belong to which blocks in 56 | #' randomized-block designs. } \item{match.id}{ The variable indicating which 57 | #' observations belong to which pairs in matched-pair designs. } 58 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 59 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 60 | #' @keywords design 61 | #' @export randomize 62 | randomize <- function(data, group = c("Treat", "Control"), ratio = 63 | NULL, indx = NULL, block = NULL, n.block = NULL, 64 | match = NULL, complete = TRUE){ 65 | 66 | ## call 67 | call <- match.call() 68 | ## data 69 | m <- length(group) 70 | if ((!is.null(call$block)) && (!is.null(call$match))) { 71 | stop("invalid inputs for `block' and `match'.") 72 | } else if (!is.null(call$block)) { ## blocking 73 | if ("formula" %in% class(block)) { 74 | tm <- terms(block) 75 | attr(tm, "intercept") <- 0 76 | data <- model.frame(tm, data = data, na.action = na.fail) 77 | X <- model.matrix(tm, data = data) 78 | block <- mahalanobis(X, apply(X, 2, mean), var(X)) 79 | } else { 80 | block <- eval(call$block, envir = data) 81 | } 82 | } else { ## matching 83 | if (m != 2) 84 | stop("2 groups are required for matching.") 85 | if (!is.null(ratio)) 86 | warning("`ratio' will be ignored.") 87 | if (complete) { 88 | if ("formula" %in% class(match)) { 89 | tm <- terms(match) 90 | attr(tm, "intercept") <- 0 91 | data <- model.frame(tm, data = data, na.action = na.fail) 92 | X <- model.matrix(tm, data = data) 93 | match <- mahalanobis(X, apply(X, 2, mean), var(X)) 94 | } else { 95 | match <- eval(call$match, data) 96 | } 97 | } else { 98 | stop("`complete' should be TRUE for matching.") 99 | } 100 | } 101 | 102 | ## getting index 103 | n <- nrow(data) 104 | if (!is.null(indx)) 105 | indx <- eval(indx, data) 106 | else if (is.null(rownames(data))) 107 | indx <- 1:n 108 | else 109 | indx <- rownames(data) 110 | 111 | ## groups 112 | if (is.null(ratio)) 113 | ratio <- rep(1/m, m) 114 | ratio <- ratio/sum(ratio) 115 | if (sum(ratio < 0) > 0) 116 | stop("invalid input for `ratio'.") 117 | 118 | ## output 119 | res <- list(call = call, ratio = ratio) 120 | 121 | ## blocking and matching variable 122 | if (is.null(block) && is.null(match)) { 123 | if (complete) { # complete randomization 124 | tmp <- ratio2size(n, ratio, group) 125 | ttt <- sample(tmp$vector, n, replace = FALSE) 126 | } else { # simple randomization 127 | ttt <- sample(group, n, replace = TRUE, prob = ratio) 128 | } 129 | names(ttt) <- indx 130 | } else if (is.null(match)) { ## blocking 131 | block.id <- rep(NA, n) 132 | if (is.null(n.block)) { 133 | tmp <- unique(block) 134 | n.block <- length(tmp) 135 | for (i in 1:n.block) 136 | block.id[block == tmp[i]] <- i 137 | } else { 138 | tmp <- quantile(block, (0:(n.block-1))/n.block) 139 | block.id <- rep(0, n) 140 | for (i in 1:n.block) { 141 | block.id[block >= tmp[i]] <- block.id[block >= tmp[i]] + 1 142 | } 143 | if (sum(table(block.id) < m) > 0) 144 | stop("some blocks have too few observations.") 145 | } 146 | ttt <- rep(NA, n) 147 | names(ttt) <- names(block.id) <- indx 148 | for (i in 1:n.block) { 149 | howmany <- sum(block.id == i) 150 | if (complete) { # complete randomization 151 | tmp <- ratio2size(howmany, ratio, group) 152 |
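## ratio2size() (defined at the end of this file) converts the group
## proportions in `ratio' into integer counts for the `howmany' units of this
## block, randomly allocating any remainder; sampling the resulting vector of
## group labels without replacement then completes the within-block complete
## randomization.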
ttt[block.id == i] <- sample(tmp$vector, howmany, replace = FALSE) 153 | } else { 154 | ttt[block.id == i] <- sample(group, sum(block.id == i), 155 | replace = TRUE, prob = ratio) 156 | } 157 | } 158 | res$block <- block 159 | res$block.id <- block.id 160 | } else { ## matching 161 | match.id <- ttt <- rep(NA, n) 162 | names(match.id) <- names(ttt) <- indx 163 | counter <- 1 164 | while (sum(is.na(match.id)) > 1) { 165 | unit <- sample(indx[is.na(match.id)], 1, replace = FALSE) 166 | diff <- abs(match[is.na(match.id)]-match[unit]) 167 | mindiff <- names(sort(diff[diff>0]))[1] 168 | match.id[unit] <- match.id[mindiff] <- counter 169 | tmp <- sample(group, 2, replace = FALSE) 170 | ttt[unit] <- tmp[1] 171 | ttt[mindiff] <- tmp[2] 172 | counter <- counter + 1 173 | } 174 | res$match <- match 175 | res$match.id <- match.id 176 | } 177 | 178 | ## return the results 179 | res$treatment <- ttt 180 | res$data <- data 181 | class(res) <- "randomize" 182 | 183 | return(res) 184 | } 185 | 186 | ### 187 | ### This converts ratio into size while randomly allocating remainders 188 | ### 189 | 190 | ratio2size <- function(n, ratio, group) { 191 | m <- length(ratio) 192 | 193 | size <- round(ratio * n, digits = 0) 194 | if (sum(size) > n) { 195 | tmp <- sample(1:length(size), sum(size)-n, replace = FALSE) 196 | size[tmp] <- size[tmp] - 1 197 | } 198 | if (sum(size) < n) { 199 | tmp <- sample(1:length(size), n-sum(size), replace = FALSE) 200 | size[tmp] <- size[tmp] + 1 201 | } 202 | allgroup <- NULL 203 | for (i in 1:m) 204 | allgroup <- c(allgroup, rep(group[i], size[i])) 205 | return(list(size = size, vector = allgroup)) 206 | 207 | } 208 | -------------------------------------------------------------------------------- /R/CADErand.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Randomization-based method for the complier average direct effect and the complier average spillover effect 3 | ### 4 | ### 5 | 6 | 7 | 8 | #' Randomization-based method for the complier average direct effect and the complier average spillover effect 9 | #' 10 | #' 11 | #' This function computes the point estimates and variance estimates of the complier average direct effect (CADE) and the complier average spillover effect (CASE). 12 | #' The estimators calculated using this function are either individual-weighted or cluster-weighted. The point estimates and variances of ITT effects are also included. 13 | #' 14 | #' 15 | #' For the details of the method implemented by this function, see the 16 | #' references. 17 | #' 18 | #' @param data A data frame containing the relevant variables. The names for the variables should be: ``Z'' for the treatment assignment, ``D'' for the actual received treatment, ``Y'' for the outcome, ``A'' for the treatment assignment mechanism and ``id'' for the cluster ID. The variable for the cluster id should be a factor. 19 | #' @param individual A binary variable with TRUE for individual-weighted estimators and FALSE for cluster-weighted estimators. 20 | #' @return A list of class \code{CADErand} which contains the following items: 21 | #' \item{CADE1}{ The point estimate of CADE(1). } \item{CADE0}{ The point estimate of CADE(0). } 22 | #'\item{CASE1}{ The point estimate of CASE(1). } \item{CASE0}{ The point estimate of CASE(0). } 23 | #'\item{var.CADE1}{ The variance estimate of CADE(1). } 24 | #'\item{var.CADE0}{ The variance estimate of CADE(0). } 25 | #'\item{var.CASE1}{ The variance estimate of CASE(1).
} 26 | #'\item{var.CASE0}{ The variance estimate of CASE(0). } 27 | #'\item{DEY1}{ The point estimate of DEY(1). } \item{DEY0}{ The point estimate of DEY(0). } 28 | #'\item{DED1}{ The point estimate of DED(1). } \item{DED0}{ The point estimate of DED(0). } 29 | #'\item{var.DEY1}{ The variance estimate of DEY(1). } 30 | #'\item{var.DEY0}{ The variance estimate of DEY(0). } 31 | #'\item{var.DED1}{ The variance estimate of DED(1). } 32 | #'\item{var.DED0}{ The variance estimate of DED(0). } 33 | #'\item{SEY1}{ The point estimate of SEY(1). } \item{SEY0}{ The point estimate of SEY(0). } 34 | #'\item{SED1}{ The point estimate of SED(1). } \item{SED0}{ The point estimate of SED(0). } 35 | #'\item{var.SEY1}{ The variance estimate of SEY(1). } 36 | #'\item{var.SEY0}{ The variance estimate of SEY(0). } 37 | #'\item{var.SED1}{ The variance estimate of SED(1). } 38 | #'\item{var.SED0}{ The variance estimate of SED(0). } 39 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 40 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 41 | #' Zhichao Jiang, Department of Politics, Princeton University 42 | #' \email{zhichaoj@@princeton.edu}. 43 | #' @references Kosuke Imai, Zhichao Jiang and Anup Malani (2018). 44 | #' \dQuote{Causal Inference with Interference and Noncompliance in the Two-Stage Randomized Experiments}, \emph{Technical Report}. Department of Politics, Princeton 45 | #' University. 46 | #' @keywords two-stage randomized experiments 47 | #' @export CADErand 48 | 49 | 50 | CADErand=function(data,individual=1){ 51 | ## transform the data into list 52 | if(!is.factor(data$id)){stop('The cluster id variable should be a factor.')} 53 | cluster.id=unique(data$id) 54 | n.cluster=length(cluster.id) 55 | Z=vector("list", n.cluster) 56 | D=vector("list", n.cluster) 57 | Y=vector("list", n.cluster) 58 | A=rep(0,n.cluster) 59 | for (i in 1:n.cluster){ 60 | Z[[i]]=as.numeric(data$Z[data$id==cluster.id[i]]) 61 | D[[i]]=as.numeric(data$D[data$id==cluster.id[i]]) 62 | Y[[i]]=data$Y[data$id==cluster.id[i]] 63 | if (length(unique(data$A[data$id==cluster.id[i]]))!=1){ 64 | stop( paste0('The assignment mechanism must be the same for all units in cluster ',i,'.')) 65 | } 66 | A[i]=data$A[data$id==cluster.id[i]][1] 67 | } 68 | 69 | 70 | 71 | n=sapply(Z,length) 72 | N=sum(n) 73 | J=length(n) 74 | if (individual==1){ 75 | for ( i in 1:n.cluster){ 76 | Y[[i]]=Y[[i]]*n[i]*J/N 77 | D[[i]]=D[[i]]*n[i]*J/N 78 | } 79 | } 80 | est.Dj00=rep(0,J) 81 | est.Dj00= sapply(Difflist(D,Productlist(D,Z)),sum)/(n-sapply(Z,sum))*(1-A) 82 | est.Dj01=rep(0,J) 83 | est.Dj01= sapply(Difflist(D,Productlist(D,Z)),sum)/(n-sapply(Z,sum))*(A) 84 | est.Dj10=rep(0,J) 85 | est.Dj10=sapply(Productlist(D,Z),sum)/(sapply(Z,sum))*(1-A) 86 | est.Dj11=rep(0,J) 87 | est.Dj11= sapply(Productlist(D,Z),sum)/(sapply(Z,sum))*(A) 88 | est.D00= sum(est.Dj00*(1-A))/sum(1-A) 89 | est.D10= sum(est.Dj10*(1-A))/sum(1-A) 90 | est.D01= sum(est.Dj01*(A))/sum(A) 91 | est.D11= sum(est.Dj11*(A))/sum(A) 92 | est.DEDj0=est.Dj10-est.Dj00 93 | est.DEDj1=est.Dj11-est.Dj01 94 | est.DED1=est.D11-est.D01 95 | est.DED0=est.D10-est.D00 96 | est.SED1=est.D11-est.D10 97 | est.SED0=est.D01-est.D00 98 | ### variance 99 | est.xiDE0=sum((est.DEDj0-est.DED0)^2*(1-A))/(sum(1-A)-1) 100 | est.xiDE1=sum((est.DEDj1-est.DED1)^2*(A))/(sum(A)-1) 101 | est.xij01=rep(0,J) 102 | est.xij00=rep(0,J) 103 | est.xij11=rep(0,J) 104 | est.xij10=rep(0,J) 105 | est.xib00=sum((est.Dj00-est.D00)^2*(1-A))/(sum(1-A)-1) 106 |
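## est.xib00, est.xib10, est.xib01, est.xib11: between-cluster sample
## variances of the cluster-level treatment receipt averages, computed within
## each assignment mechanism arm (A = 0 or A = 1); they feed the
## spillover-effect variance estimators var.SED0 and var.SED1 below.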
est.xib10=sum((est.Dj10-est.D10)^2*(1-A))/(sum(1-A)-1) 107 | est.xib01=sum((est.Dj01-est.D01)^2*(A))/(sum(A)-1) 108 | est.xib11=sum((est.Dj11-est.D11)^2*(A))/(sum(A)-1) 109 | for (j in 1:J){ 110 | est.xij01[j]=sum((D[[j]]-est.Dj01[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 111 | est.xij00[j]=sum((D[[j]]-est.Dj00[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 112 | est.xij11[j]=sum((D[[j]]-est.Dj11[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 113 | est.xij10[j]=sum((D[[j]]-est.Dj10[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 114 | } 115 | var.DED0=est.xiDE0*(1/sum(1-A)-1/J)+ sum((est.xij00/(n-sapply(Z,sum))+est.xij10/sapply(Z,sum))*(1-A))/J/sum(1-A) 116 | var.DED1=est.xiDE1*(1/sum(A)-1/J)+ sum((est.xij01/(n-sapply(Z,sum))+est.xij11/sapply(Z,sum))*(A))/J/sum(A) 117 | var.SED1=est.xib11/sum(A)+est.xib10/sum(1-A) 118 | var.SED0=est.xib01/sum(A)+est.xib00/sum(1-A) 119 | 120 | est.Yj00=rep(0,J) 121 | est.Yj00= sapply(Difflist(Y,Productlist(Y,Z)),sum)/(n-sapply(Z,sum))*(1-A) 122 | est.Yj01=rep(0,J) 123 | est.Yj01= sapply(Difflist(Y,Productlist(Y,Z)),sum)/(n-sapply(Z,sum))*(A) 124 | est.Yj10=rep(0,J) 125 | est.Yj10= sapply(Productlist(Y,Z),sum)/(sapply(Z,sum))*(1-A) 126 | est.Yj11=rep(0,J) 127 | est.Yj11= sapply(Productlist(Y,Z),sum)/(sapply(Z,sum))*(A) 128 | est.Y00= sum(est.Yj00*(1-A))/sum(1-A) 129 | est.Y10= sum(est.Yj10*(1-A))/sum(1-A) 130 | est.Y01= sum(est.Yj01*(A))/sum(A) 131 | est.Y11= sum(est.Yj11*(A))/sum(A) 132 | est.DEYj0=est.Yj10-est.Yj00 133 | est.DEYj1=est.Yj11-est.Yj01 134 | est.DEY1=est.Y11-est.Y01 135 | est.DEY0=est.Y10-est.Y00 136 | est.SEY1=est.Y11-est.Y10 137 | est.SEY0=est.Y01-est.Y00 138 | ### variance 139 | est.sigmaDE0=sum((est.DEYj0-est.DEY0)^2*(1-A))/(sum(1-A)-1) 140 | est.sigmaDE1=sum((est.DEYj1-est.DEY1)^2*(A))/(sum(A)-1) 141 | est.sigmaj01=rep(0,J) 142 | est.sigmaj00=rep(0,J) 143 | est.sigmaj11=rep(0,J) 144 | est.sigmaj10=rep(0,J) 145 | est.sigmab00=sum((est.Yj00-est.Y00)^2*(1-A))/(sum(1-A)-1) 146 | est.sigmab10=sum((est.Yj10-est.Y10)^2*(1-A))/(sum(1-A)-1) 147 | est.sigmab01=sum((est.Yj01-est.Y01)^2*(A))/(sum(A)-1) 148 | est.sigmab11=sum((est.Yj11-est.Y11)^2*(A))/(sum(A)-1) 149 | for (j in 1:J){ 150 | est.sigmaj01[j]=sum((Y[[j]]-est.Yj01[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 151 | est.sigmaj00[j]=sum((Y[[j]]-est.Yj00[j])^2*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 152 | est.sigmaj11[j]=sum((Y[[j]]-est.Yj11[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 153 | est.sigmaj10[j]=sum((Y[[j]]-est.Yj10[j])^2*(Z[[j]]))/(sum(Z[[j]])-1) 154 | } 155 | var.DEY0=est.sigmaDE0*(1/sum(1-A)-1/J)+ sum((est.sigmaj00/(n-sapply(Z,sum))+est.sigmaj10/sapply(Z,sum))*(1-A))/J/sum(1-A) 156 | var.DEY1=est.sigmaDE1*(1/sum(A)-1/J)+ sum((est.sigmaj01/(n-sapply(Z,sum))+est.sigmaj11/sapply(Z,sum))*(A))/J/sum(A) 157 | var.SEY1=est.sigmab11/sum(A)+est.sigmab10/sum(1-A) 158 | var.SEY0=est.sigmab01/sum(A)+est.sigmab00/sum(1-A) 159 | 160 | ### covariance 161 | est.zetaDE0=sum((est.DEYj0-est.DEY0)*(est.DEDj0-est.DED0)*(1-A))/(sum(1-A)-1) 162 | est.zetaDE1=sum((est.DEYj1-est.DEY1)*(est.DEDj1-est.DED1)*(A))/(sum(A)-1) 163 | est.zetaj01=rep(0,J) 164 | est.zetaj00=rep(0,J) 165 | est.zetaj11=rep(0,J) 166 | est.zetaj10=rep(0,J) 167 | est.zetab00=sum((est.Yj00-est.Y00)*(est.Dj00-est.D00)*(1-A))/(sum(1-A)-1) 168 | est.zetab10=sum((est.Yj10-est.Y10)*(est.Dj10-est.D10)*(1-A))/(sum(1-A)-1) 169 | est.zetab01=sum((est.Yj01-est.Y01)*(est.Dj01-est.D01)*(A))/(sum(A)-1) 170 | est.zetab11=sum((est.Yj11-est.Y11)*(est.Dj11-est.D11)*(A))/(sum(A)-1) 171 | for (j in 1:J){ 172 | est.zetaj01[j]=sum((Y[[j]]-est.Yj01[j])*(D[[j]]-est.Dj01[j])*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 173 | 
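## within-cluster sample covariances between the outcome and the treatment
## receipt, computed separately for the Z = 0 and Z = 1 units of cluster j;
## these enter the linearization-based variance estimates of the ratio
## estimators CADE and CASE below.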
est.zetaj00[j]=sum((Y[[j]]-est.Yj00[j])*(D[[j]]-est.Dj00[j])*(1-Z[[j]]))/(sum(1-Z[[j]])-1) 174 | est.zetaj11[j]=sum((Y[[j]]-est.Yj11[j])*(D[[j]]-est.Dj11[j])*(Z[[j]]))/(sum(Z[[j]])-1) 175 | est.zetaj10[j]=sum((Y[[j]]-est.Yj10[j])*(D[[j]]-est.Dj10[j])*(Z[[j]]))/(sum(Z[[j]])-1) 176 | } 177 | est.zeta0=est.zetaDE0*(1/sum(1-A)-1/J)+ sum((est.zetaj00/(n-sapply(Z,sum))+est.zetaj10/sapply(Z,sum))*(1-A))/J/sum(1-A) 178 | est.zeta1=est.zetaDE1*(1/sum(A)-1/J)+ sum((est.zetaj01/(n-sapply(Z,sum))+est.zetaj11/sapply(Z,sum))*(A))/J/sum(A) 179 | est.zetab1=est.zetab11/sum(A)+est.zetab10/sum(1-A) 180 | est.zetab0=est.zetab01/sum(A)+est.zetab00/sum(1-A) 181 | 182 | 183 | #### CADE 184 | est.CADE1=est.DEY1/est.DED1 185 | est.CADE0=est.DEY0/est.DED0 186 | est.CASE1=est.SEY1/est.SED1 187 | est.CASE0=est.SEY0/est.SED0 188 | est.varCADE1= (var.DEY1-2*est.CADE1*est.zeta1+est.CADE1^2*var.DED1)/est.DED1^2 189 | est.varCADE0= (var.DEY0-2*est.CADE0*est.zeta0+est.CADE0^2*var.DED0)/est.DED0^2 190 | est.varCASE1= (var.SEY1-2*est.CASE1*est.zetab1+est.CASE1^2*var.SED1)/est.SED1^2 191 | est.varCASE0= (var.SEY0-2*est.CASE0*est.zetab0+est.CASE0^2*var.SED0)/est.SED0^2 192 | return(list(CADE1=est.CADE1,CADE0=est.CADE0,CASE1=est.CASE1,CASE0=est.CASE0, var.CADE1=est.varCADE1,var.CADE0=est.varCADE0,var.CASE1=est.varCASE1,var.CASE0=est.varCASE0, 193 | DEY1=est.DEY1,DEY0=est.DEY0,DED1=est.DED1,DED0=est.DED0, 194 | var.DEY1=var.DEY1,var.DEY0=var.DEY0,var.DED1=var.DED1,var.DED0=var.DED0, 195 | SEY1=est.SEY1,SEY0=est.SEY0,SED1=est.SED1,SED0=est.SED0, 196 | var.SEY1=var.SEY1,var.SEY0=var.SEY0,var.SED1=var.SED1,var.SED0=var.SED0 197 | )) 198 | } 199 | -------------------------------------------------------------------------------- /man/NoncompLI.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/NoncompLI.R 3 | \name{NoncompLI} 4 | \alias{NoncompLI} 5 | \title{Bayesian Analysis of Randomized Experiments with Noncompliance and Missing 6 | Outcomes Under the Assumption of Latent Ignorability} 7 | \usage{ 8 | NoncompLI( 9 | formulae, 10 | Z, 11 | D, 12 | data = parent.frame(), 13 | n.draws = 5000, 14 | param = TRUE, 15 | in.sample = FALSE, 16 | model.c = "probit", 17 | model.o = "probit", 18 | model.r = "probit", 19 | tune.c = 0.01, 20 | tune.o = 0.01, 21 | tune.r = 0.01, 22 | tune.v = 0.01, 23 | p.mean.c = 0, 24 | p.mean.o = 0, 25 | p.mean.r = 0, 26 | p.prec.c = 0.001, 27 | p.prec.o = 0.001, 28 | p.prec.r = 0.001, 29 | p.df.o = 10, 30 | p.scale.o = 1, 31 | p.shape.o = 1, 32 | mda.probit = TRUE, 33 | coef.start.c = 0, 34 | coef.start.o = 0, 35 | tau.start.o = NULL, 36 | coef.start.r = 0, 37 | var.start.o = 1, 38 | burnin = 0, 39 | thin = 0, 40 | verbose = TRUE 41 | ) 42 | } 43 | \arguments{ 44 | \item{formulae}{A list of formulae where the first formula specifies the 45 | (pre-treatment) covariates in the outcome model (the latent compliance 46 | covariate will be added automatically), the second formula specifies the 47 | compliance model, and the third formula defines the covariate specification 48 | for the model for the missing-data mechanism (the latent compliance covariate 49 | will be added automatically). For the outcome model, the formula should take 50 | the standard two-sided R \command{formula} where the outcome variable is 51 | specified on the left-hand side of the formula, which is then separated by 52 | \code{~} from the covariate equation on the right-hand side, e.g., \code{y ~ 53 | x1 + x2}.
For the compliance and missing-data mechanism models, the 54 | one-sided \command{formula} should be used where the left-hand side is left 55 | unspecified, e.g., \code{~ x1 + x2}.} 56 | 57 | \item{Z}{A randomized encouragement variable, which should be a binary 58 | variable in the specified data frame.} 59 | 60 | \item{D}{A treatment variable, which should be a binary variable in the 61 | specified data frame.} 62 | 63 | \item{data}{A data frame which contains the variables that appear in the 64 | model formulae (\code{formulae}), the encouragement variable (\code{Z}), and 65 | the treatment variable (\code{D}).} 66 | 67 | \item{n.draws}{The number of MCMC draws. The default is \code{5000}.} 68 | 69 | \item{param}{A logical variable indicating whether the Monte Carlo draws of 70 | the model parameters should be saved in the output object. The default is 71 | \code{TRUE}.} 72 | 73 | \item{in.sample}{A logical variable indicating whether or not the sample 74 | average causal effect should be calculated using the observed potential 75 | outcome for each unit. If it is set to \code{FALSE}, then the population 76 | average causal effect will be calculated. The default is \code{FALSE}.} 77 | 78 | \item{model.c}{The model for compliance. Either \code{logit} or 79 | \code{probit} model is allowed. The default is \code{probit}.} 80 | 81 | \item{model.o}{The model for outcome. The following six models are allowed: 82 | \code{logit}, \code{probit}, \code{oprobit} (ordered probit regression), 83 | \code{gaussian} (gaussian regression), \code{negbin} (negative binomial 84 | regression), and \code{twopart} (two-part model where the first part is the 85 | probit regression for \eqn{Pr(Y>0|X)} and the second part models 86 | \eqn{p(log(Y)|X, Y>0)} using the gaussian regression). The default is 87 | \code{probit}.} 88 | 89 | \item{model.r}{The model for (non)response. Either \code{logit} or 90 | \code{probit} model is allowed. The default is \code{probit}.} 91 | 92 | \item{tune.c}{Tuning constants for fitting the compliance model. These 93 | positive constants are used to tune the (random-walk) Metropolis-Hastings 94 | algorithm to fit the logit model. Use either a scalar or a vector of 95 | constants whose length equals that of the coefficient vector. The default is 96 | \code{0.01}.} 97 | 98 | \item{tune.o}{Tuning constants for fitting the outcome model. These positive 99 | constants are used to tune the (random-walk) Metropolis-Hastings algorithm 100 | to fit logit, ordered probit, and negative binomial models. Use either a 101 | scalar or a vector of constants whose length equals that of the coefficient 102 | vector for logit and negative binomial models. For the ordered probit model, 103 | use either a scalar or a vector of constants whose length equals that of 104 | cut-point parameters to be estimated. The default is \code{0.01}.} 105 | 106 | \item{tune.r}{Tuning constants for fitting the (non)response model. These 107 | positive constants are used to tune the (random-walk) Metropolis-Hastings 108 | algorithm to fit the logit model. Use either a scalar or a vector of 109 | constants whose length equals that of the coefficient vector. The default is 110 | \code{0.01}.} 111 | 112 | \item{tune.v}{A scalar tuning constant for fitting the variance component of 113 | the negative binomial (outcome) model. The default is \code{0.01}.} 114 | 115 | \item{p.mean.c}{Prior mean for the compliance model. It should be either a 116 | scalar or a vector of appropriate length.
The default is \code{0}.} 117 | 118 | \item{p.mean.o}{Prior mean for the outcome model. It should be either a 119 | scalar or a vector of appropriate length. The default is \code{0}.} 120 | 121 | \item{p.mean.r}{Prior mean for the (non)response model. It should be either 122 | a scalar or a vector of appropriate length. The default is \code{0}.} 123 | 124 | \item{p.prec.c}{Prior precision for the compliance model. It should be 125 | either a positive scalar or a positive semi-definite matrix of appropriate 126 | size. The default is \code{0.001}.} 127 | 128 | \item{p.prec.o}{Prior precision for the outcome model. It should be either a 129 | positive scalar or a positive semi-definite matrix of appropriate size. The 130 | default is \code{0.001}.} 131 | 132 | \item{p.prec.r}{Prior precision for the (non)response model. It should be 133 | either a positive scalar or a positive semi-definite matrix of appropriate 134 | size. The default is \code{0.001}.} 135 | 136 | \item{p.df.o}{A positive integer. Prior degrees of freedom parameter for the 137 | inverse chi-square distribution in the gaussian and twopart (outcome) models. 138 | The default is \code{10}.} 139 | 140 | \item{p.scale.o}{A positive scalar. Prior scale parameter for the inverse 141 | chi-square distribution (for the variance) in the gaussian and twopart 142 | (outcome) models. For the negative binomial (outcome) model, this is used 143 | for the scale parameter of the inverse gamma distribution. The default is 144 | \code{1}.} 145 | 146 | \item{p.shape.o}{A positive scalar. Prior shape for the inverse gamma 147 | distribution in the negative binomial (outcome) model. The default is 148 | \code{1}.} 149 | 150 | \item{mda.probit}{A logical variable indicating whether to use marginal data 151 | augmentation for probit models. The default is \code{TRUE}.} 152 | 153 | \item{coef.start.c}{Starting values for coefficients of the compliance 154 | model. It should be either a scalar or a vector of appropriate length. The 155 | default is \code{0}.} 156 | 157 | \item{coef.start.o}{Starting values for coefficients of the outcome model. 158 | It should be either a scalar or a vector of appropriate length. The default 159 | is \code{0}.} 160 | 161 | \item{tau.start.o}{Starting values for thresholds of the ordered probit 162 | (outcome) model. If it is set to \code{NULL}, then the starting values will 163 | be a sequence starting from 0 and then incrementing by 0.1. The default is 164 | \code{NULL}.} 165 | 166 | \item{coef.start.r}{Starting values for coefficients of the (non)response 167 | model. It should be either a scalar or a vector of appropriate length. The 168 | default is \code{0}.} 169 | 170 | \item{var.start.o}{A positive scalar starting value for the variance of the 171 | gaussian, negative binomial, and twopart (outcome) models. The default is 172 | \code{1}.} 173 | 174 | \item{burnin}{The number of initial burn-in draws for the Markov chain. The 175 | default is \code{0}.} 176 | 177 | \item{thin}{The size of the thinning interval for the Markov chain. The default 178 | is \code{0}.} 179 | 180 | \item{verbose}{A logical variable indicating whether additional progress 181 | reports should be printed while running the code.
The default is \code{TRUE}.} 182 | } 183 | \value{ 184 | An object of class \code{NoncompLI} which contains the following 185 | elements as a list: \item{call}{The matched call.} \item{Y}{The outcome 186 | variable.} \item{D}{The treatment variable.} \item{Z}{The (randomized) 187 | encouragement variable.} \item{R}{The response indicator variable for 188 | \code{Y}.} \item{A}{The indicator variable for (known) always-takers, i.e., 189 | the control units who received the treatment.} \item{C}{The indicator 190 | variable for (known) compliers, i.e., the encouraged units who received the 191 | treatment when there are no always-takers.} \item{Xo}{The matrix of 192 | covariates used for the outcome model.} \item{Xc}{The matrix of covariates 193 | used for the compliance model.} \item{Xr}{The matrix of covariates used for 194 | the (non)response model.} \item{n.draws}{The number of MCMC draws.} 195 | \item{QoI}{The Monte Carlo draws of quantities of interest from their 196 | posterior distributions. Quantities of interest include \code{ITT} 197 | (intention-to-treat) effect, \code{CACE} (complier average causal effect), 198 | \code{Y1barC} (The mean outcome value under the treatment for compliers), 199 | \code{Y0barC} (The mean outcome value under the control for compliers), 200 | \code{YbarN} (The mean outcome value for never-takers), \code{YbarA} (The 201 | mean outcome value for always-takers), \code{pC} (The proportion of 202 | compliers), \code{pN} (The proportion of never-takers), \code{pA} (The 203 | proportion of always-takers) } If \code{param} is set to \code{TRUE}, the 204 | following elements are also included: \item{coefO}{The Monte Carlo draws of 205 | coefficients of the outcome model from their posterior distribution.} 206 | \item{coefO1}{If \code{model = "twopart"}, this element contains the Monte 207 | Carlo draws of coefficients of the outcome model for \eqn{p(log(Y)|X, Y > 208 | 0)} from their posterior distribution.} \item{coefC}{The Monte Carlo draws 209 | of coefficients of the compliance model from their posterior distribution.} 210 | \item{coefA}{If always-takers exist, then this element contains the Monte 211 | Carlo draws of coefficients of the compliance model for always-takers from 212 | their posterior distribution.} \item{coefR}{The Monte Carlo draws of 213 | coefficients of the (non)response model from their posterior distribution.} 214 | \item{sig2}{The Monte Carlo draws of the variance parameter for the 215 | gaussian, negative binomial, and twopart (outcome) models.} 216 | } 217 | \description{ 218 | This function estimates the average causal effects for randomized 219 | experiments with noncompliance and missing outcomes under the assumption of 220 | latent ignorability (Frangakis and Rubin, 1999). The models are based on 221 | Bayesian generalized linear models and are fitted using the Markov chain 222 | Monte Carlo algorithms. Various types of outcome variables can be 223 | analyzed to estimate the Intention-to-Treat effect and Complier Average 224 | Causal Effect. 225 | } 226 | \details{ 227 | For the details of the model being fitted, see the references. Note that 228 | when always-takers exist we fit either two logistic or two probit models by 229 | first modeling whether a unit is a complier or a noncomplier, and then 230 | modeling whether a unit is an always-taker or a never-taker for those who 231 | are classified as non-compliers. 232 | } 233 | \references{ 234 | Frangakis, Constantine E. and Donald B. Rubin. (1999).
235 | \dQuote{Addressing Complications of Intention-to-Treat Analysis in the 236 | Combined Presence of All-or-None Treatment Noncompliance and Subsequent 237 | Missing Outcomes.} \emph{Biometrika}, Vol. 86, No. 2, pp. 365-379. 238 | 239 | Hirano, Keisuke, Guido W. Imbens, Donald B. Rubin, and Xiao-Hua Zhou. 240 | (2000). \dQuote{Assessing the Effect of an Influenza Vaccine in an 241 | Encouragement Design.} \emph{Biostatistics}, Vol. 1, No. 1, pp. 69-88. 242 | 243 | Barnard, John, Constantine E. Frangakis, Jennifer L. Hill, and Donald B. 244 | Rubin. (2003). \dQuote{Principal Stratification Approach to Broken 245 | Randomized Experiments: A Case Study of School Choice Vouchers in New York 246 | (with Discussion)}, \emph{Journal of the American Statistical Association}, 247 | Vol. 98, No. 462, pp. 299-311. 248 | 249 | Horiuchi, Yusaku, Kosuke Imai, and Naoko Taniguchi (2007). \dQuote{Designing 250 | and Analyzing Randomized Experiments: Application to a Japanese Election 251 | Survey Experiment.} \emph{American Journal of Political Science}, Vol. 51, 252 | No. 3 (July), pp. 669-687. 253 | } 254 | \author{ 255 | Kosuke Imai, Department of Government and Department of Statistics, Harvard University 256 | \email{imai@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 257 | } 258 | \keyword{models} 259 | -------------------------------------------------------------------------------- /R/NInocov.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Assumption of nonignorability 3 | ### 4 | 5 | NInocov <- function(Y, D, Z = NULL, CI = 0.95) { 6 | 7 | if (is.null(Z)) { 8 | ## Without noncompliance 9 | n <- length(D) 10 | R <- (!is.na(Y))*1 11 | pi00 <- mean((D == 0)*(R == 0)) 12 | pi01 <- mean((D == 0)*(R == 1)) 13 | pi10 <- mean((D == 1)*(R == 0)) 14 | pi11 <- mean((D == 1)*(R == 1)) 15 | 16 | p01 <- mean(Y[D == 0 & R == 1]) 17 | p11 <- mean(Y[D == 1 & R == 1]) 18 | 19 | p00 <- (p01*((1-p11)*pi00*pi11-(1-p01)*pi01*pi10))/(pi00*pi11*(p01-p11)) 20 | p10 <- (p11*((1-p11)*pi00*pi11-(1-p01)*pi01*pi10))/(pi01*pi10*(p01-p11)) 21 | 22 | p0 <- (p00*pi00+p01*pi01)/(pi00+pi01) 23 | p1 <- (p10*pi10+p11*pi11)/(pi10+pi11) 24 | Ybar <- matrix(c(p0, p1), nrow = 1) 25 | colnames(Ybar) <- c("Y0bar", "Y1bar") 26 | 27 | ATE <- p1-p0 28 | 29 | pi111 <- mean(Y == 1 & D == 1 & R == 1) 30 | pi101 <- mean(Y == 1 & D == 0 & R == 1) 31 | 32 | ## bounds 33 | lb <- (p11*pi11*(pi00+pi01)-(pi00+p01*pi01)*(pi10+pi11))/((pi10+pi11)*(pi00+pi01)) 34 | ub <- ((pi10+p11*pi11)*(pi00+pi01)-p01*pi01*(pi10+pi11))/((pi10+pi11)*(pi00+pi01)) 35 | 36 | ## asymptotic variance of ATE 37 | Sigma <- matrix(NA, ncol = 5, nrow = 5) 38 | Sigma[1,1] <- pi01*(1-pi01) 39 | Sigma[2,2] <- pi10*(1-pi10) 40 | Sigma[3,3] <- pi11*(1-pi11) 41 | Sigma[4,4] <- pi11*p11*(1-pi11*p11) 42 | Sigma[5,5] <- pi01*p01*(1-pi01*p01) 43 | Sigma[1,2] <- Sigma[2,1] <- -pi01*pi10 44 | Sigma[1,3] <- Sigma[3,1] <- -pi01*pi11 45 | Sigma[1,4] <- Sigma[4,1] <- -pi01*pi11*p11 46 | Sigma[1,5] <- Sigma[5,1] <- pi01*p01*(1-pi01) 47 | Sigma[2,3] <- Sigma[3,2] <- -pi10*pi11 48 | Sigma[2,4] <- Sigma[4,2] <- -pi10*pi11*p11 49 | Sigma[2,5] <- Sigma[5,2] <- -pi10*p01*pi01 50 | Sigma[3,4] <- Sigma[4,3] <- pi11*p11*(1-pi11) 51 | Sigma[3,5] <- Sigma[5,3] <- -pi11*pi01*p01 52 | Sigma[4,5] <- Sigma[5,4] <- -pi11*p11*pi01*p01 53 | 54 | Delta <- matrix(NA, ncol = 1, nrow = 5) 55 | A <- pi11*p11*(pi00+pi01)-pi01*p01*(pi10+pi11) 56 | B <- pi00*pi11-pi10*pi01 57 | C <- pi11*pi01*(p01-p11)*(pi00+pi01)*(pi10+pi11) 58 | Delta[1] <-
-(pi10+pi11)*A+A*(B-A)*p11/(pi01*(p01-p11)) 59 | Delta[2] <- -(p11*pi11+p01*pi01)*(B-2*A)-(pi11+pi01)*A - 60 | A*(B-A)*(pi00+pi01-pi10-pi11)/((pi00+pi01)*(pi10+pi11)) 61 | Delta[3] <- -(p11*pi11+p01*pi01)*(B-2*A)+(pi00-pi11)*A - 62 | A*(B-A)*(p01/((p01-p11)*pi11)+(pi00+pi01-pi10-pi11)/((pi00+pi01)*(pi10+pi11))) 63 | Delta[4] <- (pi00+pi01)*(B-2*A)+A*(B-A)/(pi11*(p01-p11)) 64 | Delta[5] <- -(pi10+pi11)*(B-2*A)-A*(B-A)/(pi01*(p01-p11)) 65 | Delta <- Delta/C 66 | 67 | var <- t(Delta)%*%Sigma%*%Delta 68 | se <- sqrt(var/n) 69 | 70 | ps <- matrix(c(p00, p10, p01, p11), nrow = 1) 71 | colnames(ps) <- c("p00", "p10", "p01", "p11") 72 | 73 | pis <- matrix(c(pi00, pi10, pi01, pi11), nrow = 1) 74 | colnames(pis) <- c("pi00", "pi10", "pi01", "pi11") 75 | 76 | ATE <- matrix(c(ATE, se), nrow = 1) 77 | colnames(ATE) <- c("ATE", "s.e.") 78 | z <- qnorm(1-(1-CI)/2) 79 | ci.est <- matrix(c(ATE[1]-z*se, ATE[1]+z*se), nrow = 1) 80 | colnames(ci.est) <- c("lower CI", "upper CI") 81 | return(list(Ybar = Ybar, ATE = ATE, CI = ci.est, 82 | bounds = c(lb, ub), ps = ps, pis = pis, n = n)) 83 | } else { 84 | ## WITH NONCOMPLIANCE 85 | n <- length(D) 86 | R <- (!is.na(Y))*1 87 | pi000 <- mean(D == 0 & R == 0 & Z == 0) 88 | pi100 <- mean(D == 1 & R == 0 & Z == 0) 89 | pi001 <- mean(D == 0 & R == 0 & Z == 1) 90 | pi101 <- mean(D == 1 & R == 0 & Z == 1) 91 | pi010 <- mean(D == 0 & R == 1 & Z == 0) 92 | pi110 <- mean(D == 1 & R == 1 & Z == 0) 93 | pi011 <- mean(D == 0 & R == 1 & Z == 1) 94 | pi111 <- mean(D == 1 & R == 1 & Z == 1) 95 | 96 | p010 <- mean(Y[D == 0 & R == 1 & Z == 0]) 97 | p011 <- mean(Y[D == 0 & R == 1 & Z == 1]) 98 | p110 <- mean(Y[D == 1 & R == 1 & Z == 0]) 99 | p111 <- mean(Y[D == 1 & R == 1 & Z == 1]) 100 | 101 | p000 <- (p010*((1-p011)*pi000*pi011-(1-p010)*pi010*pi001))/(pi000*pi011*(p010-p011)) 102 | p100 <- (p110*((1-p111)*pi100*pi111-(1-p110)*pi110*pi101))/(pi100*pi111*(p110-p111)) 103 | p001 <- (p011*((1-p011)*pi000*pi011-(1-p010)*pi010*pi001))/(pi010*pi001*(p010-p011)) 104 | p101 <- (p111*((1-p111)*pi100*pi111-(1-p110)*pi110*pi101))/(pi110*pi101*(p110-p111)) 105 | 106 | Y0bar <- (p000*pi000+p010*pi010+p100*pi100+p110*pi110)/(pi000+pi010+pi100+pi110) 107 | Y1bar <- (p001*pi001+p011*pi011+p101*pi101+p111*pi111)/(pi001+pi011+pi101+pi111) 108 | Ybar <- matrix(c(Y0bar, Y1bar), nrow = 1) 109 | colnames(Ybar) <- c("Y0bar", "Y1bar") 110 | 111 | ITT <- Y1bar-Y0bar 112 | CACE <- ITT/(mean(D[Z==1])-mean(D[Z==0])) 113 | 114 | ## alternative parameterization 115 | A0 <- p011*(pi011+((1-p011)*pi000*pi011/pi010-(1-p010)*pi001)/(p010-p011)) 116 | A1 <- p111*(pi111+((1-p111)*pi100*pi111/pi110-(1-p110)*pi101)/(p110-p111)) 117 | A <- A0 + A1 118 | C1 <- pi001+pi011+pi101+pi111 119 | C0 <- pi000+pi010+pi100+pi110 120 | Y1bar.alt <- A/C1 121 | 122 | B0 <- p010*(pi010+((1-p011)*pi000-(1-p010)*pi010*pi001/pi011)/(p010-p011)) 123 | B1 <- p110*(pi110+((1-p111)*pi100-(1-p110)*pi110*pi101/pi111)/(p110-p111)) 124 | B <- B0 + B1 125 | Y0bar.alt <- B/C0 126 | 127 | cace <- (Y1bar.alt-Y0bar.alt)*C0*C1 128 | cace.denom <- (pi101+pi111)*C0-(pi100+pi110)*C1 129 | cace <- cace/cace.denom 130 | 131 | ## variances 132 | Sigma <- matrix(NA, ncol = 11, nrow = 11) 133 | Sigma[1,1] <- pi001*(1-pi001) 134 | Sigma[2,2] <- pi010*(1-pi010) 135 | Sigma[3,3] <- pi100*(1-pi100) 136 | Sigma[4,4] <- pi011*(1-pi011) 137 | Sigma[5,5] <- pi101*(1-pi101) 138 | Sigma[6,6] <- pi110*(1-pi110) 139 | Sigma[7,7] <- pi111*(1-pi111) 140 | Sigma[8,8] <- p010*pi010*(1-p010*pi010) 141 | Sigma[9,9] <- p110*pi110*(1-p110*pi110) 142 | Sigma[10,10] <- 
p011*pi011*(1-p011*pi011) 143 | Sigma[11,11] <- p111*pi111*(1-p111*pi111) 144 | 145 | ## covariances 146 | Sigma[1,2] <- Sigma[2,1] <- -pi001*pi010 147 | Sigma[1,3] <- Sigma[3,1] <- -pi001*pi100 148 | Sigma[1,4] <- Sigma[4,1] <- -pi001*pi011 149 | Sigma[1,5] <- Sigma[5,1] <- -pi001*pi101 150 | Sigma[1,6] <- Sigma[6,1] <- -pi001*pi110 151 | Sigma[1,7] <- Sigma[7,1] <- -pi001*pi111 152 | Sigma[1,8] <- Sigma[8,1] <- -pi001*p010*pi010 153 | Sigma[1,9] <- Sigma[9,1] <- -pi001*p110*pi110 154 | Sigma[1,10] <- Sigma[10,1] <- -pi001*p011*pi011 155 | Sigma[1,11] <- Sigma[11,1] <- -pi001*p111*pi111 156 | 157 | Sigma[2,3] <- Sigma[3,2] <- -pi010*pi100 158 | Sigma[2,4] <- Sigma[4,2] <- -pi010*pi011 159 | Sigma[2,5] <- Sigma[5,2] <- -pi010*pi101 160 | Sigma[2,6] <- Sigma[6,2] <- -pi010*pi110 161 | Sigma[2,7] <- Sigma[7,2] <- -pi010*pi111 162 | Sigma[2,8] <- Sigma[8,2] <- p010*pi010*(1-pi010) 163 | Sigma[2,9] <- Sigma[9,2] <- -pi010*p110*pi110 164 | Sigma[2,10] <- Sigma[10,2] <- -pi010*p011*pi011 165 | Sigma[2,11] <- Sigma[11,2] <- -pi010*p111*pi111 166 | 167 | Sigma[3,4] <- Sigma[4,3] <- -pi100*pi011 168 | Sigma[3,5] <- Sigma[5,3] <- -pi100*pi101 169 | Sigma[3,6] <- Sigma[6,3] <- -pi100*pi110 170 | Sigma[3,7] <- Sigma[7,3] <- -pi100*pi111 171 | Sigma[3,8] <- Sigma[8,3] <- -pi100*p010*pi010 172 | Sigma[3,9] <- Sigma[9,3] <- -pi100*p110*pi110 173 | Sigma[3,10] <- Sigma[10,3] <- -pi100*p011*pi011 174 | Sigma[3,11] <- Sigma[11,3] <- -pi100*p111*pi111 175 | 176 | Sigma[4,5] <- Sigma[5,4] <- -pi011*pi101 177 | Sigma[4,6] <- Sigma[6,4] <- -pi011*pi110 178 | Sigma[4,7] <- Sigma[7,4] <- -pi011*pi111 179 | Sigma[4,8] <- Sigma[8,4] <- -pi011*p010*pi010 180 | Sigma[4,9] <- Sigma[9,4] <- -pi011*p110*pi110 181 | Sigma[4,10] <- Sigma[10,4] <- p011*pi011*(1-pi011) 182 | Sigma[4,11] <- Sigma[11,4] <- -pi011*p111*pi111 183 | 184 | Sigma[5,6] <- Sigma[6,5] <- -pi101*pi110 185 | Sigma[5,7] <- Sigma[7,5] <- -pi101*pi111 186 | Sigma[5,8] <- Sigma[8,5] <- -pi101*p010*pi010 187 | Sigma[5,9] <- Sigma[9,5] <- -pi101*p110*pi110 188 | Sigma[5,10] <- Sigma[10,5] <- -pi101*p011*pi011 189 | Sigma[5,11] <- Sigma[11,5] <- -pi101*p111*pi111 190 | 191 | Sigma[6,7] <- Sigma[7,6] <- -pi110*pi111 192 | Sigma[6,8] <- Sigma[8,6] <- -pi110*p010*pi010 193 | Sigma[6,9] <- Sigma[9,6] <- pi110*p110*(1-pi110) 194 | Sigma[6,10] <- Sigma[10,6] <- -pi110*p011*pi011 195 | Sigma[6,11] <- Sigma[11,6] <- -pi110*p111*pi111 196 | 197 | Sigma[7,8] <- Sigma[8,7] <- -pi111*p010*pi010 198 | Sigma[7,9] <- Sigma[9,7] <- -pi111*p110*pi110 199 | Sigma[7,10] <- Sigma[10,7] <- -pi111*p011*pi011 200 | Sigma[7,11] <- Sigma[11,7] <- p111*pi111*(1-pi111) 201 | 202 | Sigma[8,9] <- Sigma[9,8] <- -p010*pi010*p110*pi110 203 | Sigma[8,10] <- Sigma[10,8] <- -p010*pi010*p011*pi011 204 | Sigma[8,11] <- Sigma[11,8] <- -p010*pi010*p111*pi111 205 | 206 | Sigma[9,10] <- Sigma[10,9] <- -p110*pi110*p011*pi011 207 | Sigma[9,11] <- Sigma[11,9] <- -p110*pi110*p111*pi111 208 | 209 | Sigma[10,11] <- Sigma[11,10] <- -p011*pi011*p111*pi111 210 | 211 | ## partial derivatives 212 | dA <- dB <- matrix(NA, ncol = 1, nrow = 11) 213 | pi1010 <- p010*pi010 214 | pi1110 <- p110*pi110 215 | pi1011 <- p011*pi011 216 | pi1111 <- p111*pi111 217 | 218 | A0denom <- pi011*pi1010/pi1011-pi010 219 | A1denom <- pi111*pi1110/pi1111-pi110 220 | numer0 <- (pi011-pi1011)*pi000-pi001*(pi010-pi1010) 221 | numer1 <- (pi111-pi1111)*pi100-pi101*(pi110-pi1110) 222 | dA[1] <- -(pi010-pi1010)/A0denom 223 | dA[2] <- (-pi001*A0denom+numer0)/(A0denom^2) 224 | dA[3] <- (pi111-pi1111)/A1denom 225 | dA[4] <- 
(A0denom*pi000-pi1010*numer0/pi1011)/(A0denom^2) 226 | dA[5] <- -(pi110-pi1110)/A1denom 227 | dA[6] <- (-pi101*A1denom+numer1)/(A1denom^2) 228 | dA[7] <- (pi100*A1denom-pi1110*numer1/pi1111)/(A1denom^2) 229 | dA[8] <- (pi001*A0denom-pi011*numer0/pi1011)/(A0denom^2) 230 | dA[9] <- (pi101*A1denom-pi111*numer1/pi1111)/(A1denom^2) 231 | dA[10] <- 1 + (-pi000*A0denom+pi011*pi1010*numer0/(pi1011^2))/(A0denom^2) 232 | dA[11] <- 1 + (-pi100*A1denom+pi111*pi1110*numer1/(pi1111^2))/(A1denom^2) 233 | 234 | B0denom <- pi011-pi1011*pi010/pi1010 235 | B1denom <- pi111-pi1111*pi110/pi1110 236 | dB[1] <- -(pi010-pi1010)/B0denom 237 | dB[2] <- (-pi001*B0denom+pi1011*numer0/pi1010)/(B0denom^2) 238 | dB[3] <- (pi111-pi1111)/B1denom 239 | dB[4] <- (pi000*B0denom-numer0)/(B0denom^2) 240 | dB[5] <- -(pi110-pi1110)/B1denom 241 | dB[6] <- (-pi101*B1denom+pi1111*numer1/pi1110)/(B1denom^2) 242 | dB[7] <- (pi100*B1denom-numer1)/(B1denom^2) 243 | dB[8] <- 1 + (pi001*B0denom-pi1011*pi010*numer0/(pi1010^2))/(B0denom^2) 244 | dB[9] <- 1 + (pi101*B1denom-pi1111*pi110*numer1/(pi1110^2))/(B1denom^2) 245 | dB[10] <- (-pi000*B0denom+pi010*numer0/pi1010)/(B0denom^2) 246 | dB[11] <- (-pi100*B1denom+pi110*numer1/pi1110)/(B1denom^2) 247 | 248 | dC0 <- dC1 <- matrix(0, ncol = 1, nrow = 11) 249 | dC0[2] <- dC0[3] <- dC0[6] <- dC1[1] <- dC1[4] <- dC1[5] <- dC1[7] <- 1 250 | 251 | ## put together 252 | partial <- (dA*C1 - dC1*A)/(C1^2)-(dB*C0-dC0*B)/(C0^2) 253 | ITT.se <- sqrt(t(partial)%*%Sigma%*%partial/n) 254 | z <- qnorm(1-(1-CI)/2) 255 | ITT.ci <- matrix(c(ITT-z*ITT.se, ITT+z*ITT.se), nrow = 1) 256 | colnames(ITT.ci) <- c("lower CI", "upper CI") 257 | ITT <- matrix(c(ITT, ITT.se), nrow = 1) 258 | colnames(ITT) <- c("ITT est.", "ITT s.e.") 259 | 260 | ## variance for CACE 261 | partial.cace <- (partial*C1*C0 + ITT[1]*(dC1*C0+C1*dC0))*cace.denom 262 | partial.cace[c(1, 2, 4, 8:11)] <- partial.cace[c(1, 2, 4, 8:11)] - 263 | (dC0[c(1, 2, 4, 8:11)]*(pi101+pi111) - 264 | (pi100+pi110)*dC1[c(1, 2, 4, 8:11)])*ITT[1]*C1*C0 265 | partial.cace[c(3, 6)] <- partial.cace[c(3,6)] - 266 | (dC0[c(3,6)]*(pi101+pi111) - C1 - (pi100+pi110)*dC1[c(3,6)])*ITT[1]*C1*C0 267 | partial.cace[c(5, 7)] <- partial.cace[c(5,7)] - 268 | (C0 + dC0[c(5, 7)]*(pi101+pi111) - (pi100+pi110)*dC1[c(5,7)])*ITT[1]*C1*C0 269 | partial.cace <- partial.cace/(cace.denom^2) 270 | CACE.se <- sqrt(t(partial.cace)%*%Sigma%*%partial.cace/n) 271 | CACE.ci <- matrix(c(CACE-z*CACE.se, CACE+z*CACE.se), nrow = 1) 272 | colnames(CACE.ci) <- c("lower CI", "upper CI") 273 | CACE <- matrix(c(CACE, CACE.se), nrow = 1) 274 | colnames(CACE) <- c("CACE est.", "CACE s.e.") 275 | 276 | ## output 277 | ps <- matrix(c(p000, p100, p001, p101, p010, p011, p110, p111), nrow = 1) 278 | colnames(ps) <- c("p000", "p100", "p001", "p101", "p010", "p011", 279 | "p110", "p111") 280 | 281 | pis <- matrix(c(pi000, pi100, pi001, pi101, pi010, pi110, pi011, pi111), nrow = 1) 282 | colnames(pis) <- c("pi000", "pi100", "pi001", "pi101", "pi010", 283 | "pi110", "pi011", "pi111") 284 | 285 | return(list(Ybar = Ybar, ITT = ITT, CACE = CACE, ITT.ci = ITT.ci, 286 | CACE.ci = CACE.ci, ps = ps, pis = pis, n = n)) 287 | } 288 | } 289 | -------------------------------------------------------------------------------- /R/ATEbounds.R: -------------------------------------------------------------------------------- 1 | ### 2 | ### Calculates the bounds for the ATE in the presence of missing 3 | ### response 4 | ### 5 | 6 | 7 | 8 | #' Bounding the Average Treatment Effect when some of the Outcome Data are 9 | #' Missing 10 | 
#' 11 | #' This function computes the sharp bounds on the average treatment effect when 12 | #' some of the outcome data are missing. The confidence intervals for the 13 | #' bounds are also computed. 14 | #' 15 | #' For the details of the method implemented by this function, see the 16 | #' references. 17 | #' 18 | #' @param formula A formula of the form \code{Y ~ X} where \code{Y} is the name 19 | #' of the outcome variable and \code{X} is the name of the (randomized) 20 | #' treatment variable. \code{X} should be a factor variable but its value can 21 | #' take more than two levels. The missing values for \code{Y} should be coded 22 | #' as \code{NA}. 23 | #' @param data A data frame containing the relevant variables. 24 | #' @param maxY A scalar. The maximum value of the outcome variable. The default 25 | #' is the maximum sample value. 26 | #' @param minY A scalar. The minimum value of the outcome variable. The default 27 | #' is the minimum sample value. 28 | #' @param alpha A positive scalar that is less than or equal to 0.5. This will 29 | #' determine the (1-\code{alpha}) level of confidence intervals. The default is 30 | #' \code{0.05}. 31 | #' @param strata The variable name indicating strata. If this is specified, the 32 | #' quantities of interest will be first calculated within each stratum and then 33 | #' aggregated. The default is \code{NULL}. 34 | #' @param ratio A \eqn{J \times M} matrix of probabilities where \eqn{J} is the 35 | #' number of strata and \eqn{M} is the number of treatment and control groups. 36 | #' Each element of the matrix specifies the probability of a unit falling into 37 | #' that category. The default is \code{NULL} in which case the sample estimates 38 | #' of these probabilities are used for computation. 39 | #' @param survey The variable name for survey weights. The default is 40 | #' \code{NULL}. 41 | #' @param n.reps A positive integer. The number of bootstrap replicates used 42 | #' for the construction of confidence intervals via the B-method of Beran (1988). 43 | #' If it equals zero, the confidence intervals will not be constructed. 44 | #' @param ... The arguments passed to other functions. 45 | #' @return A list of class \code{ATEbounds} which contains the following items: 46 | #' \item{call}{ The matched call. } \item{Y}{ The outcome variable. } 47 | #' \item{D}{ The treatment variable. } \item{bounds}{ The point estimates of 48 | #' the sharp bounds on the average treatment effect. } \item{bounds.Y}{ The 49 | #' point estimates of the sharp bounds on the outcome variable within each 50 | #' treatment/control group. } \item{bmethod.ci}{ The B-method confidence 51 | #' interval of the bounds on the average treatment effect. } \item{bonf.ci}{ 52 | #' The Bonferroni confidence interval of the bounds on the average treatment 53 | #' effect. } \item{bonf.ci.Y}{ The Bonferroni confidence interval of the 54 | #' bounds on the outcome variable within each treatment/control group. } 55 | #' \item{bmethod.ci.Y}{ The B-method confidence interval of the bounds on the 56 | #' outcome variable within each treatment/control group. } \item{maxY}{ The 57 | #' maximum value of the outcome variable used in the computation. } 58 | #' \item{minY}{ The minimum value of the outcome variable used in the 59 | #' computation. } \item{nobs}{ The number of observations. } \item{nobs.Y}{ 60 | #' The number of observations within each treatment/control group.
} 61 | #' \item{ratio}{ The probability of treatment assignment (within each stratum if 62 | #' \code{strata} is specified) used in the computation. } 63 | #' @author Kosuke Imai, Department of Government and Department of Statistics, Harvard University 64 | #' \email{imai@@Harvard.Edu}, \url{https://imai.fas.harvard.edu}; 65 | #' @references Horowitz, Joel L. and Charles F. Manski. (1998). 66 | #' \dQuote{Censoring of Outcomes and Regressors due to Survey Nonresponse: 67 | #' Identification and Estimation Using Weights and Imputations.} \emph{Journal 68 | #' of Econometrics}, Vol. 84, pp.37-58. 69 | #' 70 | #' Horowitz, Joel L. and Charles F. Manski. (2000). \dQuote{Nonparametric 71 | #' Analysis of Randomized Experiments With Missing Covariate and Outcome Data.} 72 | #' \emph{Journal of the American Statistical Association}, Vol. 95, No. 449, 73 | #' pp.77-84. 74 | #' 75 | #' Harris-Lacewell, Melissa, Kosuke Imai, and Teppei Yamamoto. (2007). 76 | #' \dQuote{Racial Gaps in the Responses to Hurricane Katrina: An Experimental 77 | #' Study}, \emph{Technical Report}. Department of Politics, Princeton 78 | #' University. 79 | #' @keywords design 80 | #' @export ATEbounds 81 | ATEbounds <- function(formula, data = parent.frame(), maxY = NULL, 82 | minY = NULL, alpha = 0.05, n.reps = 0, 83 | strata = NULL, ratio = NULL, survey = NULL, ...) { 84 | 85 | ## getting Y and D 86 | call <- match.call() 87 | tm <- terms(formula) 88 | attr(tm, "intercept") <- 0 89 | mf <- model.frame(tm, data = data, na.action = 'na.pass') 90 | D <- model.matrix(tm, data = mf) 91 | M <- ncol(D) 92 | if (max(D) > 1 || min(D) < 0) 93 | stop("the treatment variable should be a factor variable.") 94 | Y <- model.response(mf) 95 | if (is.null(maxY)) 96 | maxY <- max(Y, na.rm = TRUE) 97 | if (is.null(minY)) 98 | minY <- min(Y, na.rm = TRUE) 99 | if (!is.null(call$survey)) 100 | survey <- eval(call$survey, data) 101 | else 102 | survey <- rep(1, length(Y)) 103 | ### computing the bounds 104 | if (!is.null(call$strata)) { 105 | strata <- eval(call$strata, data) 106 | res <- boundsAggComp(cbind(Y, strata, D), rep(1, length(Y)), maxY, 107 | minY, alpha = alpha, ratio = ratio, survey = survey) 108 | } else { 109 | res <- boundsComp(cbind(Y, D), rep(1, length(Y)), maxY, minY, 110 | alpha = alpha, survey = survey) 111 | } 112 | 113 | ## CI based on B-method 114 | if (n.reps > 0) { 115 | if (!is.null(call$strata)) { 116 | breps <- boot(data = cbind(Y, strata, D), statistic = boundsAggComp, 117 | R = n.reps, maxY = maxY, minY = minY, alpha = 118 | NULL, survey = survey)$t 119 | res$bmethod.ci <- res$bonf.ci <- matrix(NA, ncol = 2, nrow = choose(M, 2)) 120 | counter <- 1 121 | for (i in 1:(M-1)) 122 | for (j in (i+1):M) { 123 | tmp <- boundsCI(breps[,counter], breps[,counter+1], 124 | res$bounds[(counter+1)/2,1], 125 | res$bounds[(counter+1)/2,2], alpha) 126 | res$bmethod.ci[(counter+1)/2,] <- tmp$bmethod 127 | res$bonf.ci[(counter+1)/2,] <- tmp$bonferroni 128 | counter <- counter + 2 129 | } 130 | } else { 131 | breps <- boot(data = cbind(Y, D), statistic = boundsComp, 132 | R = n.reps, maxY = maxY, minY = minY, alpha = NULL, 133 | survey = survey)$t 134 | res$bmethod.ci.Y <- matrix(NA, ncol = 2, nrow = M) 135 | res$bmethod.ci <- matrix(NA, ncol = 2, nrow = choose(M, 2)) 136 | for (i in 1:M) { 137 | tmp <- boundsCI(breps[,(i-1)*2+1], breps[,i*2], 138 | res$bounds.Y[i,1], 139 | res$bounds.Y[i,2], alpha) 140 | res$bmethod.ci.Y[i,] <- tmp$bmethod 141 | res$bonf.ci.Y[i,] <- tmp$bonferroni 142 | } 143 | counter <- 1 144 | for (i in
1:(M-1)) 145 | for (j in (i+1):M) { 146 | tmp <- boundsCI(breps[,2*M+counter], breps[,2*M+counter+1], 147 | res$bounds[(counter+1)/2,1], 148 | res$bounds[(counter+1)/2,2], alpha) 149 | res$bmethod.ci[(counter+1)/2,] <- tmp$bmethod 150 | res$bonf.ci[(counter+1)/2,] <- tmp$bonferroni 151 | counter <- counter + 2 152 | } 153 | } 154 | } 155 | 156 | ## dimnames 157 | tmp <- NULL 158 | for (i in 1:(M-1)) 159 | for (j in (i+1):M) 160 | tmp <- c(tmp, paste(colnames(D)[i], "-", colnames(D)[j])) 161 | if (is.null(call$strata)) { 162 | rownames(res$bounds.Y) <- rownames(res$bonf.ci.Y) <- colnames(D) 163 | rownames(res$bounds) <- rownames(res$bonf.ci) <- tmp 164 | colnames(res$bounds) <- colnames(res$bounds.Y) <- c("lower", "upper") 165 | colnames(res$bonf.ci) <- colnames(res$bonf.ci.Y) <- 166 | c(paste("lower ", alpha/2, "%CI", sep=""), 167 | paste("upper ", 1-alpha/2, "%CI", sep="")) 168 | if (n.reps > 0) { 169 | rownames(res$bmethod.ci.Y) <- colnames(D) 170 | rownames(res$bmethod.ci) <- tmp 171 | colnames(res$bmethod.ci) <- colnames(res$bmethod.ci.Y) <- 172 | c(paste("lower ", alpha/2, "%CI", sep=""), 173 | paste("upper ", 1-alpha/2, "%CI", sep="")) 174 | } 175 | } else { 176 | rownames(res$bounds) <- tmp 177 | colnames(res$bounds) <- c("lower", "upper") 178 | if (n.reps > 0) { 179 | rownames(res$bmethod.ci) <- rownames(res$bonf.ci) <- tmp 180 | colnames(res$bmethod.ci) <- colnames(res$bonf.ci) <- 181 | c(paste("lower ", alpha/2, "%CI", sep=""), 182 | paste("upper ", 1-alpha/2, "%CI", sep="")) 183 | } 184 | } 185 | res$Y <- Y 186 | res$D <- D 187 | res$call <- call 188 | class(res) <- "ATEbounds" 189 | return(res) 190 | } 191 | 192 | ### 193 | ### An internal function which computes the bounds and bonferroni CI 194 | ### if alpha is specified (when alpha = NULL, then it returns a vector 195 | ### of bounds; this is used for bootstrap) 196 | ### 197 | 198 | boundsComp <- function(data, weights, maxY, minY, alpha = NULL, 199 | survey = NULL) { 200 | Y <- data[,1] 201 | D <- data[,-1] 202 | M <- ncol(D) 203 | bounds.Y <- ci.Y <- vars.Y <- matrix(NA, ncol = 2, nrow = M) 204 | nobs.Y <- NULL 205 | if (is.null(survey)) 206 | survey <- rep(1, length(Y)) 207 | for (i in 1:M) { 208 | Ysub <- Y[D[,i]==1] 209 | w <- weights[D[,i]==1]*survey[D[,i]==1] 210 | n <- length(Ysub) 211 | Ymax <- Ymin <- Ysub 212 | Ymax[is.na(Ysub)] <- maxY 213 | Ymin[is.na(Ysub)] <- minY 214 | ## point estimates of the bounds 215 | bounds.Y[i,] <- c(weighted.mean(Ymin, w), weighted.mean(Ymax, w)) 216 | if (!is.null(alpha)) { 217 | ## variances 218 | vars.Y[i,] <- c(weighted.var(Ymin, w)*sum(w^2)/(sum(w)^2), 219 | weighted.var(Ymax, w)*sum(w^2)/(sum(w)^2)) 220 | ## Bonferroni bounds 221 | ci.Y[i,] <- c(bounds.Y[i,1] - qnorm(1-alpha/2)*sqrt(vars.Y[i,1]), 222 | bounds.Y[i,2] + qnorm(1-alpha/2)*sqrt(vars.Y[i,2])) 223 | } 224 | nobs.Y <- c(nobs.Y, n) 225 | } 226 | 227 | ## Bounds for the ATE 228 | bounds <- ci <- matrix(NA, ncol = 2, nrow = choose(M, 2)) 229 | counter <- 1 230 | nobs <- tmp <- NULL 231 | for (i in 1:(M-1)) { 232 | for (j in (i+1):M) { 233 | bounds[counter,] <- c(bounds.Y[i,1]-bounds.Y[j,2], 234 | bounds.Y[i,2]-bounds.Y[j,1]) 235 | if (!is.null(alpha)) 236 | ci[counter,] <- c(bounds[counter,1] - 237 | qnorm(1-alpha/2)*sqrt(vars.Y[i,1]+vars.Y[j,2]), 238 | bounds[counter,2] + 239 | qnorm(1-alpha/2)*sqrt(vars.Y[i,2]+vars.Y[j,1])) 240 | counter <- counter + 1 241 | nobs <- c(nobs, nobs.Y[i]+nobs.Y[j]) 242 | } 243 | } 244 | 245 | if (is.null(alpha)) 246 | return(c(t(rbind(bounds.Y, bounds)))) 247 | else 248 | 
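## when alpha is supplied, return the group-specific outcome bounds and ATE
## bounds together with their Bonferroni confidence intervals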
253 | ###
254 | ### Aggregate bounds
255 | ###
256 | 
257 | boundsAggComp <- function(data, weights, maxY, minY, alpha = NULL,
258 |                           ratio = NULL, survey = NULL) {
259 |   Y <- data[,1]
260 |   S <- data[,2]
261 |   Svalue <- unique(S)
262 |   J <- length(Svalue)
263 |   D <- data[,3:ncol(data)]
264 |   M <- ncol(D)
265 |   ## compute bounds within each stratum and weights across strata
266 |   res.sub <- list()
267 |   if (is.null(ratio))
268 |     ratio.cal <- TRUE
269 |   else
270 |     ratio.cal <- FALSE
271 |   if (ratio.cal)
272 |     ratio <- matrix(NA, nrow = J, ncol = M)
273 |   if (is.null(survey))
274 |     survey <- rep(1, length(Y))
275 |   for (i in 1:J) {
276 |     sub <- (S == Svalue[i])
277 |     res.sub[[i]] <- boundsComp(data[sub,-2], weights[sub], maxY, minY,
278 |                                0.05, survey[sub])
279 |     if (ratio.cal)
280 |       for (j in 1:M)
281 |         ratio[i,j] <- sum(weights[sub & (D[,j] == 1)])
282 |   }
283 |   if (ratio.cal)
284 |     ratio <- ratio/sum(weights)
285 |   omega <- matrix(NA, nrow = (M-1)*M/2, ncol = J)
286 |   counter <- 1
287 |   for (j in 1:(M-1)) {
288 |     for (k in (j+1):M) {
289 |       tmp <- 0
290 |       for (i in 1:J) {
291 |         omega[counter,i] <- ratio[i,j] + ratio[i,k]
292 |         tmp <- tmp + omega[counter,i]
293 |       }
294 |       omega[counter,] <- omega[counter,]/tmp
295 |       counter <- counter + 1
296 |     }
297 |   }
298 | 
299 |   ## aggregate the results
300 |   bounds <- matrix(0, ncol = 2, nrow = choose(M, 2))
301 |   counter <- 1
302 |   for (j in 1:(M-1)) {
303 |     for (k in (j+1):M) {
304 |       for (i in 1:J) {
305 |         bounds[counter,] <- bounds[counter,] +
306 |           (res.sub[[i]]$bounds)[counter,]*omega[counter,i]
307 |       }
308 |       counter <- counter + 1
309 |     }
310 |   }
311 |   if (is.null(alpha))
312 |     return(c(t(bounds)))
313 |   else
314 |     return(list(bounds = bounds, maxY = maxY, minY = minY,
315 |                 ratio = ratio))
316 |   #return(list(bounds = bounds, maxY = maxY, minY = minY,
317 |   #            ratio = ratio, omega = omega))
318 | }
319 | 
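## The aggregation above is a weighted average of within-stratum bounds:
## for arms (j, k), stratum i receives weight
##   omega[i] = (ratio[i,j] + ratio[i,k]) / sum_i (ratio[i,j] + ratio[i,k]).
## A minimal sketch of the same step with hypothetical numbers:
##   b <- rbind(c(-0.2, 0.4), c(-0.1, 0.3))  # stratum-level [lower, upper]
##   omega <- c(0.6, 0.4)                    # stratum weights, sum to 1
##   colSums(b * omega)                      # aggregated [lower, upper]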
--------------------------------------------------------------------------------
/src/NIstandard.c:
--------------------------------------------------------------------------------
1 | #include <stddef.h>
2 | #include <stdio.h>
3 | #include <string.h>
4 | #include <math.h>
5 | #include <Rmath.h>
6 | #include <R.h>
7 | #include "vector.h"
8 | #include "subroutines.h"
9 | #include "rand.h"
10 | #include "models.h"
11 | 
12 | /*
13 |   Bayesian Binary Probit with Nonignorable Missing Outcomes
14 | */
15 | 
16 | void NIbprobit(int *Y,         /* binary outcome variable */
17 |                int *R,         /* recording indicator for Y */
18 |                double *dXo,    /* covariates */
19 |                double *dXr,    /* covariates */
20 |                double *beta,   /* coefficients */
21 |                double *delta,  /* coefficients */
22 |                int *insamp,    /* # of obs */
23 |                int *incovo,    /* # of covariates */
24 |                int *incovr,    /* # of covariates */
25 |                int *intreat,   /* # of treatments */
26 |                double *beta0,  /* prior mean */
27 |                double *delta0, /* prior mean */
28 |                double *dAo,    /* prior precision */
29 |                double *dAr,    /* prior precision */
30 |                int *Insample,  /* insample QoI */
31 |                int *param,     /* store parameters? */
32 |                int *mda,       /* marginal data augmentation? */
33 |                int *ndraws,    /* # of gibbs draws */
34 |                int *iBurnin,   /* # of burnin */
35 |                int *iKeep,     /* every ?th draws to keep */
36 |                int *verbose,
37 |                double *coefo,  /* storage for coefficients */
38 |                double *coefr,  /* storage for coefficients */
39 |                double *ATE,    /* storage for ATE */
40 |                double *BASE    /* storage for baseline */
41 |                ) {
42 | 
43 |   /*** counters ***/
44 |   int n_samp = *insamp;   /* sample size */
45 |   int n_gen = *ndraws;    /* number of gibbs draws */
46 |   int n_covo = *incovo;   /* number of covariates */
47 |   int n_covr = *incovr;   /* number of covariates */
48 |   int n_treat = *intreat; /* number of treatments */
49 | 
50 |   /*** data ***/
51 |   /* covariates for the response model */
52 |   double **Xr = doubleMatrix(n_samp+n_covr, n_covr+1);
53 |   /* covariates for the outcome model */
54 |   double **Xo = doubleMatrix(n_samp+n_covo, n_covo+1);
55 | 
56 |   /*** model parameters ***/
57 |   double **Ao = doubleMatrix(n_covo, n_covo);
58 |   double **Ar = doubleMatrix(n_covr, n_covr);
59 |   double **mtemp1 = doubleMatrix(n_covo, n_covo);
60 |   double **mtemp2 = doubleMatrix(n_covr, n_covr);
61 | 
62 |   /*** QoIs ***/
63 |   double *base = doubleArray(n_treat);
64 |   double *cATE = doubleArray(n_treat);
65 | 
66 |   /*** storage parameters and loop counters **/
67 |   int progress = 1;
68 |   int keep = 1;
69 |   int i, j, k, main_loop;
70 |   int itemp, itemp0, itemp1, itemp2, itempP = ftrunc((double) n_gen/10);
71 |   double dtemp, pj, r0, r1;
72 | 
73 |   /*** get random seed **/
74 |   GetRNGstate();
75 | 
76 |   /*** read the data ***/
77 |   itemp = 0;
78 |   for (j = 0; j < n_covo; j++)
79 |     for (i = 0; i < n_samp; i++)
80 |       Xo[i][j] = dXo[itemp++];
81 |   itemp = 0;
82 |   for (j = 0; j < n_covr; j++)
83 |     for (i = 0; i < n_samp; i++)
84 |       Xr[i][j] = dXr[itemp++];
85 | 
86 |   /*** read the prior and use it as additional data points ***/
87 |   itemp = 0;
88 |   for (k = 0; k < n_covo; k++)
89 |     for (j = 0; j < n_covo; j++)
90 |       Ao[j][k] = dAo[itemp++];
91 | 
92 |   itemp = 0;
93 |   for (k = 0; k < n_covr; k++)
94 |     for (j = 0; j < n_covr; j++)
95 |       Ar[j][k] = dAr[itemp++];
96 | 
97 |   dcholdc(Ao, n_covo, mtemp1);
98 |   for(i = 0; i < n_covo; i++) {
99 |     Xo[n_samp+i][n_covo] = 0;
100 |     for(j = 0; j < n_covo; j++) {
101 |       Xo[n_samp+i][n_covo] += mtemp1[i][j]*beta0[j];
102 |       Xo[n_samp+i][j] = mtemp1[i][j];
103 |     }
104 |   }
105 | 
106 |   dcholdc(Ar, n_covr, mtemp2);
107 |   for(i = 0; i < n_covr; i++) {
108 |     Xr[n_samp+i][n_covr] = 0;
109 |     for(j = 0; j < n_covr; j++) {
110 |       Xr[n_samp+i][n_covr] += mtemp2[i][j]*delta0[j];
111 |       Xr[n_samp+i][j] = mtemp2[i][j];
112 |     }
113 |   }
114 | 
115 |   /*** Gibbs Sampler! ***/
116 |   itemp = 0; itemp0 = 0; itemp1 = 0; itemp2 = 0;
117 |   for(main_loop = 1; main_loop <= n_gen; main_loop++){
118 | 
119 |     /** Response Model: binary probit **/
120 |     bprobitGibbs(R, Xr, delta, n_samp, n_covr, 0, delta0, Ar, *mda, 1);
121 | 
122 |     /** Outcome Model: binary probit **/
123 |     bprobitGibbs(Y, Xo, beta, n_samp, n_covo, 0, beta0, Ao, *mda, 1);
124 | 
125 |     /** Imputing the missing data **/
126 |     for (i = 0; i < n_samp; i++) {
127 |       if (R[i] == 0) {
128 |         pj = 0;
129 |         r0 = delta[0];
130 |         r1 = delta[1];
131 |         for (j = 0; j < n_covo; j++)
132 |           pj += Xo[i][j]*beta[j];
133 |         for (j = 2; j < n_covr; j++) {
134 |           r0 += Xr[i][j]*delta[j];
135 |           r1 += Xr[i][j]*delta[j];
136 |         }
137 |         pj = pnorm(0, pj, 1, 0, 0);
138 |         r0 = pnorm(0, r0, 1, 0, 0);
139 |         r1 = pnorm(0, r1, 1, 0, 0);
140 |         if (unif_rand() < ((1-r1)*pj/((1-r1)*pj+(1-r0)*(1-pj)))) {
141 |           Y[i] = 1;
142 |           Xr[i][0] = 0;
143 |           Xr[i][1] = 1;
144 |         } else {
145 |           Y[i] = 0;
146 |           Xr[i][0] = 1;
147 |           Xr[i][1] = 0;
148 |         }
149 |       }
150 |     }
151 | 
152 |     /** Compute quantities of interest **/
153 |     for (j = 0; j < n_treat; j++)
154 |       base[j] = 0;
155 |     for (i = 0; i < n_samp; i++) {
156 |       dtemp = 0;
157 |       for (j = n_treat; j < n_covo; j++)
158 |         dtemp += Xo[i][j]*beta[j];
159 |       for (j = 0; j < n_treat; j++) {
160 |         if (*Insample) {
161 |           if (Xo[i][j] == 1)
162 |             base[j] += (double)Y[i];
163 |           else
164 |             base[j] += (double)((dtemp+beta[j]+norm_rand()) > 0);
165 |         } else
166 |           base[j] += pnorm(0, dtemp+beta[j], 1, 0, 0);
167 |       }
168 |     }
169 |     for (j = 0; j < n_treat; j++)
170 |       base[j] /= (double)n_samp;
171 | 
172 |     /** Storing the results **/
173 |     if (main_loop > *iBurnin) {
174 |       if (keep == *iKeep) {
175 |         for (j = 0; j < (n_treat-1); j++)
176 |           ATE[itemp0++] = base[j+1] - base[0];
177 |         for (j = 0; j < n_treat; j++)
178 |           BASE[itemp++] = base[j];
179 |         if (*param) {
180 |           for (i = 0; i < n_covo; i++)
181 |             coefo[itemp1++] = beta[i];
182 |           for (i = 0; i < n_covr; i++)
183 |             coefr[itemp2++] = delta[i];
184 |         }
185 |         keep = 1;
186 |       }
187 |       else
188 |         keep++;
189 |     }
190 | 
191 |     if(*verbose) {
192 |       if(main_loop == itempP) {
193 |         Rprintf("%3d percent done.\n", progress*10);
194 |         itempP += ftrunc((double) n_gen/10);
195 |         progress++;
196 |         R_FlushConsole();
197 |       }
198 |     }
199 |     R_CheckUserInterrupt();
200 |   } /* end of Gibbs sampler */
201 | 
202 |   /** write out the random seed **/
203 |   PutRNGstate();
204 | 
205 |   /** freeing memory **/
206 |   FreeMatrix(Xr, n_samp+n_covr);
207 |   FreeMatrix(Xo, n_samp+n_covo);
208 |   FreeMatrix(Ao, n_covo);
209 |   FreeMatrix(Ar, n_covr);
210 |   FreeMatrix(mtemp1, n_covo);
211 |   FreeMatrix(mtemp2, n_covr);
212 |   free(base);
213 |   free(cATE);
214 | } /* NIbprobit */
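/*
  A sketch of the algebra behind the imputation step above (for
  exposition; not additional estimation code): writing pj = Pr(Y=1 | X)
  and r1 = Pr(R=1 | Y=1), r0 = Pr(R=1 | Y=0), a missing outcome is
  drawn by Bayes' rule from

    Pr(Y=1 | R=0, X) = (1-r1)*pj / ((1-r1)*pj + (1-r0)*(1-pj)),

  after which the first two columns of Xr are reset so that the
  response model conditions on the imputed value of Y.
*/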
215 | 
216 | 
217 | 
218 | 
219 | /*
220 |   Bayesian Binary Mixed Effects Probit with Nonignorable Missing
221 |   Outcomes
222 | */
223 | 
224 | void NIbprobitMixed(int *Y,            /* binary outcome variable */
225 |                     int *R,            /* recording indicator for Y */
226 |                     int *grp,          /* group indicator */
227 |                     int *in_grp,       /* number of groups */
228 |                     int *max_samp_grp, /* max # of obs within group */
229 |                     double *dXo,       /* fixed effects covariates */
230 |                     double *dXr,       /* fixed effects covariates */
231 |                     double *dZo,       /* random effects covariates */
232 |                     double *dZr,       /* random effects covariates */
233 |                     double *beta,      /* coefficients */
234 |                     double *delta,     /* coefficients */
235 |                     double *dPsio,     /* random effects variance */
236 |                     double *dPsir,     /* random effects variance */
237 |                     int *insamp,       /* # of obs */
238 |                     int *incovo,       /* # of fixed effects */
239 |                     int *incovr,       /* # of fixed effects */
240 |                     int *incovoR,      /* # of random effects */
241 |                     int *incovrR,      /* # of random effects */
242 |                     int *intreat,      /* # of treatments */
243 |                     double *beta0,     /* prior mean */
244 |                     double *delta0,    /* prior mean */
245 |                     double *dAo,       /* prior precision */
246 |                     double *dAr,       /* prior precision */
247 |                     int *dfo,          /* prior degrees of freedom */
248 |                     int *dfr,          /* prior degrees of freedom */
249 |                     double *dS0o,      /* prior scale */
250 |                     double *dS0r,      /* prior scale */
251 |                     int *Insample,     /* insample QoI */
252 |                     int *param,        /* store parameters? */
253 |                     int *mda,          /* marginal data augmentation? */
254 |                     int *ndraws,       /* # of gibbs draws */
255 |                     int *iBurnin,      /* # of burnin */
256 |                     int *iKeep,        /* every ?th draws to keep */
257 |                     int *verbose,
258 |                     double *coefo,     /* storage for coefficients */
259 |                     double *coefr,     /* storage for coefficients */
260 |                     double *sPsiO,     /* storage for variance */
261 |                     double *sPsiR,     /* storage for variance */
262 |                     double *ATE,       /* storage for ATE */
263 |                     double *BASE       /* storage for baseline */
264 |                     ) {
265 | 
266 |   /*** counters ***/
267 |   int n_samp = *insamp;   /* sample size */
268 |   int n_gen = *ndraws;    /* number of gibbs draws */
269 |   int n_grp = *in_grp;    /* number of groups */
270 |   int n_covo = *incovo;   /* number of fixed effects */
271 |   int n_covr = *incovr;   /* number of fixed effects */
272 |   int n_covoR = *incovoR; /* number of random effects */
273 |   int n_covrR = *incovrR; /* number of random effects */
274 |   int n_treat = *intreat; /* number of treatments */
275 | 
276 |   /*** data ***/
277 |   /* covariates for the response model */
278 |   double **Xr = doubleMatrix(n_samp+n_covr, n_covr+1);
279 |   /* covariates for the outcome model */
280 |   double **Xo = doubleMatrix(n_samp+n_covo, n_covo+1);
281 |   /* random effects covariates */
282 |   double ***Zo = doubleMatrix3D(n_grp, *max_samp_grp + n_covoR,
283 |                                 n_covoR + 1);
284 |   double ***Zr = doubleMatrix3D(n_grp, *max_samp_grp + n_covrR,
285 |                                 n_covrR + 1);
286 | 
287 |   /*** model parameters ***/
288 |   double **PsiO = doubleMatrix(n_covoR, n_covoR);
289 |   double **PsiR = doubleMatrix(n_covrR, n_covrR);
290 |   double **xiO = doubleMatrix(n_grp, n_covoR);
291 |   double **xiR = doubleMatrix(n_grp, n_covrR);
292 |   double **S0o = doubleMatrix(n_covoR, n_covoR);
293 |   double **S0r = doubleMatrix(n_covrR, n_covrR);
294 |   double **Ao = doubleMatrix(n_covo, n_covo);
295 |   double **Ar = doubleMatrix(n_covr, n_covr);
296 |   double **mtemp1 = doubleMatrix(n_covo, n_covo);
297 |   double **mtemp2 = doubleMatrix(n_covr, n_covr);
298 | 
299 |   /*** QoIs ***/
300 |   double *base = doubleArray(n_treat);
301 |   double *cATE = doubleArray(n_treat);
302 | 
303 |   /*** storage parameters and loop counters **/
304 |   int progress = 1;
305 |   int keep = 1;
306 |   int i, j, k, main_loop;
307 |   int itemp, itemp0, itemp1, itemp2, itemp3 = 0, itemp4 = 0, itempP = ftrunc((double) n_gen/10);
308 |   int *vitemp = intArray(n_grp);
309 |   double dtemp, pj, r0, r1;
310 | 
311 |   /*** get random seed **/
312 |   GetRNGstate();
313 | 
314 |   /*** fixed effects ***/
315 |   itemp = 0;
316 |   for (j = 0; j < n_covo; j++)
317 |     for (i = 0; i < n_samp; i++)
318 |       Xo[i][j] = dXo[itemp++];
319 |   itemp = 0;
320 |   for (j = 0; j < n_covr; j++)
321 |     for (i = 0; i < n_samp; i++)
322 |       Xr[i][j] = dXr[itemp++];
323 | 
324 |   /* prior */
325 |   itemp = 0;
326 |   for (k = 0; k < n_covo; k++)
327 |     for (j = 0; j < n_covo; j++)
328 |       Ao[j][k] = dAo[itemp++];
329 | 
330 |   itemp = 0;
331 |   for (k = 0; k < n_covr; k++)
332 |     for (j = 0; j < n_covr; j++)
333 |       Ar[j][k] = dAr[itemp++];
334 | 
335 |   dcholdc(Ao, n_covo, mtemp1);
336 |   for(i = 0; i < n_covo; i++) {
337 |     Xo[n_samp+i][n_covo] = 0;
338 |     for(j = 0; j < n_covo; j++) {
339 |       Xo[n_samp+i][n_covo] += mtemp1[i][j]*beta0[j];
340 |       Xo[n_samp+i][j] = mtemp1[i][j];
341 |     }
342 |   }
343 | 
344 |   dcholdc(Ar, n_covr, mtemp2);
345 |   for(i = 0; i < n_covr; i++) {
346 |     Xr[n_samp+i][n_covr] = 0;
347 |     for(j = 0; j < n_covr; j++) {
348 |       Xr[n_samp+i][n_covr] += mtemp2[i][j]*delta0[j];
349 |       Xr[n_samp+i][j] = mtemp2[i][j];
350 |     }
351 |   }
352 | 
353 |   /* random effects */
354 |   itemp = 0;
355 |   for (j = 0; j < n_grp; j++)
356 |     vitemp[j] = 0;
357 |   for (i = 0; i < n_samp; i++) {
358 |     for (j = 0; j < n_covoR; j++)
359 |       Zo[grp[i]][vitemp[grp[i]]][j] = dZo[itemp++];
360 |     vitemp[grp[i]]++;
361 |   }
362 | 
363 |   itemp = 0;
364 |   for (j = 0; j < n_grp; j++)
365 |     vitemp[j] = 0;
366 |   for (i = 0; i < n_samp; i++) {
367 |     for (j = 0; j < n_covrR; j++)
368 |       Zr[grp[i]][vitemp[grp[i]]][j] = dZr[itemp++];
369 |     vitemp[grp[i]]++;
370 |   }
371 | 
372 |   /* prior variance for random effects */
373 |   itemp = 0;
374 |   for (k = 0; k < n_covoR; k++)
375 |     for (j = 0; j < n_covoR; j++)
376 |       PsiO[j][k] = dPsio[itemp++];
377 | 
378 |   itemp = 0;
379 |   for (k = 0; k < n_covrR; k++)
380 |     for (j = 0; j < n_covrR; j++)
381 |       PsiR[j][k] = dPsir[itemp++];
382 | 
383 |   itemp = 0;
384 |   for (k = 0; k < n_covoR; k++)
385 |     for (j = 0; j < n_grp; j++)
386 |       xiO[j][k] = norm_rand();
387 | 
388 |   itemp = 0;
389 |   for (k = 0; k < n_covrR; k++)
390 |     for (j = 0; j < n_grp; j++)
391 |       xiR[j][k] = norm_rand();
392 | 
393 |   /* hyper prior scale parameter for random effects */
394 |   itemp = 0;
395 |   for (k = 0; k < n_covoR; k++)
396 |     for (j = 0; j < n_covoR; j++)
397 |       S0o[j][k] = dS0o[itemp++];
398 | 
399 |   itemp = 0;
400 |   for (k = 0; k < n_covrR; k++)
401 |     for (j = 0; j < n_covrR; j++)
402 |       S0r[j][k] = dS0r[itemp++];
403 | 
404 |   /*** Gibbs Sampler! ***/
405 |   itemp = 0; itemp0 = 0; itemp1 = 0; itemp2 = 0;
406 |   for(main_loop = 1; main_loop <= n_gen; main_loop++){
407 | 
408 |     /** Response Model: binary probit **/
409 |     bprobitMixedGibbs(R, Xr, Zr, grp, delta, xiR, PsiR, n_samp,
410 |                       n_covr, n_covrR, n_grp, 0, delta0, Ar, *dfr, S0r,
411 |                       1);
412 | 
413 |     /** Outcome Model: binary probit **/
414 |     bprobitMixedGibbs(Y, Xo, Zo, grp, beta, xiO, PsiO, n_samp, n_covo,
415 |                       n_covoR, n_grp, 0, beta0, Ao, *dfo, S0o, 1);
416 | 
417 |     /** Imputing the missing data **/
418 |     for (j = 0; j < n_grp; j++)
419 |       vitemp[j] = 0;
420 |     for (i = 0; i < n_samp; i++) {
421 |       if (R[i] == 0) {
422 |         pj = 0;
423 |         r0 = delta[0];
424 |         r1 = delta[1];
425 |         for (j = 0; j < n_covo; j++)
426 |           pj += Xo[i][j]*beta[j];
427 |         for (j = 2; j < n_covr; j++) {
428 |           r0 += Xr[i][j]*delta[j];
429 |           r1 += Xr[i][j]*delta[j];
430 |         }
431 |         for (j = 0; j < n_covoR; j++)
432 |           pj += Zo[grp[i]][vitemp[grp[i]]][j]*xiO[grp[i]][j];
433 |         for (j = 0; j < n_covrR; j++) {
434 |           r0 += Zr[grp[i]][vitemp[grp[i]]][j]*xiR[grp[i]][j];
435 |           r1 += Zr[grp[i]][vitemp[grp[i]]][j]*xiR[grp[i]][j];
436 |         }
437 |         pj = pnorm(0, pj, 1, 0, 0);
438 |         r0 = pnorm(0, r0, 1, 0, 0);
439 |         r1 = pnorm(0, r1, 1, 0, 0);
440 |         if (unif_rand() < ((1-r1)*pj/((1-r1)*pj+(1-r0)*(1-pj)))) {
441 |           Y[i] = 1;
442 |           Xr[i][0] = 0;
443 |           Xr[i][1] = 1;
444 |         } else {
445 |           Y[i] = 0;
446 |           Xr[i][0] = 1;
447 |           Xr[i][1] = 0;
448 |         }
449 |       }
450 |       vitemp[grp[i]]++;
451 |     }
452 | 
453 |     /** Compute quantities of interest **/
454 |     for (j = 0; j < n_grp; j++)
455 |       vitemp[j] = 0;
456 |     for (j = 0; j < n_treat; j++)
457 |       base[j] = 0;
458 |     for (i = 0; i < n_samp; i++) {
459 |       dtemp = 0;
460 |       for (j = n_treat; j < n_covo; j++)
461 |         dtemp += Xo[i][j]*beta[j];
462 |       for (j = 0; j < n_covoR; j++)
463 |         dtemp += Zo[grp[i]][vitemp[grp[i]]][j]*xiO[grp[i]][j];
464 |       for (j = 0; j < n_treat; j++) {
465 |         if (*Insample) {
466 |           if (Xo[i][j] == 1)
467 |             base[j] += (double)Y[i];
468 |           else
469 |             base[j] += (double)((dtemp+beta[j]+norm_rand()) > 0);
470 |         } else
471 |           base[j] += pnorm(0, dtemp+beta[j], 1, 0, 0);
472 |       }
473 |       vitemp[grp[i]]++;
474 |     }
475 |     for (j = 0; j < n_treat; j++)
476 |       base[j] /= (double)n_samp;
477 | 
478 |     /** Storing the results **/
479 |     if (main_loop > *iBurnin) {
480 |       if (keep == *iKeep) {
481 |         for (j = 0; j < (n_treat-1); j++)
482 |           ATE[itemp0++] = base[j+1] - base[0];
483 |         for (j = 0; j < n_treat; j++)
484 |           BASE[itemp++] = base[j];
485 |         if (*param) {
486 |           for (i = 0; i < n_covo; i++)
487 |             coefo[itemp1++] = beta[i];
488 |           for (i = 0; i < n_covr; i++)
489 |             coefr[itemp2++] = delta[i];
490 |           /* store the upper triangles of the random-effects covariances,
491 |              using separate counters for the two storage vectors */
492 |           for (i = 0; i < n_covoR; i++)
493 |             for (j = i; j < n_covoR; j++)
494 |               sPsiO[itemp3++] = PsiO[i][j];
495 |           for (i = 0; i < n_covrR; i++)
496 |             for (j = i; j < n_covrR; j++)
497 |               sPsiR[itemp4++] = PsiR[i][j];
498 |         }
499 |         keep = 1;
500 |       }
501 |       else
502 |         keep++;
503 |     }
504 | 
505 |     if(*verbose) {
506 |       if(main_loop == itempP) {
507 |         Rprintf("%3d percent done.\n", progress*10);
508 |         itempP += ftrunc((double) n_gen/10);
509 |         progress++;
510 |         R_FlushConsole();
511 |       }
512 |     }
513 |     R_CheckUserInterrupt();
514 |   } /* end of Gibbs sampler */
515 | 
516 |   /** write out the random seed **/
517 |   PutRNGstate();
518 | 
519 |   /** freeing memory **/
520 |   FreeMatrix(Xr, n_samp+n_covr);
521 |   FreeMatrix(Xo, n_samp+n_covo);
522 |   Free3DMatrix(Zo, n_grp, *max_samp_grp + n_covoR);
523 |   Free3DMatrix(Zr, n_grp, *max_samp_grp + n_covrR);
524 |   FreeMatrix(PsiO, n_covoR);
525 |   FreeMatrix(PsiR, n_covrR);
526 |   FreeMatrix(xiO, n_grp);
527 |   FreeMatrix(xiR, n_grp);
528 |   FreeMatrix(S0o, n_covoR);
529 |   FreeMatrix(S0r, n_covrR);
530 |   FreeMatrix(Ao, n_covo);
531 |   FreeMatrix(Ar, n_covr);
532 |   FreeMatrix(mtemp1, n_covo);
533 |   FreeMatrix(mtemp2, n_covr);
534 |   free(base);
535 |   free(cATE);
536 |   free(vitemp);
537 | } /* NIbprobitMixed */
538 | 
539 | 
--------------------------------------------------------------------------------