├── .gitignore ├── R └── ivdesc │ ├── data │ └── FoxDebate.rda │ ├── .Rbuildignore │ ├── NAMESPACE │ ├── NEWS.md │ ├── R │ ├── utils.R │ ├── FoxDebate.R │ ├── ivdesc.R │ └── ivdesc_est.R │ ├── DESCRIPTION │ └── man │ ├── ivdesc.Rd │ └── FoxDebate.Rd ├── stata.toc ├── ivdesc.pkg ├── README.md ├── ivdesc.sthlp ├── ivdesc_calc.ado └── ivdesc.ado /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | data-raw 3 | install.R 4 | doc 5 | Meta 6 | docs 7 | .DS_Store -------------------------------------------------------------------------------- /R/ivdesc/data/FoxDebate.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sumtxt/ivdesc/HEAD/R/ivdesc/data/FoxDebate.rda -------------------------------------------------------------------------------- /stata.toc: -------------------------------------------------------------------------------- 1 | v 1.1.0 2 | d Materials by Moritz Marbach 3 | d ETH Zürich 4 | d moritz.marbach@gess.ethz.ch 5 | d https://github.com/sumtxt/ivdesc/ 6 | 7 | p ivdesc 8 | -------------------------------------------------------------------------------- /R/ivdesc/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^data-raw 2 | ^README\.md$ 3 | ^cran-comments\.md$ 4 | ^\.DS_Store 5 | \.git 6 | ^CRAN-RELEASE$ 7 | ^DS_Store$ 8 | ^doc$ 9 | ^Meta$ 10 | ^_pkgdown\.yml$ 11 | ^pkgdown$ -------------------------------------------------------------------------------- /ivdesc.pkg: -------------------------------------------------------------------------------- 1 | v 1.1.0 2 | d 'IVDESC': Profiling Compliers and Non-compliers for Instrumental Variable Analysis 3 | d 4 | d Distribution-Date: 20210318 5 | d License: GNU General Public License v3.0 6 | d 7 | F ivdesc.sthlp 8 | F ivdesc.ado 9 | F ivdesc_calc.ado 10 | -------------------------------------------------------------------------------- 
/R/ivdesc/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,ivdesc) 4 | export(ivdesc) 5 | importFrom(knitr,kable) 6 | importFrom(purrr,map_df) 7 | importFrom(rsample,analysis) 8 | importFrom(rsample,bootstraps) 9 | importFrom(stats,cov) 10 | importFrom(stats,t.test) 11 | importFrom(stats,var) 12 | -------------------------------------------------------------------------------- /R/ivdesc/NEWS.md: -------------------------------------------------------------------------------- 1 | # ivdesc 1.1.2 2 | 3 | # Version 1.1.2 (2025-07-21) 4 | 5 | * Added FoxDebate data from archived icsw package 6 | * Removed icsw package dependency 7 | 8 | # Version 1.1.1 (2022-12-18) 9 | 10 | * Fixed lazy data note 11 | 12 | # Version 1.1.0 (2020-03-18) 13 | 14 | * Added standard errors based on asymptotic theory 15 | 16 | # Version 1.0.0 (2019-10-10) 17 | 18 | * First release 19 | -------------------------------------------------------------------------------- /R/ivdesc/R/utils.R: -------------------------------------------------------------------------------- 1 | #' @importFrom stats cov 2 | get_var_mu_co <- function(N,X,Z,D){ 3 | 4 | C <- cbind(X,Z*(1-D)*X,D*(1-Z)*X,Z*(1-D),D*(1-Z),Z) 5 | 6 | empCov <- cov(C) 7 | means <- apply(C,2,mean) 8 | 9 | hatdH <- deltaH(means[1:3],means[4:6]) 10 | 11 | var_mu_co <- 1/N*t(hatdH)%*%empCov%*%hatdH 12 | 13 | return(var_mu_co) 14 | } 15 | 16 | 17 | deltaH <- function(mu,pi){ 18 | p <- pi[3] 19 | 20 | q <- pi[1] 21 | k <- pi[2] 22 | 23 | x <- mu[1] 24 | y <- mu[2] 25 | z <- mu[3] 26 | 27 | Pco <- (1-q/p-k/(1-p)) 28 | Mu <- (x-y/p-z/(1-p)) 29 | 30 | dH <- rep(NA,6) 31 | 32 | dH[1] <- 1/Pco 33 | dH[2] <- -1/(Pco*p) 34 | dH[3] <- -1/(Pco*(1-p)) 35 | dH[4] <- Mu/(Pco^2*p) 36 | dH[5] <- Mu/(Pco^2*(1-p)) 37 | dH[6] <- (k/(1-p)^2*x-q/p^2*x -k/(p*(1-p))^2*y + y/p^2 - z/(1-p)^2 + q/(p*(1-p))^2*z )/(Pco)^2 38 | 39 | return(dH) 40 | } 
-------------------------------------------------------------------------------- /R/ivdesc/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ivdesc 2 | Title: Profiling Compliers and Non-Compliers for Instrumental Variable Analysis 3 | Version: 1.1.2 4 | Authors@R: c(person("Moritz", "Marbach", role = c("aut", "cre"), email="m.marbach@ucl.ac.uk", comment = c(ORCID = "0000-0002-7101-2821"))) 5 | Description: Estimating the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation in the context of instrumental variable estimation. This package implements the method described in Marbach and Hangartner (2020) and Hangartner, Marbach, Henckel, Maathuis, Kelz and Keele (2021) . 6 | Depends: R (>= 3.4.0) 7 | License: GPL-3 8 | URL: https://github.com/sumtxt/ivdesc/ 9 | BugReports: https://github.com/sumtxt/ivdesc/issues 10 | Encoding: UTF-8 11 | LazyData: true 12 | RoxygenNote: 7.3.2 13 | Suggests: 14 | haven 15 | Imports: 16 | knitr (>= 1.20.8), 17 | purrr (>= 0.2.5), 18 | rsample (>= 0.0.3) 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ivdesc 2 | 3 | 4 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/ivdesc)](https://cran.r-project.org/package=ivdesc) 5 | [![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/ivdesc)](https://CRAN.R-project.org/package=ivdesc) 6 | [![Downloads](https://cranlogs.r-pkg.org/badges/ivdesc)](https://CRAN.R-project.org/package=ivdesc) 7 | 8 | 9 | Estimating the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation in the context of instrumental variable estimation. 
This package implements the method described in [Marbach and Hangartner (2020)](https://doi.org/10.1017/pan.2019.48) and [Hangartner, Marbach, Henckel, Maathuis, Kelz and Keele (2021)](https://arxiv.org/abs/2103.06328). 10 | 11 | 12 | ### Install R Package 13 | 14 | You can install the package directly from CRAN: 15 | 16 | ```R 17 | install.packages("ivdesc") 18 | ``` 19 | 20 | Or install the latest version from Github using: 21 | 22 | ```R 23 | remotes::install_github("sumtxt/ivdesc/R/ivdesc") 24 | ``` 25 | 26 | You may need to install the `remotes` package first. 27 | 28 | 29 | ### Install STATA Package 30 | 31 | To install the STATA package directly from SSC use: 32 | 33 | ```STATA 34 | ssc install ivdesc 35 | ``` 36 | 37 | Or install the latest version from Github using: 38 | 39 | ```STATA 40 | github install sumtxt/ivdesc 41 | ``` 42 | 43 | This will only work if you installed the [GITHUB module](https://github.com/haghish/github) via `net install github, from("https://haghish.github.io/github/")` first. 44 | -------------------------------------------------------------------------------- /R/ivdesc/man/ivdesc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ivdesc.R 3 | \name{ivdesc} 4 | \alias{ivdesc} 5 | \title{Profiling compliers and non-compliers for instrumental variable analysis} 6 | \usage{ 7 | ivdesc( 8 | X, 9 | D, 10 | Z, 11 | variance = FALSE, 12 | boot = TRUE, 13 | bootn = 1000, 14 | balance = TRUE, 15 | ... 
16 | ) 17 | } 18 | \arguments{ 19 | \item{X}{vector with numeric covariate} 20 | 21 | \item{D}{vector with binary treatment} 22 | 23 | \item{Z}{vector with binary instrument} 24 | 25 | \item{variance}{Calculate the variance of the covariate for each subgroup?} 26 | 27 | \item{boot}{Replace all standard errors with bootstrap standard errors?} 28 | 29 | \item{bootn}{number of bootstraps (ignored if \code{boot=FALSE} )} 30 | 31 | \item{balance}{Run balance test?} 32 | 33 | \item{...}{additional arguments to be passed to \code{ivdesc_all}} 34 | } 35 | \value{ 36 | Returns an object \code{ivdesc} with estimates for each subgroup (\code{co}: complier, \code{nt}: never-taker, \code{at} : always-taker) and the full sample: 37 | 38 | \itemize{ 39 | \item \code{mu} and \code{mu_se} : Mean of \code{X} and standard error 40 | \item \code{pi} and \code{pi_se}: Proportion of each subgroup in the sample and standard error 41 | \item \code{var}: Variance of \code{X} (if \code{variance=TRUE}) 42 | } 43 | 44 | Can be coerced to a proper \code{data.frame} using \code{as.data.frame}. 45 | } 46 | \description{ 47 | Estimates the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation. 48 | } 49 | \details{ 50 | This function estimates the mean and the associated standard error of \code{X} for the complier, never-taker and always-taker subpopulation within a sample where some, but not all, units are encouraged by instrument \code{Z} to take the treatment \code{D}. 51 | Observations with missing values in either \code{X}, \code{D}, or \code{Z} are dropped (listwise deletion). 52 | 53 | One-sided noncompliance is supported. The mean for the always-/never-taker subpopulation will only be computed if there are at least two observed units in these subpopulations. 54 | 55 | If \code{boot=FALSE}, standard errors based on asymptotic theory are estimated. 56 | 57 | The balance test is a t-test allowing for unequal variances.
58 | } 59 | \examples{ 60 | 61 | 62 | # Example 1: Albertson/Lawrence (2009) 63 | # see Marbach/Hangartner (2019) for details/discussion 64 | data(FoxDebate) 65 | 66 | with(FoxDebate, ivdesc(X=readnews,D=watchpro,Z=conditn) ) 67 | 68 | 69 | 70 | \donttest{ 71 | 72 | # Example 2: JTPA Data 73 | 74 | library(haven) 75 | jtpa <- read_dta("http://fmwww.bc.edu/repec/bocode/j/jtpa.dta") 76 | 77 | with(jtpa, ivdesc(age, training, assignmt, bootn=500)) 78 | with(jtpa, ivdesc(hispanic, training, assignmt, boot=FALSE)) 79 | 80 | } 81 | 82 | 83 | 84 | 85 | } 86 | \references{ 87 | M. Marbach and D. Hangartner. 2020. Profiling Compliers and Non-compliers for Instrumental Variable Analysis. \emph{Political Analysis}, 28(3), 435-444. 88 | 89 | D. Hangartner, M. Marbach, L. Henckel, M. H. Maathuis, R. R. Kelz, and L. Keele. 2021. Profiling Compliers in Instrumental Variables Designs. Available at arXiv: \href{https://arxiv.org/abs/2103.06328}{https://arxiv.org/abs/2103.06328}. 90 | } 91 | \seealso{ 92 | \code{\link[AER]{ivreg}} 93 | } 94 | -------------------------------------------------------------------------------- /ivdesc.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1.1.0 March 18, 2021 @ 18:24:26}{...} 3 | {cmd:help ivdesc} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 5 20 22 2}{...} {p2col :{hi:ivdesc} {hline 2}} 9 | Profiling compliers and non-compliers for instrumental variable analysis {p_end} {p2colreset}{...} 10 | 11 | {title:Syntax} 12 | 13 | {p 8 17 2}{cmd:ivdesc} 14 | {it:{help varname:covariate}} 15 | {it:{help varname:treatment}} 16 | {it:{help varname:instrument}} 17 | {ifin} 18 | [{cmd:,} {it:options}] 19 | 20 | 21 | {marker options}{...} 22 | {title:Options} 23 | 24 | {phang}{opt no:boot} report standard errors based on asymptotic theory instead of bootstrap standard errors 25 | 26 | {phang}{opt r:eps(#)} perform # bootstrap replications; default is reps(1000) 27 | 28 | {phang}{opt var:iance} also report the variance of the covariate for each subgroup 29 | 30 | {phang}{opt nobal:ance} skip the balance test 31 | 32 | {phang}{cmd:fmt(}{it:subopts}{cmd:)} passed through to {cmd:estout matrix(,subopts)} that handles the display of the estimates 33 | 34 | 35 | {marker description}{...} 36 | {title:Description} 37 | 38 | {phang} {cmd:ivdesc} estimates the mean and the associated standard error of a covariate for the complier, never-taker and always-taker subpopulation within a sample where some, but not all, units are encouraged by instrument to take the treatment. 39 | 40 | {title:Remarks} 41 | 42 | {phang} Observations with missing values in either {it:covariate}, {it:treatment}, 43 | or {it:instrument} are deleted before estimation (listwise deletion). 44 | 45 | {phang} One-sided noncompliance is supported. The mean for the always-/never-taker subpopulation will only be computed if there are at least two observed units in these subpopulations. 46 | 47 | {phang} If {cmd:noboot}, standard errors based on asymptotic theory are estimated. 48 | 49 | {phang} The balance test is a t-test allowing for unequal variances. 
50 | 51 | 52 | {title:Examples} 53 | 54 | {pstd}Load the JTPA data 55 | 56 | {p 4 8 2}{stata "use http://fmwww.bc.edu/repec/bocode/j/jtpa.dta, clear":. use "http://fmwww.bc.edu/repec/bocode/j/jtpa.dta",clear}{p_end} 57 | 58 | {p 4 8 2}{stata "ivdesc age training assignmt":. ivdesc age training assignmt}{p_end} 59 | {p 4 8 2}{stata "ivdesc hispanic training assignmt":. ivdesc hispanic training assignmt}{p_end} 60 | 61 | {pstd}Plot the results 62 | 63 | {p 4 8 2}{stata "matrix C = r(ivdesc)'":. matrix C = r(ivdesc)'}{p_end} 64 | {p 4 8 2}{stata "coefplot matrix(C), se(C[2])":. coefplot matrix(C), se(C[2])}{p_end} 65 | 66 | 67 | {title:Saved results} 68 | 69 | {synoptset 15 tabbed}{...} 70 | {p2col 5 15 19 2: Matrices}{p_end} 71 | {synopt:{cmd:r(ivdesc)}} all estimates {p_end} 72 | 73 | {p2col 5 15 19 2: Scalar}{p_end} 74 | {synopt:{cmd:r(pval)}} p-value from balance test {p_end} 75 | 76 | 77 | {title:Reference} 78 | {p 4 8 2} 79 | 80 | {pstd} M. Marbach and D. Hangartner. 2020. Profiling Compliers and Non-compliers for Instrumental Variable Analysis. {it:Political Analysis} 28(3), 435-444. {p_end} 81 | 82 | {pstd} D. Hangartner, M. Marbach, L. Henckel, M. H. Maathuis, R. R. Kelz, and L. Keele. 2021. Profiling Compliers in Instrumental Variables Designs. Available at arXiv: https://arxiv.org/abs/2103.06328. 
{p_end} 83 | 84 | {title:Authors} 85 | 86 | Moritz Marbach (Maintainer), moritz.marbach@tamu.edu 87 | Texas A&M University, United States 88 | -------------------------------------------------------------------------------- /R/ivdesc/R/FoxDebate.R: -------------------------------------------------------------------------------- 1 | #' The effects of watching a Fox debate on Proposition 209 2 | #' 3 | #' The data set (n=507) contains findings from the experiment described in Albertson and Lawrence (2009) 4 | #' in which a representative sample of survey respondents in Orange County, California, were randomly 5 | #' assigned to receive encouragement to view a Fox debate on affirmative action, which would take 6 | #' place on the eve of the 1996 presidential election. Shortly after the election, these respondents were 7 | #' reinterviewed. The postelection questionnaire asked respondents whether they viewed the debate, 8 | #' whether they supported a California proposition (209) to eliminate affirmative action (\code{support}), 9 | #' and how informed they felt about the proposition (\code{infopro}). The dataset can be used to reproduce 10 | #' Table 2 in Aronow and Carnegie (2013). Note that mean imputation was used to handle missing data so 11 | #' non-integer values are imputed. \code{support} and \code{infopro} are excepted and include missing values. 12 | #' 13 | #' This dataset documentation has been copied from the archived R package \emph{icsw}. 14 | #' 15 | #' @format A data frame with 507 observations on the following 12 variables: 16 | #' \describe{ 17 | #' \item{partyid}{An 11 point scale from "strong Republican" to "strong Democrat".} 18 | #' \item{pnintst}{Respondent interest in politics and national affairs. Coded 1 = "very interested", 2 = "somewhat interested", 3 = "only slightly interested", 4 = "not interested at all".} 19 | #' \item{watchnat}{Frequency of national television news consumption.
Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 20 | #' \item{educad}{Education level of respondent. Coded 1 = "eighth grade or less", 2 = "beyond eighth grade, not high school", 3 = "ged", 4 = "high school", 5 = "less than one year vocational school", 6 = "one to two year vocational school", 7 = "two years or more vocational school", 8 = "less than two years of college", 9 = "two or more years of college", 10 = "finished a two-year college program", 11 = "finished a four-year college program", 12 = "master degree or equivalent", 13 = "ph.d., m.d., or other advance degree".} 21 | #' \item{readnews}{How often respondent reads political news. Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 22 | #' \item{gender}{Respondent gender. Coded 1 for female and 0 for male.} 23 | #' \item{income}{Family income from all sources. Coded 1 = "under $10,000", 2 = "between $10,000 and $20,000", 3 = "between $20,000 and $30,000", 4 = "between $30,000 and $40,000", 5 = "between $40,000 and $50,000", 6 = "between $50,000 and $60,000", 7 = "between $60,000 and $70,000", 8 = "between $70,000 and $80,000", 9 = "between $80,000 and $90,000", 10 = "between $90,000 and $100,000", 11 = "$100,000 or more".} 24 | #' \item{white}{Binary indicator coded 1 if subject is white and 0 otherwise.} 25 | #' \item{support}{Support for Proposition 209. Coded 1 if subject voted against or opposed and 0 if subject voted for or favored} 26 | #' \item{infopro}{Information on Proposition 209. 
Coded from 1 to 4, with 4 meaning respondents had a great deal of information about Proposition 209 prior to the election, and 1 meaning respondents reported no information about the proposition before the election.} 27 | #' \item{watchpro}{Binary indicator coded 1 if subject watched the Fox Debate about affirmative action and 0 otherwise. This is the outcome ("treatment") of interest.} 28 | #' \item{conditn}{Binary indicator coded 1 if subject was (randomly) prompted to watch the Fox Debate about affirmative action. This is the encouragement (instrumental) variable.} 29 | #' } 30 | #' 31 | #' @references 32 | #' Bethany Albertson and Adria Lawrence. (2009). After the credits roll: The long-term effects of educational television on public knowledge and attitudes. \emph{American Politics Research}. 37(2): 275-300. 33 | #' 34 | #' Peter M. Aronow and Allison Carnegie. (2013). Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Political Analysis}. 21.4 (2013): 492-506. 35 | #' 36 | #' Peter M. Aronow and Allison Carnegie. (2013). Replication data for: Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Dataverse Network.} http://hdl.handle.net/1902.1/21729 (accessed May 14, 2015). 
37 | #' 38 | #' @keywords datasets 39 | #' @docType data 40 | #' @name FoxDebate 41 | "FoxDebate" -------------------------------------------------------------------------------- /R/ivdesc/man/FoxDebate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FoxDebate.R 3 | \docType{data} 4 | \name{FoxDebate} 5 | \alias{FoxDebate} 6 | \title{The effects of watching a Fox debate on Proposition 209} 7 | \format{ 8 | A data frame with 507 observations on the following 12 variables: 9 | \describe{ 10 | \item{partyid}{An 11 point scale from "strong Republican" to "strong Democrat".} 11 | \item{pnintst}{Respondent interest in politics and national affairs. Coded 1 = "very interested", 2 = "somewhat interested", 3 = "only slightly interested", 4 = "not interested at all".} 12 | \item{watchnat}{Frequency of national television news consumption. Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 13 | \item{educad}{Education level of respondent. Coded 1 = "eighth grade or less", 2 = "beyond eighth grade, not high school", 3 = "ged", 4 = "high school", 5 = "less than one year vocational school", 6 = "one to two year vocational school", 7 = "two years or more vocational school", 8 = "less than two years of college", 9 = "two or more years of college", 10 = "finished a two-year college program", 11 = "finished a four-year college program", 12 = "master degree or equivalent", 13 = "ph.d., m.d., or other advance degree".} 14 | \item{readnews}{How often respondent reads political news. Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 15 | \item{gender}{Respondent gender. 
Coded 1 for female and 0 for male.} 16 | \item{income}{Family income from all sources. Coded 1 = "under $10,000", 2 = "between $10,000 and $20,000", 3 = "between $20,000 and $30,000", 4 = "between $30,000 and $40,000", 5 = "between $40,000 and $50,000", 6 = "between $50,000 and $60,000", 7 = "between $60,000 and $70,000", 8 = "between $70,000 and $80,000", 9 = "between $80,000 and $90,000", 10 = "between $90,000 and $100,000", 11 = "$100,000 or more".} 17 | \item{white}{Binary indicator coded 1 if subject is white and 0 otherwise.} 18 | \item{support}{Support for Proposition 209. Coded 1 if subject voted against or opposed and 0 if subject voted for or favored} 19 | \item{infopro}{Information on Proposition 209. Coded from 1 to 4, with 4 meaning respondents had a great deal of information about Proposition 209 prior to the election, and 1 meaning respondents reported no information about the proposition before the election.} 20 | \item{watchpro}{Binary indicator coded 1 if subject watched the Fox Debate about affirmative action and 0 otherwise. This is the outcome ("treatment") of interest.} 21 | \item{conditn}{Binary indicator coded 1 if subject was (randomly) prompted to watch the Fox Debate about affirmative action. This is the encouragement (instrumental) variable.} 22 | } 23 | } 24 | \usage{ 25 | FoxDebate 26 | } 27 | \description{ 28 | The data set (n=507) contains findings from the experiment described in Albertson and Lawrence (2009) 29 | in which a representative sample of survey respondents in Orange County, California, were randomly 30 | assigned to receive encouragement to view a Fox debate on affirmative action, which would take 31 | place on the eve of the 1996 presidential election. Shortly after the election, these respondents were 32 | reinterviewed. 
The postelection questionnaire asked respondents whether they viewed the debate, 33 | whether they supported a California proposition (209) to eliminate affirmative action (\code{support}), 34 | and how informed they felt about the proposition (\code{infopro}). The dataset can be used to reproduce 35 | Table 2 in Aronow and Carnegie (2013). Note that mean imputation was used to handle missing data so 36 | non-integer values are imputed. \code{support} and \code{infopro} are excepted and include missing values. 37 | } 38 | \details{ 39 | This dataset documentation has been copied from the archived R package \emph{icsw}. 40 | } 41 | \references{ 42 | Bethany Albertson and Adria Lawrence. (2009). After the credits roll: The long-term effects of educational television on public knowledge and attitudes. \emph{American Politics Research}. 37(2): 275-300. 43 | 44 | Peter M. Aronow and Allison Carnegie. (2013). Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Political Analysis}. 21.4 (2013): 492-506. 45 | 46 | Peter M. Aronow and Allison Carnegie. (2013). Replication data for: Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Dataverse Network.} http://hdl.handle.net/1902.1/21729 (accessed May 14, 2015). 47 | } 48 | \keyword{datasets} 49 | -------------------------------------------------------------------------------- /R/ivdesc/R/ivdesc.R: -------------------------------------------------------------------------------- 1 | #' Profiling compliers and non-compliers for instrumental variable analysis 2 | #' 3 | #' 4 | #' Estimates the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation. 5 | #' 6 | #' 7 | #' @param X vector with numeric covariate 8 | #' @param D vector with binary treatment 9 | #' @param Z vector with binary instrument 10 | #' @param boot Replace all standard errors with bootstrap standard errors?
11 | #' @param bootn number of bootstraps (ignored if \code{boot=FALSE} ) 12 | #' @param variance Calculate the variance of the covariate for each subgroup? 13 | #' @param balance Run balance test? 14 | #' @param ... additional arguments to be passed to \code{ivdesc_all} 15 | #' 16 | #' @details 17 | #' This function estimates the mean and the associated standard error of \code{X} for the complier, never-taker and always-taker subpopulation within a sample where some, but not all, units are encouraged by instrument \code{Z} to take the treatment \code{D}. 18 | #' Observations with missing values in either \code{X}, \code{D}, or \code{Z} are dropped (listwise deletion). 19 | #' 20 | #' One-sided noncompliance is supported. The mean for the always-/never-taker subpopulation will only be computed if there are at least two observed units in these subpopulations. 21 | #' 22 | #' If \code{boot=FALSE}, standard errors based on asymptotic theory are estimated. 23 | #' 24 | #' The balance test is a t-test allowing for unequal variances. 25 | #' 26 | #' @return 27 | #' Returns an object \code{ivdesc} with estimates for each subgroup (\code{co}: complier, \code{nt}: never-taker, \code{at} : always-taker) and the full sample: 28 | #' 29 | #' \itemize{ 30 | #' \item \code{mu} and \code{mu_se} : Mean of \code{X} and standard error 31 | #' \item \code{pi} and \code{pi_se}: Proportion of each subgroup in the sample and standard error 32 | #' \item \code{var}: Variance of \code{X} (if \code{variance=TRUE}) 33 | #' } 34 | #' 35 | #' Can be coerced to a proper \code{data.frame} using \code{as.data.frame}. 36 | #' 37 | #' @seealso 38 | #' \code{\link[AER]{ivreg}} 39 | #' 40 | #' 41 | #' @references 42 | #' M. Marbach and D. Hangartner. 2020. Profiling Compliers and Non-compliers for Instrumental Variable Analysis. \emph{Political Analysis}, 28(3), 435-444. 43 | #' 44 | #' D. Hangartner, M. Marbach, L. Henckel, M. H. Maathuis, R. R. Kelz, and L. Keele. 2021.
Profiling Compliers in Instrumental Variables Designs. Available at arXiv: \href{https://arxiv.org/abs/2103.06328}{https://arxiv.org/abs/2103.06328}. 45 | #' 46 | #' @examples 47 | #' 48 | #' 49 | #' # Example 1: Albertson/Lawrence (2009) 50 | #' # see Marbach/Hangartner (2019) for details/discussion 51 | #' data(FoxDebate) 52 | #' 53 | #' with(FoxDebate, ivdesc(X=readnews,D=watchpro,Z=conditn) ) 54 | #' 55 | #' 56 | #' 57 | #' \donttest{ 58 | #' 59 | #' # Example 2: JTPA Data 60 | #' 61 | #' library(haven) 62 | #' jtpa <- read_dta("http://fmwww.bc.edu/repec/bocode/j/jtpa.dta") 63 | #' 64 | #' with(jtpa, ivdesc(age, training, assignmt, bootn=500)) 65 | #' with(jtpa, ivdesc(hispanic, training, assignmt, boot=FALSE)) 66 | #' 67 | #' } 68 | #' 69 | #' 70 | #' 71 | #' 72 | #' @importFrom knitr kable 73 | #' 74 | #' @export 75 | ivdesc <- function(X,D,Z, variance=FALSE, boot=TRUE, bootn=1000, balance=TRUE, ...){ 76 | 77 | # Checks 78 | if(!is.numeric(D)) stop("D has to be numeric with values c(0,1,NA).") 79 | if(!is.numeric(Z)) stop("Z has to be numeric with values c(0,1,NA).") 80 | 81 | if( sum(D %in% c(0,1,NA))!=length(D) ) stop("D can only contain values c(0,1,NA).") 82 | if( sum(Z %in% c(0,1,NA))!=length(Z) ) stop("Z can only contain values c(0,1,NA).") 83 | 84 | if( length(D)!=length(Z) ) stop("D has to be of the same length of Z.") 85 | if( length(X)!=length(Z) ) stop("X has to be of the same length of Z.") 86 | if( length(X)!=length(D) ) stop("X has to be of the same length of D.") 87 | 88 | if( boot==TRUE & bootn<2 ) stop("bootn has be larger than 2.") 89 | 90 | if (!is.numeric(X)){ 91 | X <- as.numeric(X) 92 | warning("X coerced to numeric.") 93 | } 94 | 95 | # Listwise deletion 96 | nomiss <- !is.na(X) & !is.na(D) & !is.na(Z) 97 | X <- X[nomiss] 98 | D <- D[nomiss] 99 | Z <- Z[nomiss] 100 | 101 | if(boot==FALSE) { boot <- 0 } 102 | else { boot <- bootn } 103 | 104 | if( (mean(D[Z==1]==1)-mean(D[Z==0]==1))<0 ) stop("First-stage is negative. 
Please reverse coding of Z.") 105 | if( sum(D==Z)==length(D) ) stop("There is full compliance with the instrument (D=Z).") 106 | 107 | res <- ivdesc_all(X,D,Z,boot=boot,variance=variance,...) 108 | 109 | if( balance ){ 110 | bal <- t.test(X ~ Z, var.equal=FALSE) 111 | attr(res, "balance_pval") <- bal$p.value 112 | } 113 | 114 | class(res) <- c('ivdesc', 'data.frame') 115 | 116 | return(res) 117 | } 118 | 119 | #' @method print ivdesc 120 | #' @export 121 | print.ivdesc <- function(x,...) { 122 | class(x) <- 'data.frame' 123 | print(kable(x)) 124 | pvals <- attr(x, 'pvals') 125 | balance_pval <- attr(x, 'balance_pval') 126 | if( !is.null(pvals) ){ 127 | cat("\nBootstrapped p-values:") 128 | print(kable(pvals, col.names=c("group","Pr(Tt)"))) 129 | cat("\n\n") 130 | } else {cat("\n")} 131 | if( !is.null(balance_pval) ){ 132 | cat("Balance test: H0: E[X|Z=0]=E[X|Z=1]\n") 133 | cat("Pr(|T| > |t|) = ", format(attr(x,'balance_pval'),digits=3), "\n\n") 134 | } 135 | invisible(x) 136 | } 137 | 138 | 139 | -------------------------------------------------------------------------------- /ivdesc_calc.ado: -------------------------------------------------------------------------------- 1 | /* 2 | Version 1.1.0 (March 18, 2021) 3 | Author: Moritz Marbach, moritz.marbach@tamu.edu 4 | URL: https://github.com/sumtxt/ivdesc 5 | Changelog 6 | 1.0.1 : Added coefplot to help file 7 | 1.1.0 : Added asymptotic SE for complier mean 8 | */ 9 | 10 | program ivdesc_calc, rclass 11 | version 12 12 | 13 | syntax varlist(default=none min=1 max=3) [, VARiance] 14 | 15 | local X: word 1 of `varlist' 16 | local D: word 2 of `varlist' 17 | local Z: word 3 of `varlist' 18 | 19 | * Estimation 20 | 21 | quietly: { 22 | 23 | summarize `D' if `Z'==1 24 | local pi_co1 = r(mean) 25 | local v_pi_co1 = r(Var) 26 | 27 | summarize `D' if `Z'==0 28 | local pi_co2 = r(mean) 29 | local v_pi_co2 = r(Var) 30 | 31 | summarize `Z' 32 | local N_z1 = r(sum) 33 | local N_z0 = r(N)-r(sum) 34 | local N = r(N) 35 | local 
p_z = r(mean) 36 | 37 | local pi_co = `pi_co1'-`pi_co2' 38 | local se_pi_co = sqrt( (`v_pi_co1'/`N_z1') + (`v_pi_co2'/`N_z0') ) 39 | 40 | if `pi_co'<0 { 41 | display as error "First-stage is negative. Please reverse coding of Z." 42 | exit 42 43 | } 44 | 45 | summarize `D' if `Z'==1 46 | local pi_nt = 1-r(mean) 47 | local se_pi_nt = sqrt( r(Var)/`N_z1' ) 48 | 49 | summarize `D' if `Z'==0 50 | local pi_at = r(mean) 51 | local se_pi_at = sqrt( r(Var)/`N_z0' ) 52 | 53 | summarize `X' if `Z'==1 & `D'==0 54 | local v_nt = r(Var) 55 | local mu_nt = r(mean) 56 | 57 | summarize `X' if `Z'==0 & `D'==1 58 | local v_at = r(Var) 59 | local mu_at = r(mean) 60 | 61 | summarize `X' 62 | local mu = r(mean) 63 | local v = r(Var) 64 | 65 | count if `Z'==1 & `D'==0 66 | local k_nt = r(N) 67 | 68 | count if `Z'==0 & `D'==1 69 | local k_at = r(N) 70 | 71 | if `k_at'<2 { 72 | 73 | local mu_co = (1/`pi_co') * `mu' - (`pi_nt'/`pi_co') * `mu_nt' 74 | 75 | if !missing("`variance'") { 76 | 77 | local v_co1 = (`v_nt' * `pi_nt') 78 | 79 | local v_co2 = ( `mu_co' * `mu_co' * `pi_co' * (1-`pi_co') ) + ( `mu_nt' *`mu_nt' * `pi_nt' * (1-`pi_nt') ) 80 | 81 | local v_co3 = (`mu_nt' * `pi_nt' * `mu_co' * `pi_co') 82 | 83 | local v_co = (1/`pi_co')*`v' - (1/`pi_co')*(`v_co1' + `v_co2' -2 * `v_co3') 84 | 85 | } 86 | 87 | } 88 | 89 | else if `k_nt'<2 { 90 | 91 | local mu_co = (1/`pi_co') * `mu' - (`pi_at'/`pi_co') * `mu_at' 92 | 93 | if !missing("`variance'") { 94 | 95 | local v_co1 = (`v_at' * `pi_at') 96 | 97 | local v_co2 = ( `mu_co' * `mu_co' * `pi_co' * (1-`pi_co') ) + ( `mu_at' * `mu_at' * `pi_at' * (1-`pi_at') ) 98 | 99 | local v_co3 = (`mu_at' * `pi_at' * `mu_co' * `pi_co') 100 | 101 | local v_co = (1/`pi_co')*`v' - (1/`pi_co')*(`v_co1' + `v_co2' -2 * `v_co3') 102 | 103 | } 104 | 105 | } 106 | 107 | else { 108 | 109 | local mu_co = (1/`pi_co') * `mu' - (`pi_nt'/`pi_co') * `mu_nt' - (`pi_at'/`pi_co') * `mu_at' 110 | 111 | if !missing("`variance'") { 112 | 113 | local v_co1 = (`v_nt' * `pi_nt') + 
(`v_at' * `pi_at') 114 | 115 | local v_co2 = ( `mu_co' * `mu_co' * `pi_co' * (1-`pi_co') ) + ( `mu_nt' *`mu_nt' * `pi_nt' * (1-`pi_nt') ) + ( `mu_at' * `mu_at' * `pi_at' * (1-`pi_at') ) 116 | 117 | local v_co3 = (`mu_nt' * `pi_nt' * `mu_co' * `pi_co') + (`mu_at' * `pi_at' * `mu_co' * `pi_co') + (`mu_at' * `pi_at' * `mu_nt' * `pi_nt') 118 | 119 | local v_co = (1/`pi_co')*`v' - (1/`pi_co')*(`v_co1' + `v_co2' -2 * `v_co3') 120 | 121 | } 122 | 123 | } 124 | 125 | local se_mu = sqrt(`v')/sqrt(_N) 126 | local se_mu_nt = sqrt(`v_nt')/sqrt(`k_nt') 127 | local se_mu_at = sqrt(`v_at')/sqrt(`k_at') 128 | 129 | * Start: compute se_mu_co 130 | tempvar Z1DX D1ZX Z1D D1Z 131 | gen `Z1DX' = `Z'*(1-`D')*`X' 132 | gen `D1ZX' = `D'*(1-`Z')*`X' 133 | gen `Z1D' = `Z'*(1-`D') 134 | gen `D1Z' = `D'*(1-`Z') 135 | 136 | summarize `Z1DX' 137 | local mu_vnt = r(mean) 138 | 139 | summarize `D1ZX' 140 | local mu_vat = r(mean) 141 | 142 | summarize `Z1D' 143 | local pi_vnt = r(mean) 144 | 145 | summarize `D1Z' 146 | local pi_vat = r(mean) 147 | 148 | corr `X' `Z1DX' `D1ZX' `Z1D' `D1Z' `Z', cov 149 | matrix covB = r(C) 150 | 151 | local Mu = (`mu'-`mu_vnt'/`p_z'-`mu_vat'/(1-`p_z')) 152 | 153 | local b1 = 1/`pi_co' 154 | local b2 = -1/(`pi_co'*`p_z') 155 | local b3 = -1/(`pi_co'*(1-`p_z')) 156 | local b4 = `Mu'/(`pi_co'^2*`p_z') 157 | local b5 = `Mu'/(`pi_co'^2*(1-`p_z')) 158 | local b6 = (`pi_vat'/(1-`p_z')^2*`mu'-`pi_vnt'/`p_z'^2*`mu' -`pi_vat'/(`p_z'*(1-`p_z'))^2*`mu_vnt' + `mu_vnt'/`p_z'^2 - `mu_vat'/(1-`p_z')^2 + `pi_vnt'/(`p_z'*(1-`p_z'))^2*`mu_vat' )/(`pi_co')^2 159 | 160 | matrix B = (`b1' \ `b2' \ `b3' \ `b4' \ `b5' \ `b6') 161 | 162 | matrix tBcovBB = B' * covB * B 163 | local se_mu_co = sqrt( (1/`N') * tBcovBB[1,1]) 164 | * :End 165 | 166 | } 167 | 168 | return scalar mu = `mu' 169 | 170 | return scalar mu_nt = `mu_nt' 171 | return scalar mu_at = `mu_at' 172 | return scalar mu_co = `mu_co' 173 | 174 | return scalar se_mu = `se_mu' 175 | return scalar se_mu_nt = `se_mu_nt' 176 | return 
scalar se_mu_at = `se_mu_at' 177 | return scalar se_mu_co = `se_mu_co' 178 | 179 | return scalar pi_co = `pi_co' 180 | return scalar pi_nt = `pi_nt' 181 | return scalar pi_at = `pi_at' 182 | 183 | return scalar se_pi_co = `se_pi_co' 184 | return scalar se_pi_nt = `se_pi_nt' 185 | return scalar se_pi_at = `se_pi_at' 186 | 187 | if !missing("`variance'") { 188 | 189 | return scalar v_nt = `v_nt' 190 | return scalar v_at = `v_at' 191 | return scalar v_co = `v_co' 192 | return scalar v = `v' 193 | 194 | } 195 | 196 | end -------------------------------------------------------------------------------- /ivdesc.ado: -------------------------------------------------------------------------------- 1 | /* 2 | Version 1.1.0 (March 18, 2021) 3 | Author: Moritz Marbach, moritz.marbach@tamu.edu 4 | URL: https://github.com/sumtxt/ivdesc 5 | Changelog 6 | 1.0.1 : Added coefplot to help file 7 | 1.1.0 : Added asymptotic SE for complier mean 8 | */ 9 | 10 | program ivdesc, rclass 11 | version 12 12 | 13 | syntax varlist(default=none min=1 max=3) [if/] [in/] [, NOboot VARiance NOBALance fmt(passthru) Reps(integer 1000)] 14 | 15 | local X: word 1 of `varlist' 16 | local D: word 2 of `varlist' 17 | local Z: word 3 of `varlist' 18 | 19 | preserve 20 | 21 | if !missing("`if'") { 22 | quietly: keep if `if' & `X'!=. & `D'!=. & `Z'!=. 23 | } 24 | else { 25 | quietly: keep if `X'!=. & `D'!=. & `Z'!=. 26 | } 27 | 28 | quietly: count if `D'!=`Z' 29 | if r(N)<1 { 30 | display as error "There is full compliance with the instrument (D=Z)." 
31 | exit 42 32 | } 33 | 34 | 35 | if missing("`noboot'") { 36 | 37 | ivdesc_calc `X' `D' `Z', `variance' 38 | 39 | local mu = r(mu) 40 | local mu_co = r(mu_co) 41 | local mu_nt = r(mu_nt) 42 | local mu_at = r(mu_at) 43 | 44 | local pi_co = r(pi_co) 45 | local pi_nt = r(pi_nt) 46 | local pi_at = r(pi_at) 47 | 48 | if !missing("`variance'") { 49 | 50 | local v_co = r(v_co) 51 | local v_nt = r(v_nt) 52 | local v_at = r(v_at) 53 | local v = r(v) 54 | 55 | } 56 | 57 | if `mu_at'==. { 58 | bootstrap mu_co=r(mu_co) mu_nt=r(mu_nt) mu=r(mu) /// 59 | pi_co=r(pi_co) pi_nt=r(pi_nt) /// 60 | p_co_s_nt=(r(mu_co)<r(mu_nt)) p_co_g_nt=(r(mu_co)>r(mu_nt)), reps(`reps') notable nolegend nowarn noheader: ivdesc_calc `X' `D' `Z' 61 | } 62 | 63 | else if `mu_nt'==. { 64 | bootstrap mu_co=r(mu_co) mu_at=r(mu_at) mu=r(mu) /// 65 | pi_co=r(pi_co) pi_at=r(pi_at) /// 66 | p_co_s_at=(r(mu_co)<r(mu_at)) p_co_g_at=(r(mu_co)>r(mu_at)), reps(`reps') notable nolegend nowarn noheader: ivdesc_calc `X' `D' `Z' 67 | } 68 | 69 | else { 70 | bootstrap mu_co=r(mu_co) mu_at=r(mu_at) mu_nt=r(mu_nt) mu=r(mu) /// 71 | pi_co=r(pi_co) pi_at=r(pi_at) pi_nt=r(pi_nt) /// 72 | p_co_s_nt=(r(mu_co)<r(mu_nt)) p_co_g_nt=(r(mu_co)>r(mu_nt)) /// 73 | p_co_s_at=(r(mu_co)<r(mu_at)) p_co_g_at=(r(mu_co)>r(mu_at)) /// 74 | p_at_s_nt=(r(mu_at)<r(mu_nt)) p_at_g_nt=(r(mu_at)>r(mu_nt)), reps(`reps') notable nolegend nowarn noheader: ivdesc_calc `X' `D' `Z' 75 | } 76 | 77 | matrix bootse = e(se) 78 | matrix bootb = e(b_bs) 79 | 80 | local se_mu = bootse[1,colnumb(bootse,"mu")] 81 | local se_mu_co = bootse[1,colnumb(bootse,"mu_co")] 82 | local se_mu_at = bootse[1,colnumb(bootse,"mu_at")] 83 | local se_mu_nt = bootse[1,colnumb(bootse,"mu_nt")] 84 | 85 | local se_pi_co = bootse[1,colnumb(bootse,"pi_co")] 86 | local se_pi_at = bootse[1,colnumb(bootse,"pi_at")] 87 | local se_pi_nt = bootse[1,colnumb(bootse,"pi_nt")] 88 | 89 | local p_co_s_nt = bootb[1,colnumb(bootb,"p_co_s_nt")] 90 | local p_co_g_nt = bootb[1,colnumb(bootb,"p_co_g_nt")] 91 | local p_co_s_at = bootb[1,colnumb(bootb,"p_co_s_at")] 92 | local p_co_g_at = bootb[1,colnumb(bootb,"p_co_g_at")] 93 | local p_at_s_nt = 
bootb[1,colnumb(bootb,"p_at_s_nt")] 94 | local p_at_g_nt = bootb[1,colnumb(bootb,"p_at_g_nt")] 95 | 96 | if missing("`variance'") { 97 | 98 | matrix input res = ( `mu', `se_mu', 1, 0 \ `mu_co', `se_mu_co', `pi_co', `se_pi_co' \ `mu_nt', `se_mu_nt', `pi_nt', `se_pi_nt' \ `mu_at', `se_mu_at', `pi_at', `se_pi_at' ) 99 | matrix colnames res = "Mean" "Boot.-SE" "Proportion" "Boot.-SE" 100 | 101 | } 102 | 103 | else { 104 | 105 | matrix input res = ( `mu', `se_mu', `v', 1, 0 \ `mu_co', `se_mu_co', `v_co', `pi_co', `se_pi_co' \ `mu_nt', `se_mu_nt', `v_nt', `pi_nt', `se_pi_nt' \ `mu_at', `se_mu_at', `v_at', `pi_at', `se_pi_at' ) 106 | matrix colnames res = "Mean" "Boot.-SE" "Variance" "Proportion" "Boot.-SE" 107 | 108 | } 109 | 110 | matrix input pvals = ( `p_co_s_nt', `p_co_g_nt' \ `p_co_s_at', `p_co_g_at' \ `p_at_s_nt', `p_at_g_nt') 111 | matrix colnames pvals = "Pr(T < t)" "Pr(T > t)" 112 | matrix rownames pvals = "co vs nt" "co vs at" "at vs nt" 113 | 114 | } 115 | 116 | else { 117 | 118 | ivdesc_calc `X' `D' `Z', `variance' 119 | 120 | if missing("`variance'") { 121 | 122 | matrix input res = ( `r(mu)', `r(se_mu)', 1, 0 \ `r(mu_co)', `r(se_mu_co)', `r(pi_co)', `r(se_pi_co)' \ `r(mu_nt)', `r(se_mu_nt)', `r(pi_nt)', `r(se_pi_nt)' \ `r(mu_at)', `r(se_mu_at)', `r(pi_at)', `r(se_pi_at)' ) 123 | matrix colnames res = "Mean" "SE" "Proportion" "SE" 124 | 125 | } 126 | 127 | else { 128 | 129 | matrix input res = ( `r(mu)', `r(se_mu)', `r(v)', 1, 0 \ `r(mu_co)', ., `r(v_co)', `r(pi_co)', `r(se_pi_co)' \ `r(mu_nt)', `r(se_mu_nt)', `r(v_nt)', `r(pi_nt)', `r(se_pi_nt)' \ `r(mu_at)', `r(se_mu_at)', `r(v_at)', `r(pi_at)', `r(se_pi_at)' ) 130 | matrix colnames res = "Mean" "SE" "Variance" "Proportion" "SE" 131 | 132 | } 133 | 134 | } 135 | 136 | matrix rownames res = "whole sample" "complier" "never-taker" "always-taker" 137 | 138 | estout matrix(res, `fmt'), mlabels(none) title("Variable: " `X') 139 | return mat ivdesc = res 140 | 141 | 142 | if missing("`noboot'") { 143 | 144 | 
estout matrix(pvals, `fmt'), mlabels(none) title("Bootstrapped p-values:") 145 | return mat pvals = pvals 146 | 147 | } 148 | 149 | 150 | if missing("`nobalance'") { 151 | 152 | quietly: ttest `X', by(`Z') unequal 153 | local p2 : di %6.4f r(p) 154 | 155 | di as txt "" 156 | di as txt "Balance test: H0: E[X|Z=0]=E[X|Z=1]" 157 | di "Pr(|T| > |t|) = `p2'" 158 | 159 | return scalar bal_pval = `p2' 160 | 161 | } 162 | 163 | restore 164 | 165 | end 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /R/ivdesc/R/ivdesc_est.R: -------------------------------------------------------------------------------- 1 | #' @importFrom rsample analysis 2 | #' @importFrom rsample bootstraps 3 | #' @importFrom purrr map_df 4 | #' @importFrom stats var t.test 5 | ivdesc_boot <- function(X,D,Z,times){ 6 | df <- bootstraps(data.frame(X=X,D=D,Z=Z), times=times) 7 | return(map_df(df$splits, ivdesc_mu_F0_)) 8 | } 9 | 10 | ivdesc_boot_sum <- function(boot){ 11 | 12 | se_mu <- with(boot, sd(mu, na.rm=TRUE) ) 13 | se_mu_co <- with(boot, sd(mu_co, na.rm=TRUE) ) 14 | se_mu_nt <- with(boot, sd(mu_nt, na.rm=TRUE) ) 15 | se_mu_at <- with(boot, sd(mu_at, na.rm=TRUE) ) 16 | 17 | se_pi_co <- with(boot, sd(pi_co, na.rm=TRUE) ) 18 | se_pi_nt <- with(boot, sd(pi_nt, na.rm=TRUE) ) 19 | se_pi_at <- with(boot, sd(pi_at, na.rm=TRUE) ) 20 | 21 | p_co_nt <- with(boot, mean(mu_co>mu_nt, na.rm=TRUE) ) 22 | p_nt_co <- with(boot, mean(mu_nt>mu_co, na.rm=TRUE) ) 23 | 24 | p_co_at <- with(boot, mean(mu_co>mu_at, na.rm=TRUE) ) 25 | p_at_co <- with(boot, mean(mu_at>mu_co, na.rm=TRUE) ) 26 | 27 | p_at_nt <- with(boot, mean(mu_at>mu_nt, na.rm=TRUE) ) 28 | p_nt_at <- with(boot, mean(mu_nt>mu_at, na.rm=TRUE) ) 29 | 30 | agg <- data.frame(se_mu=se_mu, 31 | se_mu_co=se_mu_co, se_mu_nt=se_mu_nt, se_mu_at=se_mu_at, 32 | se_pi_co=se_pi_co, se_pi_nt=se_pi_nt, se_pi_at=se_pi_at) 33 | 34 | pvals <- as.data.frame(rbind( 35 | c("co_vs_nt", p_nt_co, p_co_nt), 36 | c("co_vs_at", 
p_at_co, p_co_at), 37 | c("at_vs_nt", p_nt_at, p_at_nt) 38 | )) 39 | 40 | colnames(pvals) <- c("group", "smaller", "greater") 41 | 42 | return(list(agg=agg, pvals=pvals)) 43 | } 44 | 45 | ivdesc_mu_F0_ <- function(split) with(analysis(split), ivdesc_mu_F0(X,D,Z)) 46 | 47 | ivdesc_mu_F0 <- function(X,D,Z){ 48 | 49 | pi_co = mean(D[Z==1]==1)-mean(D[Z==0]==1) 50 | pi_nt = mean(D[Z==1]==0) 51 | pi_at = mean(D[Z==0]==1) 52 | 53 | mu_nt = mean(X[Z==1 & D==0]) 54 | mu_at = mean(X[Z==0 & D==1]) 55 | mu = mean(X) 56 | 57 | K_nt = sum(Z==1 & D==0) 58 | K_at = sum(Z==0 & D==1) 59 | 60 | if( K_at<2){ 61 | 62 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt 63 | 64 | } else if (K_nt<2) { 65 | 66 | mu_co = (1/pi_co) * mu - (pi_at/pi_co) * mu_at 67 | 68 | } else { 69 | 70 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt - (pi_at/pi_co) * mu_at 71 | 72 | } 73 | 74 | dat <- data.frame(mu=mu, mu_co=mu_co,mu_nt=mu_nt,mu_at=mu_at, 75 | pi_co=pi_co, pi_nt=pi_nt, pi_at=pi_at) 76 | 77 | return(dat) 78 | } 79 | 80 | 81 | ivdesc_all <- function(X,D,Z,boot,variance,kappa=FALSE){ 82 | 83 | N <- length(X) 84 | 85 | pi_co = mean(D[Z==1]==1)-mean(D[Z==0]==1) 86 | pi_nt = mean(D[Z==1]==0) 87 | pi_at = mean(D[Z==0]==1) 88 | 89 | # Totals 90 | 91 | K_nt = sum(Z==1 & D==0) 92 | K_at = sum(Z==0 & D==1) 93 | 94 | N_co = pi_co * N 95 | N_nt = pi_nt * N 96 | N_at = pi_at * N 97 | 98 | 99 | # Mean / variance nt/at/sample 100 | 101 | mu_nt = mean(X[Z==1 & D==0]) 102 | mu_at = mean(X[Z==0 & D==1]) 103 | 104 | mu = mean(X) 105 | 106 | v_nt = var(X[Z==1 & D==0]) 107 | v_at = var(X[Z==0 & D==1]) 108 | 109 | v = var(X) 110 | 111 | sd_nt = sqrt(v_nt) 112 | sd_at = sqrt(v_at) 113 | 114 | sd = sqrt(v) 115 | 116 | 117 | # Mean / variance / standard deviation co 118 | 119 | if(kappa == FALSE){ 120 | 121 | if( K_at<2){ 122 | 123 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt 124 | 125 | if ( variance == TRUE ) { 126 | 127 | v_co1 = (v_nt * pi_nt) 128 | 129 | v_co2 = ( mu_co^2 * pi_co * (1-pi_co) )+ 130 | ( 
mu_nt^2 * pi_nt * (1-pi_nt) ) 131 | 132 | v_co3 = (mu_nt * pi_nt * mu_co * pi_co) 133 | 134 | v_co = (1/pi_co)*v - (1/pi_co)*(v_co1 + v_co2 -2 * v_co3) 135 | 136 | } 137 | 138 | } else if (K_nt<2) { 139 | 140 | mu_co = (1/pi_co) * mu - (pi_at/pi_co) * mu_at 141 | 142 | if ( variance == TRUE ) { 143 | 144 | v_co1 = (v_at * pi_at) 145 | 146 | v_co2 = ( mu_co^2 * pi_co * (1-pi_co) )+ 147 | ( mu_at^2 * pi_at * (1-pi_at) ) 148 | 149 | v_co3 = (mu_at * pi_at * mu_co * pi_co) 150 | 151 | v_co = (1/pi_co)*v - (1/pi_co)*(v_co1 + v_co2 -2 * v_co3) 152 | 153 | } 154 | 155 | } else { 156 | 157 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt - (pi_at/pi_co) * mu_at 158 | 159 | if ( variance == TRUE ) { 160 | 161 | v_co1 = ( (v_nt) * pi_nt) + ( (v_at) * pi_at) 162 | 163 | v_co2 = ( mu_co^2 * pi_co * (1-pi_co) )+ 164 | ( mu_nt^2 * pi_nt * (1-pi_nt) )+ 165 | ( mu_at^2 * pi_at * (1-pi_at) ) 166 | 167 | v_co3 = (mu_nt * pi_nt * mu_co * pi_co) + 168 | (mu_at * pi_at * mu_co * pi_co) + 169 | (mu_at * pi_at * mu_nt * pi_nt) 170 | 171 | v_co = (1/pi_co)*(v) - (1/pi_co)*(v_co1 + v_co2 -2 * v_co3) 172 | 173 | } 174 | 175 | } 176 | 177 | } else { 178 | 179 | kappa <- ( 1-((D*(1-Z))/mean(Z==0)) - (((1-D)*Z)/mean(Z==1)) ) 180 | mu_co <- (1/pi_co)*mean(X*kappa) 181 | variance <- FALSE 182 | 183 | } 184 | 185 | pz <- mean(Z==1) 186 | 187 | se_mu = sd/sqrt(N) 188 | se_mu_co = sqrt(get_var_mu_co(N=N,X=X,Z=Z,D=D)) 189 | se_mu_nt = sd_nt/sqrt(K_nt) 190 | se_mu_at = sd_at/sqrt(K_at) 191 | 192 | v_pi_co1 = var(D[Z==1]==1) 193 | v_pi_co2 = var(D[Z==0]==1) 194 | v_pi_nt = var(D[Z==1]==0) 195 | v_pi_at = var(D[Z==0]==1) 196 | 197 | se_pi_co = sqrt( (v_pi_co1/sum(Z==1))+(v_pi_co2/sum(Z==0)) ) 198 | se_pi_nt = sqrt( v_pi_nt/sum(Z==1) ) 199 | se_pi_at = sqrt( v_pi_at/sum(Z==0) ) 200 | 201 | 202 | if (variance == FALSE) { 203 | 204 | est = data.frame( 205 | group=c("sample","co", "nt", "at"), 206 | mu=c(mu,mu_co,mu_nt,mu_at), 207 | mu_se=c(se_mu,se_mu_co,se_mu_nt,se_mu_at), 208 | 
pi=c(1,pi_co,pi_nt,pi_at), 209 | pi_se=c(0,se_pi_co,se_pi_nt,se_pi_at)) 210 | 211 | } else { 212 | 213 | est = data.frame( 214 | group=c("sample","co", "nt", "at"), 215 | mu=c(mu,mu_co,mu_nt,mu_at), 216 | mu_se=c(se_mu,se_mu_co,se_mu_nt,se_mu_at), 217 | var=c(v,v_co,v_nt,v_at), 218 | pi=c(1,pi_co,pi_nt,pi_at), 219 | pi_se=c(0,se_pi_co,se_pi_nt,se_pi_at)) 220 | 221 | } 222 | 223 | 224 | if (boot>0) { 225 | 226 | boots <- ivdesc_boot(X,D,Z,times=boot) 227 | res <- ivdesc_boot_sum(boots) 228 | 229 | pvals <- res$pvals 230 | agg <- res$agg 231 | 232 | est[est$group=='sample','mu_se'] <- agg$se_mu 233 | est[est$group=='co','mu_se'] <- agg$se_mu_co 234 | est[est$group=='nt','mu_se'] <- agg$se_mu_nt 235 | est[est$group=='at','mu_se'] <- agg$se_mu_at 236 | 237 | est[est$group=='co','pi_se'] <- agg$se_pi_co 238 | est[est$group=='nt','pi_se'] <- agg$se_pi_nt 239 | est[est$group=='at','pi_se'] <- agg$se_pi_at 240 | 241 | attr(est, "pvals") <- pvals 242 | } 243 | 244 | return(est) 245 | 246 | } 247 | --------------------------------------------------------------------------------