├── .gitignore ├── R └── ivdesc │ ├── data │ └── FoxDebate.rda │ ├── .Rbuildignore │ ├── NAMESPACE │ ├── NEWS.md │ ├── R │ ├── utils.R │ ├── FoxDebate.R │ ├── ivdesc.R │ └── ivdesc_est.R │ ├── DESCRIPTION │ └── man │ ├── ivdesc.Rd │ └── FoxDebate.Rd ├── stata.toc ├── ivdesc.pkg ├── README.md ├── ivdesc.sthlp ├── ivdesc_calc.ado └── ivdesc.ado /.gitignore: -------------------------------------------------------------------------------- 1 | inst/doc 2 | data-raw 3 | install.R 4 | doc 5 | Meta 6 | docs 7 | .DS_Store -------------------------------------------------------------------------------- /R/ivdesc/data/FoxDebate.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sumtxt/ivdesc/HEAD/R/ivdesc/data/FoxDebate.rda -------------------------------------------------------------------------------- /stata.toc: -------------------------------------------------------------------------------- 1 | v 1.1.0 2 | d Materials by Moritz Marbach 3 | d ETH Zürich 4 | d moritz.marbach@gess.ethz.ch 5 | d https://github.com/sumtxt/ivdesc/ 6 | 7 | p ivdesc 8 | -------------------------------------------------------------------------------- /R/ivdesc/.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^data-raw 2 | ^README\.md$ 3 | ^cran-comments\.md$ 4 | ^\.DS_Store 5 | \.git 6 | ^CRAN-RELEASE$ 7 | ^DS_Store$ 8 | ^doc$ 9 | ^Meta$ 10 | ^_pkgdown\.yml$ 11 | ^pkgdown$ -------------------------------------------------------------------------------- /ivdesc.pkg: -------------------------------------------------------------------------------- 1 | v 1.1.0 2 | d 'IVDESC': Profiling Compliers and Non-compliers for Instrumental Variable Analysis 3 | d 4 | d Distribution-Date: 20210318 5 | d License: GNU General Public License v3.0 6 | d 7 | F ivdesc.sthlp 8 | F ivdesc.ado 9 | F ivdesc_calc.ado 10 | -------------------------------------------------------------------------------- 
/R/ivdesc/NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,ivdesc) 4 | export(ivdesc) 5 | importFrom(knitr,kable) 6 | importFrom(purrr,map_df) 7 | importFrom(rsample,analysis) 8 | importFrom(rsample,bootstraps) 9 | importFrom(stats,cov) 10 | importFrom(stats,t.test) 11 | importFrom(stats,var) 12 | -------------------------------------------------------------------------------- /R/ivdesc/NEWS.md: -------------------------------------------------------------------------------- 1 | # ivdesc 1.1.2 2 | 3 | # Version 1.1.2 (2025-07-21) 4 | 5 | * Added FoxDebate data from archived icsw package 6 | * Removed icsw package dependency 7 | 8 | # Version 1.1.1 (2022-12-18) 9 | 10 | * Fixed lazy data note 11 | 12 | # Version 1.1.0 (2020-03-18) 13 | 14 | * Added standard errors based on asymptotic theory 15 | 16 | # Version 1.0.0 (2019-10-10) 17 | 18 | * First release 19 | -------------------------------------------------------------------------------- /R/ivdesc/R/utils.R: -------------------------------------------------------------------------------- 1 | #' @importFrom stats cov 2 | get_var_mu_co <- function(N,X,Z,D){ 3 | 4 | C <- cbind(X,Z*(1-D)*X,D*(1-Z)*X,Z*(1-D),D*(1-Z),Z) 5 | 6 | empCov <- cov(C) 7 | means <- apply(C,2,mean) 8 | 9 | hatdH <- deltaH(means[1:3],means[4:6]) 10 | 11 | var_mu_co <- 1/N*t(hatdH)%*%empCov%*%hatdH 12 | 13 | return(var_mu_co) 14 | } 15 | 16 | 17 | deltaH <- function(mu,pi){ 18 | p <- pi[3] 19 | 20 | q <- pi[1] 21 | k <- pi[2] 22 | 23 | x <- mu[1] 24 | y <- mu[2] 25 | z <- mu[3] 26 | 27 | Pco <- (1-q/p-k/(1-p)) 28 | Mu <- (x-y/p-z/(1-p)) 29 | 30 | dH <- rep(NA,6) 31 | 32 | dH[1] <- 1/Pco 33 | dH[2] <- -1/(Pco*p) 34 | dH[3] <- -1/(Pco*(1-p)) 35 | dH[4] <- Mu/(Pco^2*p) 36 | dH[5] <- Mu/(Pco^2*(1-p)) 37 | dH[6] <- (k/(1-p)^2*x-q/p^2*x -k/(p*(1-p))^2*y + y/p^2 - z/(1-p)^2 + q/(p*(1-p))^2*z )/(Pco)^2 38 | 39 | return(dH) 40 | } 
-------------------------------------------------------------------------------- /R/ivdesc/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ivdesc 2 | Title: Profiling Compliers and Non-Compliers for Instrumental Variable Analysis 3 | Version: 1.1.2 4 | Authors@R: c(person("Moritz", "Marbach", role = c("aut", "cre"), email="m.marbach@ucl.ac.uk", comment = c(ORCID = "0000-0002-7101-2821"))) 5 | Description: Estimating the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation in the context of instrumental variable estimation. This package implements the method described in Marbach and Hangartner (2020) and Hangartner, Marbach, Henckel, Maathuis, Kelz and Keele (2021) . 6 | Depends: R (>= 3.4.0) 7 | License: GPL-3 8 | URL: https://github.com/sumtxt/ivdesc/ 9 | BugReports: https://github.com/sumtxt/ivdesc/issues 10 | Encoding: UTF-8 11 | LazyData: true 12 | RoxygenNote: 7.3.2 13 | Suggests: 14 | haven 15 | Imports: 16 | knitr (>= 1.20.8), 17 | purrr (>= 0.2.5), 18 | rsample (>= 0.0.3) 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ivdesc 2 | 3 | 4 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/ivdesc)](https://cran.r-project.org/package=ivdesc) 5 | [![Downloads](https://cranlogs.r-pkg.org/badges/grand-total/ivdesc)](https://CRAN.R-project.org/package=ivdesc) 6 | [![Downloads](https://cranlogs.r-pkg.org/badges/ivdesc)](https://CRAN.R-project.org/package=ivdesc) 7 | 8 | 9 | Estimating the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation in the context of instrumental variable estimation. 
This package implements the method described in [Marbach and Hangartner (2020)](https://doi.org/10.1017/pan.2019.48) and [Hangartner, Marbach, Henckel, Maathuis, Kelz and Keele (2021)](https://arxiv.org/abs/2103.06328). 10 | 11 | 12 | ### Install R Package 13 | 14 | You can install the package directly from CRAN: 15 | 16 | ```R 17 | install.packages("ivdesc") 18 | ``` 19 | 20 | Or install the latest version from Github using: 21 | 22 | ```R 23 | remotes::install_github("sumtxt/ivdesc/R/ivdesc") 24 | ``` 25 | 26 | You may need to install the `remotes` package first. 27 | 28 | 29 | ### Install STATA Package 30 | 31 | To install the STATA package directly from SSC use: 32 | 33 | ```STATA 34 | ssc install ivdesc 35 | ``` 36 | 37 | Or install the latest version from Github using: 38 | 39 | ```STATA 40 | github install sumtxt/ivdesc 41 | ``` 42 | 43 | This will only work if you installed the [GITHUB module](https://github.com/haghish/github) via `net install github, from("https://haghish.github.io/github/")` first. 44 | -------------------------------------------------------------------------------- /R/ivdesc/man/ivdesc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ivdesc.R 3 | \name{ivdesc} 4 | \alias{ivdesc} 5 | \title{Profiling compliers and non-compliers for instrumental variable analysis} 6 | \usage{ 7 | ivdesc( 8 | X, 9 | D, 10 | Z, 11 | variance = FALSE, 12 | boot = TRUE, 13 | bootn = 1000, 14 | balance = TRUE, 15 | ... 
16 | ) 17 | } 18 | \arguments{ 19 | \item{X}{vector with numeric covariate} 20 | 21 | \item{D}{vector with binary treatment} 22 | 23 | \item{Z}{vector with binary instrument} 24 | 25 | \item{variance}{Calculate the variance of the covariate for each subgroup?} 26 | 27 | \item{boot}{Replace all standard errors with bootstrap standard errors?} 28 | 29 | \item{bootn}{number of bootstraps (ignored if \code{boot=FALSE} )} 30 | 31 | \item{balance}{Run balance test?} 32 | 33 | \item{...}{additional arguments to be passed to \code{ivdesc_all}} 34 | } 35 | \value{ 36 | Returns an object \code{ivdesc} with estimates for each subgroup (\code{co}: complier, \code{nt}: never-taker, \code{at} : always-taker) and the full sample: 37 | 38 | \itemize{ 39 | \item \code{mu} and \code{mu_se} : Mean of \code{X} and standard error 40 | \item \code{pi} and \code{pi_se}: Proportion of each subgroup in the sample and standard error 41 | \item \code{var}: Variance of \code{X} (if \code{variance=TRUE}) 42 | } 43 | 44 | Can be coerced to a proper \code{data.frame} using \code{as.data.frame}. 45 | } 46 | \description{ 47 | Estimates the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation. 48 | } 49 | \details{ 50 | This function estimates the mean and the associated standard error of \code{X} for the complier, never-taker and always-taker subpopulation within a sample where some, but not all, units are encouraged by instrument \code{Z} to take the treatment \code{D}. 51 | Observations with missing values in either \code{X}, \code{D}, or \code{Z} are dropped (listwise deletion). 52 | 53 | One-sided noncompliance is supported. The mean for the always-/never-taker subpopulation will only be computed if there are at least two observed units in these subpopulations. 54 | 55 | If \code{boot=FALSE}, standard errors based on asymptotic theory are estimated. 56 | 57 | The balance test is a t-test allowing for unequal variances.
58 | } 59 | \examples{ 60 | 61 | 62 | # Example 1: Albertson/Lawrence (2009) 63 | # see Marbach/Hangartner (2019) for details/discussion 64 | data(FoxDebate) 65 | 66 | with(FoxDebate, ivdesc(X=readnews,D=watchpro,Z=conditn) ) 67 | 68 | 69 | 70 | \donttest{ 71 | 72 | # Example 2: JTPA Data 73 | 74 | library(haven) 75 | jtpa <- read_dta("http://fmwww.bc.edu/repec/bocode/j/jtpa.dta") 76 | 77 | with(jtpa, ivdesc(age, training, assignmt, bootn=500)) 78 | with(jtpa, ivdesc(hispanic, training, assignmt, boot=FALSE)) 79 | 80 | } 81 | 82 | 83 | 84 | 85 | } 86 | \references{ 87 | M. Marbach and D. Hangartner. 2020. Profiling Compliers and Non-compliers for Instrumental Variable Analysis. \emph{Political Analysis}, 28(3), 435-444. 88 | 89 | D. Hangartner, M. Marbach, L. Henckel, M. H. Maathuis, R. R. Kelz, and L. Keele. 2021. Profiling Compliers in Instrumental Variables Designs. Available at arXiv: \href{https://arxiv.org/abs/2103.06328}{https://arxiv.org/abs/2103.06328}. 90 | } 91 | \seealso{ 92 | \code{\link[AER]{ivreg}} 93 | } 94 | -------------------------------------------------------------------------------- /ivdesc.sthlp: -------------------------------------------------------------------------------- 1 | {smcl} 2 | {* *! 
version 1.1.0 March 18, 2021 @ 18:24:26}{...} 3 | {cmd:help ivdesc} 4 | {hline} 5 | 6 | {title:Title} 7 | 8 | {p2colset 5 20 22 2}{...} {p2col :{hi:ivdesc} {hline 2}} 9 | Profiling compliers and non-compliers for instrumental variable analysis {p_end} {p2colreset}{...} 10 | 11 | {title:Syntax} 12 | 13 | {p 8 17 2}{cmd:ivdesc} 14 | {it:{help varname:covariate}} 15 | {it:{help varname:treatment}} 16 | {it:{help varname:instrument}} 17 | {ifin} 18 | [{cmd:,} {it:options}] 19 | 20 | 21 | {marker options}{...} 22 | {title:Options} 23 | 24 | {phang}{opt no:boot} report standard errors based on asymptotic theory instead of bootstrap standard errors 25 | 26 | {phang}{opt r:eps(#)} perform # bootstrap replications; default is reps(1000) 27 | 28 | {phang}{opt var:iance} also report the variance of the covariate for each subgroup 29 | 30 | {phang}{opt nobal:ance} skip the balance test 31 | 32 | {phang}{cmd:fmt(}{it:subopts}{cmd:)} passed through to {cmd:estout matrix(,subopts)} that handles the display of the estimates 33 | 34 | 35 | {marker description}{...} 36 | {title:Description} 37 | 38 | {phang} {cmd:ivdesc} estimates the mean and the associated standard error of a covariate for the complier, never-taker and always-taker subpopulation within a sample where some, but not all, units are encouraged by instrument to take the treatment. 39 | 40 | {title:Remarks} 41 | 42 | {phang} Observations with missing values in either {it:covariate}, {it:treatment}, 43 | or {it:instrument} are deleted before estimation (listwise deletion). 44 | 45 | {phang} One-sided noncompliance is supported. The mean for the always-/never-taker subpopulation will only be computed if there are at least two observed units in these subpopulations. 46 | 47 | {phang} If {cmd:noboot}, standard errors based on asymptotic theory are estimated. 48 | 49 | {phang} The balance test is a t-test allowing for unequal variances. 
50 | 51 | 52 | {title:Examples} 53 | 54 | {pstd}Load the JTPA data 55 | 56 | {p 4 8 2}{stata "use http://fmwww.bc.edu/repec/bocode/j/jtpa.dta, clear":. use "http://fmwww.bc.edu/repec/bocode/j/jtpa.dta",clear}{p_end} 57 | 58 | {p 4 8 2}{stata "ivdesc age training assignmt":. ivdesc age training assignmt}{p_end} 59 | {p 4 8 2}{stata "ivdesc hispanic training assignmt":. ivdesc hispanic training assignmt}{p_end} 60 | 61 | {pstd}Plot the results 62 | 63 | {p 4 8 2}{stata "matrix C = r(ivdesc)'":. matrix C = r(ivdesc)'}{p_end} 64 | {p 4 8 2}{stata "coefplot matrix(C), se(C[2])":. coefplot matrix(C), se(C[2])}{p_end} 65 | 66 | 67 | {title:Saved results} 68 | 69 | {synoptset 15 tabbed}{...} 70 | {p2col 5 15 19 2: Matrices}{p_end} 71 | {synopt:{cmd:r(ivdesc)}} all estimates {p_end} 72 | 73 | {p2col 5 15 19 2: Scalar}{p_end} 74 | {synopt:{cmd:r(pval)}} p-value from balance test {p_end} 75 | 76 | 77 | {title:Reference} 78 | {p 4 8 2} 79 | 80 | {pstd} M. Marbach and D. Hangartner. 2020. Profiling Compliers and Non-compliers for Instrumental Variable Analysis. {it:Political Analysis} 28(3), 435-444. {p_end} 81 | 82 | {pstd} D. Hangartner, M. Marbach, L. Henckel, M. H. Maathuis, R. R. Kelz, and L. Keele. 2021. Profiling Compliers in Instrumental Variables Designs. Available at arXiv: https://arxiv.org/abs/2103.06328. 
{p_end} 83 | 84 | {title:Authors} 85 | 86 | Moritz Marbach (Maintainer), moritz.marbach@tamu.edu 87 | Texas A&M University, United States 88 | -------------------------------------------------------------------------------- /R/ivdesc/R/FoxDebate.R: -------------------------------------------------------------------------------- 1 | #' The effects of watching a Fox debate on Proposition 209 2 | #' 3 | #' The data set (n=507) contains findings from the experiment described in Albertson and Lawrence (2009) 4 | #' in which a representative sample of survey respondents in Orange County, California, were randomly 5 | #' assigned to receive encouragement to view a Fox debate on affirmative action, which would take 6 | #' place on the eve of the 1996 presidential election. Shortly after the election, these respondents were 7 | #' reinterviewed. The postelection questionnaire asked respondents whether they viewed the debate, 8 | #' whether they supported a California proposition (209) to eliminate affirmative action (\code{support}), 9 | #' and how informed they felt about the proposition (\code{infopro}). The dataset can be used to reproduce 10 | #' Table 2 in Aronow and Carnegie (2013). Note that mean imputation was used to handle missing data so 11 | #' non-integer values are imputed. \code{support} and \code{infopro} are excepted and include missing values. 12 | #' 13 | #' This dataset documentation has been copied from the archived R package \emph{icsw}. 14 | #' 15 | #' @format A data frame with 507 observations on the following 12 variables: 16 | #' \describe{ 17 | #' \item{partyid}{An 11 point scale from "strong Republican" to "strong Democrat".} 18 | #' \item{pnintst}{Respondent interest in politics and national affairs. Coded 1 = "very interested", 2 = "somewhat interested", 3 = "only slightly interested", 4 = "not interested at all".} 19 | #' \item{watchnat}{Frequency of national television news consumption.
Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 20 | #' \item{educad}{Education level of respondent. Coded 1 = "eighth grade or less", 2 = "beyond eighth grade, not high school", 3 = "ged", 4 = "high school", 5 = "less than one year vocational school", 6 = "one to two year vocational school", 7 = "two years or more vocational school", 8 = "less than two years of college", 9 = "two or more years of college", 10 = "finished a two-year college program", 11 = "finished a four-year college program", 12 = "master degree or equivalent", 13 = "ph.d., m.d., or other advance degree".} 21 | #' \item{readnews}{How often respondent reads political news. Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 22 | #' \item{gender}{Respondent gender. Coded 1 for female and 0 for male.} 23 | #' \item{income}{Family income from all sources. Coded 1 = "under $10,000", 2 = "between $10,000 and $20,000", 3 = "between $20,000 and $30,000", 4 = "between $30,000 and $40,000", 5 = "between $40,000 and $50,000", 6 = "between $50,000 and $60,000", 7 = "between $60,000 and $70,000", 8 = "between $70,000 and $80,000", 9 = "between $80,000 and $90,000", 10 = "between $90,000 and $100,000", 11 = "$100,000 or more".} 24 | #' \item{white}{Binary indicator coded 1 if subject is white and 0 otherwise.} 25 | #' \item{support}{Support for Proposition 209. Coded 1 if subject voted against or opposed and 0 if subject voted for or favored} 26 | #' \item{infopro}{Information on Proposition 209. 
Coded from 1 to 4, with 4 meaning respondents had a great deal of information about Proposition 209 prior to the election, and 1 meaning respondents reported no information about the proposition before the election.} 27 | #' \item{watchpro}{Binary indicator coded 1 if subject watched the Fox Debate about affirmative action and 0 otherwise. This is the outcome ("treatment") of interest.} 28 | #' \item{conditn}{Binary indicator coded 1 if subject was (randomly) prompted to watch the Fox Debate about affirmative action. This is the encouragement (instrumental) variable.} 29 | #' } 30 | #' 31 | #' @references 32 | #' Bethany Albertson and Adria Lawrence. (2009). After the credits roll: The long-term effects of educational television on public knowledge and attitudes. \emph{American Politics Research}. 37(2): 275-300. 33 | #' 34 | #' Peter M. Aronow and Allison Carnegie. (2013). Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Political Analysis}. 21.4 (2013): 492-506. 35 | #' 36 | #' Peter M. Aronow and Allison Carnegie. (2013). Replication data for: Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Dataverse Network.} http://hdl.handle.net/1902.1/21729 (accessed May 14, 2015). 
37 | #' 38 | #' @keywords datasets 39 | #' @docType data 40 | #' @name FoxDebate 41 | "FoxDebate" -------------------------------------------------------------------------------- /R/ivdesc/man/FoxDebate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FoxDebate.R 3 | \docType{data} 4 | \name{FoxDebate} 5 | \alias{FoxDebate} 6 | \title{The effects of watching a Fox debate on Proposition 209} 7 | \format{ 8 | A data frame with 507 observations on the following 12 variables: 9 | \describe{ 10 | \item{partyid}{An 11 point scale from "strong Republican" to "strong Democrat".} 11 | \item{pnintst}{Respondent interest in politics and national affairs. Coded 1 = "very interested", 2 = "somewhat interested", 3 = "only slightly interested", 4 = "not interested at all".} 12 | \item{watchnat}{Frequency of national television news consumption. Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 13 | \item{educad}{Education level of respondent. Coded 1 = "eighth grade or less", 2 = "beyond eighth grade, not high school", 3 = "ged", 4 = "high school", 5 = "less than one year vocational school", 6 = "one to two year vocational school", 7 = "two years or more vocational school", 8 = "less than two years of college", 9 = "two or more years of college", 10 = "finished a two-year college program", 11 = "finished a four-year college program", 12 = "master degree or equivalent", 13 = "ph.d., m.d., or other advance degree".} 14 | \item{readnews}{How often respondent reads political news. Coded 1 = "never", 2 = "less than once a month", 3 = "once a month", 4 = "several times a month", 5 = "once a week", 6 = "several times a week", 7 = "every day".} 15 | \item{gender}{Respondent gender. 
Coded 1 for female and 0 for male.} 16 | \item{income}{Family income from all sources. Coded 1 = "under $10,000", 2 = "between $10,000 and $20,000", 3 = "between $20,000 and $30,000", 4 = "between $30,000 and $40,000", 5 = "between $40,000 and $50,000", 6 = "between $50,000 and $60,000", 7 = "between $60,000 and $70,000", 8 = "between $70,000 and $80,000", 9 = "between $80,000 and $90,000", 10 = "between $90,000 and $100,000", 11 = "$100,000 or more".} 17 | \item{white}{Binary indicator coded 1 if subject is white and 0 otherwise.} 18 | \item{support}{Support for Proposition 209. Coded 1 if subject voted against or opposed and 0 if subject voted for or favored} 19 | \item{infopro}{Information on Proposition 209. Coded from 1 to 4, with 4 meaning respondents had a great deal of information about Proposition 209 prior to the election, and 1 meaning respondents reported no information about the proposition before the election.} 20 | \item{watchpro}{Binary indicator coded 1 if subject watched the Fox Debate about affirmative action and 0 otherwise. This is the outcome ("treatment") of interest.} 21 | \item{conditn}{Binary indicator coded 1 if subject was (randomly) prompted to watch the Fox Debate about affirmative action. This is the encouragement (instrumental) variable.} 22 | } 23 | } 24 | \usage{ 25 | FoxDebate 26 | } 27 | \description{ 28 | The data set (n=507) contains findings from the experiment described in Albertson and Lawrence (2009) 29 | in which a representative sample of survey respondents in Orange County, California, were randomly 30 | assigned to receive encouragement to view a Fox debate on affirmative action, which would take 31 | place on the eve of the 1996 presidential election. Shortly after the election, these respondents were 32 | reinterviewed. 
The postelection questionnaire asked respondents whether they viewed the debate, 33 | whether they supported a California proposition (209) to eliminate affirmative action (\code{support}), 34 | and how informed they felt about the proposition (\code{infopro}). The dataset can be used to reproduce 35 | Table 2 in Aronow and Carnegie (2013). Note that mean imputation was used to handle missing data so 36 | non-integer values are imputed. \code{support} and \code{infopro} are excepted and include missing values. 37 | } 38 | \details{ 39 | This dataset documentation has been copied from the archived R package \emph{icsw}. 40 | } 41 | \references{ 42 | Bethany Albertson and Adria Lawrence. (2009). After the credits roll: The long-term effects of educational television on public knowledge and attitudes. \emph{American Politics Research}. 37(2): 275-300. 43 | 44 | Peter M. Aronow and Allison Carnegie. (2013). Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Political Analysis}. 21.4 (2013): 492-506. 45 | 46 | Peter M. Aronow and Allison Carnegie. (2013). Replication data for: Beyond LATE: Estimation of the average treatment effect with an instrumental variable. \emph{Dataverse Network.} http://hdl.handle.net/1902.1/21729 (accessed May 14, 2015). 47 | } 48 | \keyword{datasets} 49 | -------------------------------------------------------------------------------- /R/ivdesc/R/ivdesc.R: -------------------------------------------------------------------------------- 1 | #' Profiling compliers and non-compliers for instrumental variable analysis 2 | #' 3 | #' 4 | #' Estimates the mean and variance of a covariate for the complier, never-taker and always-taker subpopulation. 5 | #' 6 | #' 7 | #' @param X vector with numeric covariate 8 | #' @param D vector with binary treatment 9 | #' @param Z vector with binary instrument 10 | #' @param boot Replace all standard errors with bootstrap standard errors?
11 | #' @param bootn number of bootstraps (ignored if \code{boot=FALSE} ) 12 | #' @param variance Calculate the variance of the covariate for each subgroup? 13 | #' @param balance Run balance test? 14 | #' @param ... additional arguments to be passed to \code{ivdesc_all} 15 | #' 16 | #' @details 17 | #' This function estimates the mean and the associated standard error of \code{X} for the complier, never-taker and always-taker subpopulation within a sample where some, but not all, units are encouraged by instrument \code{Z} to take the treatment \code{D}. 18 | #' Observations with missing values in either \code{X}, \code{D}, or \code{Z} are dropped (listwise deletion). 19 | #' 20 | #' One-sided noncompliance is supported. The mean for the always-/never-taker subpopulation will only be computed if there are at least two observed units in these subpopulations. 21 | #' 22 | #' If \code{boot=FALSE}, standard errors based on asymptotic theory are estimated. 23 | #' 24 | #' The balance test is a t-test allowing for unequal variances. 25 | #' 26 | #' @return 27 | #' Returns an object \code{ivdesc} with estimates for each subgroup (\code{co}: complier, \code{nt}: never-taker, \code{at} : always-taker) and the full sample: 28 | #' 29 | #' \itemize{ 30 | #' \item \code{mu} and \code{mu_se} : Mean of \code{X} and standard error 31 | #' \item \code{pi} and \code{pi_se}: Proportion of each subgroup in the sample and standard error 32 | #' \item \code{var}: Variance of \code{X} (if \code{variance=TRUE}) 33 | #' } 34 | #' 35 | #' Can be coerced to a proper \code{data.frame} using \code{as.data.frame}. 36 | #' 37 | #' @seealso 38 | #' \code{\link[AER]{ivreg}} 39 | #' 40 | #' 41 | #' @references 42 | #' M. Marbach and D. Hangartner. 2020. Profiling Compliers and Non-compliers for Instrumental Variable Analysis. \emph{Political Analysis}, 28(3), 435-444. 43 | #' 44 | #' D. Hangartner, M. Marbach, L. Henckel, M. H. Maathuis, R. R. Kelz, and L. Keele. 2021.
Profiling Compliers in Instrumental Variables Designs. Available at arXiv: \href{https://arxiv.org/abs/2103.06328}{https://arxiv.org/abs/2103.06328}. 45 | #' 46 | #' @examples 47 | #' 48 | #' 49 | #' # Example 1: Albertson/Lawrence (2009) 50 | #' # see Marbach/Hangartner (2019) for details/discussion 51 | #' data(FoxDebate) 52 | #' 53 | #' with(FoxDebate, ivdesc(X=readnews,D=watchpro,Z=conditn) ) 54 | #' 55 | #' 56 | #' 57 | #' \donttest{ 58 | #' 59 | #' # Example 2: JTPA Data 60 | #' 61 | #' library(haven) 62 | #' jtpa <- read_dta("http://fmwww.bc.edu/repec/bocode/j/jtpa.dta") 63 | #' 64 | #' with(jtpa, ivdesc(age, training, assignmt, bootn=500)) 65 | #' with(jtpa, ivdesc(hispanic, training, assignmt, boot=FALSE)) 66 | #' 67 | #' } 68 | #' 69 | #' 70 | #' 71 | #' 72 | #' @importFrom knitr kable 73 | #' 74 | #' @export 75 | ivdesc <- function(X,D,Z, variance=FALSE, boot=TRUE, bootn=1000, balance=TRUE, ...){ 76 | 77 | # Checks 78 | if(!is.numeric(D)) stop("D has to be numeric with values c(0,1,NA).") 79 | if(!is.numeric(Z)) stop("Z has to be numeric with values c(0,1,NA).") 80 | 81 | if( sum(D %in% c(0,1,NA))!=length(D) ) stop("D can only contain values c(0,1,NA).") 82 | if( sum(Z %in% c(0,1,NA))!=length(Z) ) stop("Z can only contain values c(0,1,NA).") 83 | 84 | if( length(D)!=length(Z) ) stop("D has to be of the same length of Z.") 85 | if( length(X)!=length(Z) ) stop("X has to be of the same length of Z.") 86 | if( length(X)!=length(D) ) stop("X has to be of the same length of D.") 87 | 88 | if( boot==TRUE & bootn<2 ) stop("bootn has be larger than 2.") 89 | 90 | if (!is.numeric(X)){ 91 | X <- as.numeric(X) 92 | warning("X coerced to numeric.") 93 | } 94 | 95 | # Listwise deletion 96 | nomiss <- !is.na(X) & !is.na(D) & !is.na(Z) 97 | X <- X[nomiss] 98 | D <- D[nomiss] 99 | Z <- Z[nomiss] 100 | 101 | if(boot==FALSE) { boot <- 0 } 102 | else { boot <- bootn } 103 | 104 | if( (mean(D[Z==1]==1)-mean(D[Z==0]==1))<0 ) stop("First-stage is negative. 
Please reverse coding of Z.") 105 | if( sum(D==Z)==length(D) ) stop("There is full compliance with the instrument (D=Z).") 106 | 107 | res <- ivdesc_all(X,D,Z,boot=boot,variance=variance,...) 108 | 109 | if( balance ){ 110 | bal <- t.test(X ~ Z, var.equal=FALSE) 111 | attr(res, "balance_pval") <- bal$p.value 112 | } 113 | 114 | class(res) <- c('ivdesc', 'data.frame') 115 | 116 | return(res) 117 | } 118 | 119 | #' @method print ivdesc 120 | #' @export 121 | print.ivdesc <- function(x,...) { 122 | class(x) <- 'data.frame' 123 | print(kable(x)) 124 | pvals <- attr(x, 'pvals') 125 | balance_pval <- attr(x, 'balance_pval') 126 | if( !is.null(pvals) ){ 127 | cat("\nBootstrapped p-values:") 128 | print(kable(pvals, col.names=c("group","Pr(Tt)"))) 129 | cat("\n\n") 130 | } else {cat("\n")} 131 | if( !is.null(balance_pval) ){ 132 | cat("Balance test: H0: E[X|Z=0]=E[X|Z=1]\n") 133 | cat("Pr(|T| > |t|) = ", format(attr(x,'balance_pval'),digits=3), "\n\n") 134 | } 135 | invisible(x) 136 | } 137 | 138 | 139 | -------------------------------------------------------------------------------- /ivdesc_calc.ado: -------------------------------------------------------------------------------- 1 | /* 2 | Version 1.1.0 (March 18, 2021) 3 | Author: Moritz Marbach, moritz.marbach@tamu.edu 4 | URL: https://github.com/sumtxt/ivdesc 5 | Changelog 6 | 1.0.1 : Added coefplot to help file 7 | 1.1.0 : Added asymptotic SE for complier mean 8 | */ 9 | 10 | program ivdesc_calc, rclass 11 | version 12 12 | 13 | syntax varlist(default=none min=1 max=3) [, VARiance] 14 | 15 | local X: word 1 of `varlist' 16 | local D: word 2 of `varlist' 17 | local Z: word 3 of `varlist' 18 | 19 | * Estimation 20 | 21 | quietly: { 22 | 23 | summarize `D' if `Z'==1 24 | local pi_co1 = r(mean) 25 | local v_pi_co1 = r(Var) 26 | 27 | summarize `D' if `Z'==0 28 | local pi_co2 = r(mean) 29 | local v_pi_co2 = r(Var) 30 | 31 | summarize `Z' 32 | local N_z1 = r(sum) 33 | local N_z0 = r(N)-r(sum) 34 | local N = r(N) 35 | local 
p_z = r(mean) 36 | 37 | local pi_co = `pi_co1'-`pi_co2' 38 | local se_pi_co = sqrt( (`v_pi_co1'/`N_z1') + (`v_pi_co2'/`N_z0') ) 39 | 40 | if `pi_co'<0 { 41 | display as error "First-stage is negative. Please reverse coding of Z." 42 | exit 42 43 | } 44 | 45 | summarize `D' if `Z'==1 46 | local pi_nt = 1-r(mean) 47 | local se_pi_nt = sqrt( r(Var)/`N_z1' ) 48 | 49 | summarize `D' if `Z'==0 50 | local pi_at = r(mean) 51 | local se_pi_at = sqrt( r(Var)/`N_z0' ) 52 | 53 | summarize `X' if `Z'==1 & `D'==0 54 | local v_nt = r(Var) 55 | local mu_nt = r(mean) 56 | 57 | summarize `X' if `Z'==0 & `D'==1 58 | local v_at = r(Var) 59 | local mu_at = r(mean) 60 | 61 | summarize `X' 62 | local mu = r(mean) 63 | local v = r(Var) 64 | 65 | count if `Z'==1 & `D'==0 66 | local k_nt = r(N) 67 | 68 | count if `Z'==0 & `D'==1 69 | local k_at = r(N) 70 | 71 | if `k_at'<2 { 72 | 73 | local mu_co = (1/`pi_co') * `mu' - (`pi_nt'/`pi_co') * `mu_nt' 74 | 75 | if !missing("`variance'") { 76 | 77 | local v_co1 = (`v_nt' * `pi_nt') 78 | 79 | local v_co2 = ( `mu_co' * `mu_co' * `pi_co' * (1-`pi_co') ) + ( `mu_nt' *`mu_nt' * `pi_nt' * (1-`pi_nt') ) 80 | 81 | local v_co3 = (`mu_nt' * `pi_nt' * `mu_co' * `pi_co') 82 | 83 | local v_co = (1/`pi_co')*`v' - (1/`pi_co')*(`v_co1' + `v_co2' -2 * `v_co3') 84 | 85 | } 86 | 87 | } 88 | 89 | else if `k_nt'<2 { 90 | 91 | local mu_co = (1/`pi_co') * `mu' - (`pi_at'/`pi_co') * `mu_at' 92 | 93 | if !missing("`variance'") { 94 | 95 | local v_co1 = (`v_at' * `pi_at') 96 | 97 | local v_co2 = ( `mu_co' * `mu_co' * `pi_co' * (1-`pi_co') ) + ( `mu_at' * `mu_at' * `pi_at' * (1-`pi_at') ) 98 | 99 | local v_co3 = (`mu_at' * `pi_at' * `mu_co' * `pi_co') 100 | 101 | local v_co = (1/`pi_co')*`v' - (1/`pi_co')*(`v_co1' + `v_co2' -2 * `v_co3') 102 | 103 | } 104 | 105 | } 106 | 107 | else { 108 | 109 | local mu_co = (1/`pi_co') * `mu' - (`pi_nt'/`pi_co') * `mu_nt' - (`pi_at'/`pi_co') * `mu_at' 110 | 111 | if !missing("`variance'") { 112 | 113 | local v_co1 = (`v_nt' * `pi_nt') + 
(`v_at' * `pi_at') 114 | 115 | local v_co2 = ( `mu_co' * `mu_co' * `pi_co' * (1-`pi_co') ) + ( `mu_nt' *`mu_nt' * `pi_nt' * (1-`pi_nt') ) + ( `mu_at' * `mu_at' * `pi_at' * (1-`pi_at') ) 116 | 117 | local v_co3 = (`mu_nt' * `pi_nt' * `mu_co' * `pi_co') + (`mu_at' * `pi_at' * `mu_co' * `pi_co') + (`mu_at' * `pi_at' * `mu_nt' * `pi_nt') 118 | 119 | local v_co = (1/`pi_co')*`v' - (1/`pi_co')*(`v_co1' + `v_co2' -2 * `v_co3') 120 | 121 | } 122 | 123 | } 124 | 125 | local se_mu = sqrt(`v')/sqrt(_N) 126 | local se_mu_nt = sqrt(`v_nt')/sqrt(`k_nt') 127 | local se_mu_at = sqrt(`v_at')/sqrt(`k_at') 128 | 129 | * Start: compute se_mu_co 130 | tempvar Z1DX D1ZX Z1D D1Z 131 | gen `Z1DX' = `Z'*(1-`D')*`X' 132 | gen `D1ZX' = `D'*(1-`Z')*`X' 133 | gen `Z1D' = `Z'*(1-`D') 134 | gen `D1Z' = `D'*(1-`Z') 135 | 136 | summarize `Z1DX' 137 | local mu_vnt = r(mean) 138 | 139 | summarize `D1ZX' 140 | local mu_vat = r(mean) 141 | 142 | summarize `Z1D' 143 | local pi_vnt = r(mean) 144 | 145 | summarize `D1Z' 146 | local pi_vat = r(mean) 147 | 148 | corr `X' `Z1DX' `D1ZX' `Z1D' `D1Z' `Z', cov 149 | matrix covB = r(C) 150 | 151 | local Mu = (`mu'-`mu_vnt'/`p_z'-`mu_vat'/(1-`p_z')) 152 | 153 | local b1 = 1/`pi_co' 154 | local b2 = -1/(`pi_co'*`p_z') 155 | local b3 = -1/(`pi_co'*(1-`p_z')) 156 | local b4 = `Mu'/(`pi_co'^2*`p_z') 157 | local b5 = `Mu'/(`pi_co'^2*(1-`p_z')) 158 | local b6 = (`pi_vat'/(1-`p_z')^2*`mu'-`pi_vnt'/`p_z'^2*`mu' -`pi_vat'/(`p_z'*(1-`p_z'))^2*`mu_vnt' + `mu_vnt'/`p_z'^2 - `mu_vat'/(1-`p_z')^2 + `pi_vnt'/(`p_z'*(1-`p_z'))^2*`mu_vat' )/(`pi_co')^2 159 | 160 | matrix B = (`b1' \ `b2' \ `b3' \ `b4' \ `b5' \ `b6') 161 | 162 | matrix tBcovBB = B' * covB * B 163 | local se_mu_co = sqrt( (1/`N') * tBcovBB[1,1]) 164 | * :End 165 | 166 | } 167 | 168 | return scalar mu = `mu' 169 | 170 | return scalar mu_nt = `mu_nt' 171 | return scalar mu_at = `mu_at' 172 | return scalar mu_co = `mu_co' 173 | 174 | return scalar se_mu = `se_mu' 175 | return scalar se_mu_nt = `se_mu_nt' 176 | return 
scalar se_mu_at = `se_mu_at' 177 | return scalar se_mu_co = `se_mu_co' 178 | 179 | return scalar pi_co = `pi_co' 180 | return scalar pi_nt = `pi_nt' 181 | return scalar pi_at = `pi_at' 182 | 183 | return scalar se_pi_co = `se_pi_co' 184 | return scalar se_pi_nt = `se_pi_nt' 185 | return scalar se_pi_at = `se_pi_at' 186 | 187 | if !missing("`variance'") { 188 | 189 | return scalar v_nt = `v_nt' 190 | return scalar v_at = `v_at' 191 | return scalar v_co = `v_co' 192 | return scalar v = `v' 193 | 194 | } 195 | 196 | end -------------------------------------------------------------------------------- /ivdesc.ado: -------------------------------------------------------------------------------- 1 | /* 2 | Version 1.1.0 (March 18, 2021) 3 | Author: Moritz Marbach, moritz.marbach@tamu.edu 4 | URL: https://github.com/sumtxt/ivdesc 5 | Changelog 6 | 1.0.1 : Added coefplot to help file 7 | 1.1.0 : Added asymptotic SE for complier mean 8 | */ 9 | 10 | program ivdesc, rclass 11 | version 12 12 | 13 | syntax varlist(default=none min=1 max=3) [if/] [in/] [, NOboot VARiance NOBALance fmt(passthru) Reps(integer 1000)] 14 | 15 | local X: word 1 of `varlist' 16 | local D: word 2 of `varlist' 17 | local Z: word 3 of `varlist' 18 | 19 | preserve 20 | 21 | if !missing("`if'") { 22 | quietly: keep if `if' & `X'!=. & `D'!=. & `Z'!=. 23 | } 24 | else { 25 | quietly: keep if `X'!=. & `D'!=. & `Z'!=. 26 | } 27 | 28 | quietly: count if `D'!=`Z' 29 | if r(N)<1 { 30 | display as error "There is full compliance with the instrument (D=Z)." 
31 | exit 42 32 | } 33 | 34 | 35 | if missing("`noboot'") { 36 | 37 | ivdesc_calc `X' `D' `Z', `variance' 38 | 39 | local mu = r(mu) 40 | local mu_co = r(mu_co) 41 | local mu_nt = r(mu_nt) 42 | local mu_at = r(mu_at) 43 | 44 | local pi_co = r(pi_co) 45 | local pi_nt = r(pi_nt) 46 | local pi_at = r(pi_at) 47 | 48 | if !missing("`variance'") { 49 | 50 | local v_co = r(v_co) 51 | local v_nt = r(v_nt) 52 | local v_at = r(v_at) 53 | local v = r(v) 54 | 55 | } 56 | 57 | if `mu_at'==. { 58 | bootstrap mu_co=r(mu_co) mu_nt=r(mu_nt) mu=r(mu) /// 59 | pi_co=r(pi_co) pi_nt=r(pi_nt) /// 60 | p_co_s_nt=(r(mu_co)<r(mu_nt)) p_co_g_nt=(r(mu_co)>r(mu_nt)), reps(`reps') notable nolegend nowarn noheader: ivdesc_calc `X' `D' `Z' 61 | } 62 | 63 | else if `mu_nt'==. { 64 | bootstrap mu_co=r(mu_co) mu_at=r(mu_at) mu=r(mu) /// 65 | pi_co=r(pi_co) pi_at=r(pi_at) /// 66 | p_co_s_at=(r(mu_co)<r(mu_at)) p_co_g_at=(r(mu_co)>r(mu_at)), reps(`reps') notable nolegend nowarn noheader: ivdesc_calc `X' `D' `Z' 67 | } 68 | 69 | else { 70 | bootstrap mu_co=r(mu_co) mu_at=r(mu_at) mu_nt=r(mu_nt) mu=r(mu) /// 71 | pi_co=r(pi_co) pi_at=r(pi_at) pi_nt=r(pi_nt) /// 72 | p_co_s_nt=(r(mu_co)<r(mu_nt)) p_co_g_nt=(r(mu_co)>r(mu_nt)) /// 73 | p_co_s_at=(r(mu_co)<r(mu_at)) p_co_g_at=(r(mu_co)>r(mu_at)) /// 74 | p_at_s_nt=(r(mu_at)<r(mu_nt)) p_at_g_nt=(r(mu_at)>r(mu_nt)), reps(`reps') notable nolegend nowarn noheader: ivdesc_calc `X' `D' `Z' 75 | } 76 | 77 | matrix bootse = e(se) 78 | matrix bootb = e(b_bs) 79 | 80 | local se_mu = bootse[1,colnumb(bootse,"mu")] 81 | local se_mu_co = bootse[1,colnumb(bootse,"mu_co")] 82 | local se_mu_at = bootse[1,colnumb(bootse,"mu_at")] 83 | local se_mu_nt = bootse[1,colnumb(bootse,"mu_nt")] 84 | 85 | local se_pi_co = bootse[1,colnumb(bootse,"pi_co")] 86 | local se_pi_at = bootse[1,colnumb(bootse,"pi_at")] 87 | local se_pi_nt = bootse[1,colnumb(bootse,"pi_nt")] 88 | 89 | local p_co_s_nt = bootb[1,colnumb(bootb,"p_co_s_nt")] 90 | local p_co_g_nt = bootb[1,colnumb(bootb,"p_co_g_nt")] 91 | local p_co_s_at = bootb[1,colnumb(bootb,"p_co_s_at")] 92 | local p_co_g_at = bootb[1,colnumb(bootb,"p_co_g_at")] 93 | local p_at_s_nt = 
bootb[1,colnumb(bootb,"p_at_s_nt")] 94 | local p_at_g_nt = bootb[1,colnumb(bootb,"p_at_g_nt")] 95 | 96 | if missing("`variance'") { 97 | 98 | matrix input res = ( `mu', `se_mu', 1, 0 \ `mu_co', `se_mu_co', `pi_co', `se_pi_co' \ `mu_nt', `se_mu_nt', `pi_nt', `se_pi_nt' \ `mu_at', `se_mu_at', `pi_at', `se_pi_at' ) 99 | matrix colnames res = "Mean" "Boot.-SE" "Proportion" "Boot.-SE" 100 | 101 | } 102 | 103 | else { 104 | 105 | matrix input res = ( `mu', `se_mu', `v', 1, 0 \ `mu_co', `se_mu_co', `v_co', `pi_co', `se_pi_co' \ `mu_nt', `se_mu_nt', `v_nt', `pi_nt', `se_pi_nt' \ `mu_at', `se_mu_at', `v_at', `pi_at', `se_pi_at' ) 106 | matrix colnames res = "Mean" "Boot.-SE" "Variance" "Proportion" "Boot.-SE" 107 | 108 | } 109 | 110 | matrix input pvals = ( `p_co_s_nt', `p_co_g_nt' \ `p_co_s_at', `p_co_g_at' \ `p_at_s_nt', `p_at_g_nt') 111 | matrix colnames pvals = "Pr(T < t)" "Pr(T > t)" 112 | matrix rownames pvals = "co vs nt" "co vs at" "at vs nt" 113 | 114 | } 115 | 116 | else { 117 | 118 | ivdesc_calc `X' `D' `Z', `variance' 119 | 120 | if missing("`variance'") { 121 | 122 | matrix input res = ( `r(mu)', `r(se_mu)', 1, 0 \ `r(mu_co)', `r(se_mu_co)', `r(pi_co)', `r(se_pi_co)' \ `r(mu_nt)', `r(se_mu_nt)', `r(pi_nt)', `r(se_pi_nt)' \ `r(mu_at)', `r(se_mu_at)', `r(pi_at)', `r(se_pi_at)' ) 123 | matrix colnames res = "Mean" "SE" "Proportion" "SE" 124 | 125 | } 126 | 127 | else { 128 | 129 | matrix input res = ( `r(mu)', `r(se_mu)', `r(v)', 1, 0 \ `r(mu_co)', ., `r(v_co)', `r(pi_co)', `r(se_pi_co)' \ `r(mu_nt)', `r(se_mu_nt)', `r(v_nt)', `r(pi_nt)', `r(se_pi_nt)' \ `r(mu_at)', `r(se_mu_at)', `r(v_at)', `r(pi_at)', `r(se_pi_at)' ) 130 | matrix colnames res = "Mean" "SE" "Variance" "Proportion" "SE" 131 | 132 | } 133 | 134 | } 135 | 136 | matrix rownames res = "whole sample" "complier" "never-taker" "always-taker" 137 | 138 | estout matrix(res, `fmt'), mlabels(none) title("Variable: " `X') 139 | return mat ivdesc = res 140 | 141 | 142 | if missing("`noboot'") { 143 | 144 | 
estout matrix(pvals, `fmt'), mlabels(none) title("Bootstrapped p-values:") 145 | return mat pvals = pvals 146 | 147 | } 148 | 149 | 150 | if missing("`nobalance'") { 151 | 152 | quietly: ttest `X', by(`Z') unequal 153 | local p2 : di %6.4f r(p) 154 | 155 | di as txt "" 156 | di as txt "Balance test: H0: E[X|Z=0]=E[X|Z=1]" 157 | di "Pr(|T| > |t|) = `p2'" 158 | 159 | return scalar bal_pval = `p2' 160 | 161 | } 162 | 163 | restore 164 | 165 | end 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /R/ivdesc/R/ivdesc_est.R: -------------------------------------------------------------------------------- 1 | #' @importFrom rsample analysis 2 | #' @importFrom rsample bootstraps 3 | #' @importFrom purrr map_df 4 | #' @importFrom stats var t.test 5 | ivdesc_boot <- function(X,D,Z,times){ 6 | df <- bootstraps(data.frame(X=X,D=D,Z=Z), times=times) 7 | return(map_df(df$splits, ivdesc_mu_F0_)) 8 | } 9 | 10 | ivdesc_boot_sum <- function(boot){ 11 | 12 | se_mu <- with(boot, sd(mu, na.rm=TRUE) ) 13 | se_mu_co <- with(boot, sd(mu_co, na.rm=TRUE) ) 14 | se_mu_nt <- with(boot, sd(mu_nt, na.rm=TRUE) ) 15 | se_mu_at <- with(boot, sd(mu_at, na.rm=TRUE) ) 16 | 17 | se_pi_co <- with(boot, sd(pi_co, na.rm=TRUE) ) 18 | se_pi_nt <- with(boot, sd(pi_nt, na.rm=TRUE) ) 19 | se_pi_at <- with(boot, sd(pi_at, na.rm=TRUE) ) 20 | 21 | p_co_nt <- with(boot, mean(mu_co>mu_nt, na.rm=TRUE) ) 22 | p_nt_co <- with(boot, mean(mu_nt>mu_co, na.rm=TRUE) ) 23 | 24 | p_co_at <- with(boot, mean(mu_co>mu_at, na.rm=TRUE) ) 25 | p_at_co <- with(boot, mean(mu_at>mu_co, na.rm=TRUE) ) 26 | 27 | p_at_nt <- with(boot, mean(mu_at>mu_nt, na.rm=TRUE) ) 28 | p_nt_at <- with(boot, mean(mu_nt>mu_at, na.rm=TRUE) ) 29 | 30 | agg <- data.frame(se_mu=se_mu, 31 | se_mu_co=se_mu_co, se_mu_nt=se_mu_nt, se_mu_at=se_mu_at, 32 | se_pi_co=se_pi_co, se_pi_nt=se_pi_nt, se_pi_at=se_pi_at) 33 | 34 | pvals <- as.data.frame(rbind( 35 | c("co_vs_nt", p_nt_co, p_co_nt), 36 | c("co_vs_at", 
p_at_co, p_co_at), 37 | c("at_vs_nt", p_nt_at, p_at_nt) 38 | )) 39 | 40 | colnames(pvals) <- c("group", "smaller", "greater") 41 | 42 | return(list(agg=agg, pvals=pvals)) 43 | } 44 | 45 | ivdesc_mu_F0_ <- function(split) with(analysis(split), ivdesc_mu_F0(X,D,Z)) 46 | 47 | ivdesc_mu_F0 <- function(X,D,Z){ 48 | 49 | pi_co = mean(D[Z==1]==1)-mean(D[Z==0]==1) 50 | pi_nt = mean(D[Z==1]==0) 51 | pi_at = mean(D[Z==0]==1) 52 | 53 | mu_nt = mean(X[Z==1 & D==0]) 54 | mu_at = mean(X[Z==0 & D==1]) 55 | mu = mean(X) 56 | 57 | K_nt = sum(Z==1 & D==0) 58 | K_at = sum(Z==0 & D==1) 59 | 60 | if( K_at<2){ 61 | 62 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt 63 | 64 | } else if (K_nt<2) { 65 | 66 | mu_co = (1/pi_co) * mu - (pi_at/pi_co) * mu_at 67 | 68 | } else { 69 | 70 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt - (pi_at/pi_co) * mu_at 71 | 72 | } 73 | 74 | dat <- data.frame(mu=mu, mu_co=mu_co,mu_nt=mu_nt,mu_at=mu_at, 75 | pi_co=pi_co, pi_nt=pi_nt, pi_at=pi_at) 76 | 77 | return(dat) 78 | } 79 | 80 | 81 | ivdesc_all <- function(X,D,Z,boot,variance,kappa=FALSE){ 82 | 83 | N <- length(X) 84 | 85 | pi_co = mean(D[Z==1]==1)-mean(D[Z==0]==1) 86 | pi_nt = mean(D[Z==1]==0) 87 | pi_at = mean(D[Z==0]==1) 88 | 89 | # Totals 90 | 91 | K_nt = sum(Z==1 & D==0) 92 | K_at = sum(Z==0 & D==1) 93 | 94 | N_co = pi_co * N 95 | N_nt = pi_nt * N 96 | N_at = pi_at * N 97 | 98 | 99 | # Mean / variance nt/at/sample 100 | 101 | mu_nt = mean(X[Z==1 & D==0]) 102 | mu_at = mean(X[Z==0 & D==1]) 103 | 104 | mu = mean(X) 105 | 106 | v_nt = var(X[Z==1 & D==0]) 107 | v_at = var(X[Z==0 & D==1]) 108 | 109 | v = var(X) 110 | 111 | sd_nt = sqrt(v_nt) 112 | sd_at = sqrt(v_at) 113 | 114 | sd = sqrt(v) 115 | 116 | 117 | # Mean / variance / standard deviation co 118 | 119 | if(kappa == FALSE){ 120 | 121 | if( K_at<2){ 122 | 123 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt 124 | 125 | if ( variance == TRUE ) { 126 | 127 | v_co1 = (v_nt * pi_nt) 128 | 129 | v_co2 = ( mu_co^2 * pi_co * (1-pi_co) )+ 130 | ( 
mu_nt^2 * pi_nt * (1-pi_nt) ) 131 | 132 | v_co3 = (mu_nt * pi_nt * mu_co * pi_co) 133 | 134 | v_co = (1/pi_co)*v - (1/pi_co)*(v_co1 + v_co2 -2 * v_co3) 135 | 136 | } 137 | 138 | } else if (K_nt<2) { 139 | 140 | mu_co = (1/pi_co) * mu - (pi_at/pi_co) * mu_at 141 | 142 | if ( variance == TRUE ) { 143 | 144 | v_co1 = (v_at * pi_at) 145 | 146 | v_co2 = ( mu_co^2 * pi_co * (1-pi_co) )+ 147 | ( mu_at^2 * pi_at * (1-pi_at) ) 148 | 149 | v_co3 = (mu_at * pi_at * mu_co * pi_co) 150 | 151 | v_co = (1/pi_co)*v - (1/pi_co)*(v_co1 + v_co2 -2 * v_co3) 152 | 153 | } 154 | 155 | } else { 156 | 157 | mu_co = (1/pi_co) * mu - (pi_nt/pi_co) * mu_nt - (pi_at/pi_co) * mu_at 158 | 159 | if ( variance == TRUE ) { 160 | 161 | v_co1 = ( (v_nt) * pi_nt) + ( (v_at) * pi_at) 162 | 163 | v_co2 = ( mu_co^2 * pi_co * (1-pi_co) )+ 164 | ( mu_nt^2 * pi_nt * (1-pi_nt) )+ 165 | ( mu_at^2 * pi_at * (1-pi_at) ) 166 | 167 | v_co3 = (mu_nt * pi_nt * mu_co * pi_co) + 168 | (mu_at * pi_at * mu_co * pi_co) + 169 | (mu_at * pi_at * mu_nt * pi_nt) 170 | 171 | v_co = (1/pi_co)*(v) - (1/pi_co)*(v_co1 + v_co2 -2 * v_co3) 172 | 173 | } 174 | 175 | } 176 | 177 | } else { 178 | 179 | kappa <- ( 1-((D*(1-Z))/mean(Z==0)) - (((1-D)*Z)/mean(Z==1)) ) 180 | mu_co <- (1/pi_co)*mean(X*kappa) 181 | variance <- FALSE 182 | 183 | } 184 | 185 | pz <- mean(Z==1) 186 | 187 | se_mu = sd/sqrt(N) 188 | se_mu_co = sqrt(get_var_mu_co(N=N,X=X,Z=Z,D=D)) 189 | se_mu_nt = sd_nt/sqrt(K_nt) 190 | se_mu_at = sd_at/sqrt(K_at) 191 | 192 | v_pi_co1 = var(D[Z==1]==1) 193 | v_pi_co2 = var(D[Z==0]==1) 194 | v_pi_nt = var(D[Z==1]==0) 195 | v_pi_at = var(D[Z==0]==1) 196 | 197 | se_pi_co = sqrt( (v_pi_co1/sum(Z==1))+(v_pi_co2/sum(Z==0)) ) 198 | se_pi_nt = sqrt( v_pi_nt/sum(Z==1) ) 199 | se_pi_at = sqrt( v_pi_at/sum(Z==0) ) 200 | 201 | 202 | if (variance == FALSE) { 203 | 204 | est = data.frame( 205 | group=c("sample","co", "nt", "at"), 206 | mu=c(mu,mu_co,mu_nt,mu_at), 207 | mu_se=c(se_mu,se_mu_co,se_mu_nt,se_mu_at), 208 | 
pi=c(1,pi_co,pi_nt,pi_at), 209 | pi_se=c(0,se_pi_co,se_pi_nt,se_pi_at)) 210 | 211 | } else { 212 | 213 | est = data.frame( 214 | group=c("sample","co", "nt", "at"), 215 | mu=c(mu,mu_co,mu_nt,mu_at), 216 | mu_se=c(se_mu,se_mu_co,se_mu_nt,se_mu_at), 217 | var=c(v,v_co,v_nt,v_at), 218 | pi=c(1,pi_co,pi_nt,pi_at), 219 | pi_se=c(0,se_pi_co,se_pi_nt,se_pi_at)) 220 | 221 | } 222 | 223 | 224 | if (boot>0) { 225 | 226 | boots <- ivdesc_boot(X,D,Z,times=boot) 227 | res <- ivdesc_boot_sum(boots) 228 | 229 | pvals <- res$pvals 230 | agg <- res$agg 231 | 232 | est[est$group=='sample','mu_se'] <- agg$se_mu 233 | est[est$group=='co','mu_se'] <- agg$se_mu_co 234 | est[est$group=='nt','mu_se'] <- agg$se_mu_nt 235 | est[est$group=='at','mu_se'] <- agg$se_mu_at 236 | 237 | est[est$group=='co','pi_se'] <- agg$se_pi_co 238 | est[est$group=='nt','pi_se'] <- agg$se_pi_nt 239 | est[est$group=='at','pi_se'] <- agg$se_pi_at 240 | 241 | attr(est, "pvals") <- pvals 242 | } 243 | 244 | return(est) 245 | 246 | } 247 | --------------------------------------------------------------------------------