├── RDDtools
    ├── .Rbuildignore
    ├── data
    │   ├── Lee2008.rda
    │   └── STAR_MHE.rda
    ├── inst
    │   └── doc
    │   │   └── RDDtools.pdf
    ├── vignettes
    │   ├── RDDtools.pdf
    │   └── RDD_refs.bib
    ├── R
    │   ├── myRDD-package.R
    │   ├── various_code.R
    │   ├── RDDcoef.R
    │   ├── Lee2008-data.R
    │   ├── qplot_experim.R
    │   ├── plotBin.R
    │   ├── dens_test.R
    │   ├── model.matrix.RDD.R
    │   ├── bw_ROT.R
    │   ├── get_methods.R
    │   ├── Waldci.R
    │   ├── gen_MC_IK.R
    │   ├── RDDdata_methods.R
    │   ├── STAR_MHE-data.R
    │   ├── as.npreg.R
    │   ├── deprecated.R
    │   ├── reg_gen.R
    │   ├── reg_lm.R
    │   ├── clusterInf.R
    │   ├── RDDdata.R
    │   ├── bw_IK.R
    │   ├── covarTests.R
    │   └── RDDpred.R
    ├── man
    │   ├── RDDtools-package.Rd
    │   ├── ROT_bw.Rd
    │   ├── as.lm.Rd
    │   ├── waldci.Rd
    │   ├── dens_test.Rd
    │   ├── Lee2008.Rd
    │   ├── RDDcoef.Rd
    │   ├── RDDbw_RSW.Rd
    │   ├── plotBin.Rd
    │   ├── RDDbw_IK.Rd
    │   ├── RDDdata.Rd
    │   ├── gen_MC_IK.Rd
    │   ├── RDDreg_np.Rd
    │   ├── clusterInf.Rd
    │   ├── plot.RDDdata.Rd
    │   ├── vcovCluster.Rd
    │   ├── as.npregbw.Rd
    │   ├── STAR_MHE.Rd
    │   ├── covarTest_dis.Rd
    │   ├── RDDgenreg.Rd
    │   ├── covarTest_mean.Rd
    │   ├── plotSensi.Rd
    │   ├── plotPlacebo.Rd
    │   ├── RDDreg_lm.Rd
    │   └── RDDpred.Rd
    ├── misc
    │   ├── TODO
    │   ├── test_RDest_t0_RDDreg.R
    │   ├── locpoly.f
    │   └── locpoly.R
    ├── DESCRIPTION
    ├── NEWS
    ├── tests
    │   ├── RDDtools_vs_rdd.R
    │   ├── simple_MC.R
    │   ├── RDDtools_vs_rdd.Rout.save
    │   ├── simple_MC.Rout.save
    │   ├── packageDemo.R
    │   └── RDDpred.R
    └── NAMESPACE
├── figuresREADME
    ├── DensPlot.png
    ├── RegPlot.png
    ├── dataPlot.png
    ├── reg_para.png
    ├── SensiPlot.png
    └── placeboPlot.png
└── README.Rmd


/RDDtools/.Rbuildignore:
--------------------------------------------------------------------------------
1 | misc
2 | 


--------------------------------------------------------------------------------
/RDDtools/data/Lee2008.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/data/Lee2008.rda


--------------------------------------------------------------------------------
/RDDtools/data/STAR_MHE.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/data/STAR_MHE.rda


--------------------------------------------------------------------------------
/figuresREADME/DensPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/DensPlot.png


--------------------------------------------------------------------------------
/figuresREADME/RegPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/RegPlot.png


--------------------------------------------------------------------------------
/figuresREADME/dataPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/dataPlot.png


--------------------------------------------------------------------------------
/figuresREADME/reg_para.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/reg_para.png


--------------------------------------------------------------------------------
/figuresREADME/SensiPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/SensiPlot.png


--------------------------------------------------------------------------------
/figuresREADME/placeboPlot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/placeboPlot.png


--------------------------------------------------------------------------------
/RDDtools/inst/doc/RDDtools.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/inst/doc/RDDtools.pdf


--------------------------------------------------------------------------------
/RDDtools/vignettes/RDDtools.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/vignettes/RDDtools.pdf


--------------------------------------------------------------------------------
/RDDtools/R/myRDD-package.R:
--------------------------------------------------------------------------------
 1 | #' Regression Discontinuity Design 
 2 | #' 
 3 | #' Provides function to do a comprehensive regression discontinuity analysis. 
 4 | #' 
 5 | #' @name RDDtools-package
 6 | #' @aliases RDDtools
 7 | #' @docType package
 8 | #' @import KernSmooth
 9 | #' @import np
10 | #' @import ggplot2
11 | #' @title Regression Discontinuity Design 
12 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}>
13 | NULL
14 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDtools-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \docType{package}
 3 | \name{RDDtools-package}
 4 | \alias{RDDtools}
 5 | \alias{RDDtools-package}
 6 | \title{Regression Discontinuity Design}
 7 | \description{
 8 | Regression Discontinuity Design
 9 | }
10 | \details{
11 | Provides function to do a comprehensive regression discontinuity analysis.
12 | }
13 | \author{
14 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
15 | }
16 | 
17 | 


--------------------------------------------------------------------------------
/RDDtools/R/various_code.R:
--------------------------------------------------------------------------------
 1 | ### MISC
 2 | is.even <- function (a) {
 3 |   a%%2 == 0
 4 | }
 5 | 
 6 | 
 7 | Kernel_tri <- function(X, center, bw) {
 8 |   ifelse(abs(X - center) > bw, 0, 1 - (abs(X - center) / bw))
 9 | }
10 | 
11 | Kernel_uni <- function(X, center, bw) {
12 |   ifelse(abs(X - center) > bw, 0, 1)
13 | }
14 | 
15 | .onLoad <- function(libname, pkgname)
16 |   packageStartupMessage("\nRDDtools ", utils::packageVersion("RDDtools"), 
17 |                         "\nPLEASE NOTE THIS is currently only a development version. \nRun vignette('RDDtools') for the documentation")
18 | 


--------------------------------------------------------------------------------
/RDDtools/man/ROT_bw.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{ROT_bw}
 3 | \alias{ROT_bw}
 4 | \title{Bandwidth selector}
 5 | \usage{
 6 | ROT_bw(object)
 7 | }
 8 | \arguments{
 9 | \item{object}{object of class RDDdata}
10 | }
11 | \description{
12 | implements dpill
13 | }
14 | \examples{
15 | #No discontinuity
16 | }
17 | \author{
18 | Drew Dimmery <\email{drewd@nyu.edu}>
19 | }
20 | \references{
21 | McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714. \url{http://dx.doi.org/10.1016/j.jeconom.2007.05.005}
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/RDDtools/misc/TODO:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | -plot.RDDreg_np: use np with confidence interval instead!
 4 | -plot.RDDreg_np: option one/two sided
 5 | 
 6 | -plot.RDDreg_np: understand differences in plotSensi and plot.RDDreg_np !! see slides 29
 7 | 
 8 | -plotSensi: legend: LATE and CI
 9 | -plotSensi: also compare slope specifications?
10 | 
11 | -PLACEBO Y: change y, at true cutoff
12 | 
13 | -RDDreg_lm: info on bw
14 | 
15 | -labels
16 | 
17 | -Check McCary var estimate
18 | -
19 | 
20 | ## Long term
21 | -CI? clustering?! eventually bootstrap?
22 | -covariate: in param reg! see fröhlich, extend to plotSensi/placebo!
23 | -median/quantile LATE, see fröhlich
24 | -GLM parametric
25 | 


--------------------------------------------------------------------------------
/RDDtools/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: RDDtools
 2 | Type: Package
 3 | Title: A toolbox for regression discontinuity design (RDD)
 4 | Version: 0.22
 5 | Date: 21/05/2014
 6 | Authors@R: person("Matthieu", "Stigler", role = c("aut","cre"),
 7 |     email="Matthieu.Stigler@iheid.ch")
 8 | Maintainer: Matthieu Stigler <Matthieu.Stigler@iheid.ch>
 9 | Imports:
10 |     KernSmooth,
11 |     ggplot2,
12 |     rdd,
13 |     np,
14 |     sandwich,
15 |     lmtest,
16 |     Formula,
17 |     locpol,
18 |     methods
19 | Depends:
20 |     AER
21 | Suggests:
22 |     stats4,
23 |     car
24 | Description: Provides a set of functions for RDD, from data visualisation,
25 |     estimation and testing.
26 | License: GPL (>= 2)
27 | 


--------------------------------------------------------------------------------
/RDDtools/man/as.lm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{as.lm}
 3 | \alias{as.lm}
 4 | \title{Convert a rdd object to lm}
 5 | \usage{
 6 | as.lm(x)
 7 | }
 8 | \arguments{
 9 | \item{x}{An object to convert to lm}
10 | }
11 | \value{
12 | An object of class \code{lm}
13 | }
14 | \description{
15 | Convert a rdd object to lm
16 | }
17 | \examples{
18 | data(Lee2008)
19 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
20 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
21 | reg_para_lm <- as.lm(reg_para)
22 | reg_para_lm
23 | plot(reg_para_lm, which=4)
24 | }
25 | \seealso{
26 | \code{\link{as.npreg}} which converts \code{RDDreg} objects into \code{npreg} from package \code{np}.
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/RDDtools/man/waldci.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{waldci}
 3 | \alias{waldci}
 4 | \title{Confint allowing vcov}
 5 | \usage{
 6 | waldci(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...)
 7 | }
 8 | \arguments{
 9 | \item{x}{Object of class lm or else}
10 | 
11 | \item{parm}{specification of which parameters are to be given confidence intervals, see confint}
12 | 
13 | \item{level}{the confidence level required, see confint()}
14 | 
15 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of sandwich}
16 | 
17 | \item{df}{Degrees of freedom}
18 | 
19 | \item{\ldots}{Further argument}
20 | }
21 | \description{
22 | Version of vcov allowing for confint
23 | }
24 | \keyword{internal}
25 | 
26 | 


--------------------------------------------------------------------------------
/RDDtools/man/dens_test.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{dens_test}
 3 | \alias{dens_test}
 4 | \title{Run the McCracy test for manipulation of the forcing variable}
 5 | \usage{
 6 | dens_test(RDDobject, bin = NULL, bw = NULL, plot = TRUE, ...)
 7 | }
 8 | \arguments{
 9 | \item{RDDobject}{object of class RDDdata}
10 | 
11 | \item{bin}{Argument of the \code{\link{DCdensity}} function, the binwidth}
12 | 
13 | \item{bw}{Argument of the \code{\link{DCdensity}} function, the bandwidth}
14 | 
15 | \item{plot}{Whether to return a plot. Logical, default ot TRUE.}
16 | 
17 | \item{\ldots}{Further arguments passed to \code{\link[rdd]{DCdensity}}.}
18 | }
19 | \description{
20 | Calls the \code{\link[rdd]{DCdensity}} test from package \code{rdd} on a \code{RDDobject}.
21 | }
22 | \examples{
23 | library(RDDtools)
24 | data(Lee2008)
25 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
26 | dens_test(Lee2008_rdd)
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/RDDtools/man/Lee2008.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \docType{data}
 3 | \name{Lee2008}
 4 | \alias{Lee2008}
 5 | \title{Dataset used in Lee (2008)}
 6 | \format{A data frame with 6558 observations and two variables:
 7 | \describe{
 8 | \item{x}{Vote at election t-1}
 9 | \item{y}{Vote at election t}
10 | }}
11 | \source{
12 | Guido Imbens webpage: \url{http://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity}
13 | }
14 | \usage{
15 | Lee2008
16 | }
17 | \description{
18 | U.S. House elections data
19 | }
20 | \examples{
21 | data(Lee2008)
22 | RDDlee <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0)
23 | summary(RDDlee)
24 | plot(RDDlee)
25 | }
26 | \references{
27 | Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator,"
28 | Review of Economic Studies (2012) 79, 933-959
29 | 
30 | Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections,
31 | \emph{Journal of Econometrics}, 142, 675-697
32 | }
33 | 
34 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDcoef.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDcoef}
 3 | \alias{RDDcoef}
 4 | \alias{RDDcoef.RDDreg_np}
 5 | \alias{RDDcoef.default}
 6 | \title{RDD coefficient}
 7 | \usage{
 8 | RDDcoef(object, allInfo = FALSE, allCo = FALSE, ...)
 9 | 
10 | \method{RDDcoef}{default}(object, allInfo = FALSE, allCo = FALSE, ...)
11 | 
12 | \method{RDDcoef}{RDDreg_np}(object, allInfo = FALSE, allCo = FALSE, ...)
13 | }
14 | \arguments{
15 | \item{object}{A RDD regression object}
16 | 
17 | \item{allInfo}{whether to return just the coefficients (allInfo=FALSE) or also the se/t stat/pval.}
18 | 
19 | \item{allCo}{Whether to give only the RDD coefficient (allCo=FALSE) or all coefficients}
20 | 
21 | \item{\ldots}{Further arguments passed to/from specific methods}
22 | }
23 | \value{
24 | Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate,
25 | its standard value, t test and p-value and
26 | }
27 | \description{
28 | Function to access the RDD coefficient in the various regressions
29 | }
30 | 
31 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDbw_RSW.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDbw_RSW}
 3 | \alias{RDDbw_RSW}
 4 | \title{Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth}}
 5 | \usage{
 6 | RDDbw_RSW(object, type = c("global", "sided"))
 7 | }
 8 | \arguments{
 9 | \item{object}{object of class RDDdata created by \code{\link{RDDdata}}}
10 | 
11 | \item{type}{Whether to choose a global bandwidth for the whole function (\code{global})
12 | or for each side (\code{sided})}
13 | }
14 | \value{
15 | One (or two for \code{sided}) bandwidth value.
16 | }
17 | \description{
18 | Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995)
19 | either to the whole function, or to the functions below and above the cutpoint.
20 | }
21 | \examples{
22 | data(Lee2008)
23 | rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0)
24 | RDDbw_RSW(rd)
25 | }
26 | \references{
27 | See \code{\link[KernSmooth]{dpill}}
28 | }
29 | \seealso{
30 | \code{\link{RDDbw_IK}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012)
31 | }
32 | 
33 | 


--------------------------------------------------------------------------------
/RDDtools/man/plotBin.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{plotBin}
 3 | \alias{plotBin}
 4 | \title{Bin plotting}
 5 | \usage{
 6 | plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE,
 7 |   type = c("value", "number"), xlim = range(x, na.rm = TRUE), cex = 0.9,
 8 |   main = NULL, xlab, ylab, ...)
 9 | }
10 | \arguments{
11 | \item{x}{Forcing variable}
12 | 
13 | \item{y}{Output}
14 | 
15 | \item{h}{the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)})}
16 | 
17 | \item{cutpoint}{Cutpoint}
18 | 
19 | \item{plot}{Logical. Whether to plot or only returned silently}
20 | 
21 | \item{type}{Whether returns the y averages, or the x frequencies}
22 | 
23 | \item{xlim,cex,main,xlab,ylab}{Usual parameters passed to plot(), see \code{\link{par}}}
24 | 
25 | \item{\ldots}{further arguments passed to plot.}
26 | }
27 | \value{
28 | Returns silently values
29 | }
30 | \description{
31 | Do a "scatterplot bin smoothing"
32 | }
33 | \author{
34 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
35 | }
36 | \references{
37 | McCrary, Justin.
38 | }
39 | \keyword{internal}
40 | 
41 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDbw_IK.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDbw_IK}
 3 | \alias{RDDbw_IK}
 4 | \title{Imbens-Kalyanaraman Optimal Bandwidth Calculation}
 5 | \usage{
 6 | RDDbw_IK(RDDobject, kernel = c("Triangular", "Uniform", "Normal"))
 7 | }
 8 | \arguments{
 9 | \item{RDDobject}{of class RDDdata created by \code{\link{RDDdata}}}
10 | 
11 | \item{kernel}{The type of kernel used: either \code{triangular} or \code{uniform}.}
12 | }
13 | \value{
14 | The optimal bandwidth
15 | }
16 | \description{
17 | Imbens-Kalyanaraman optimal bandwidth
18 | for local linear regression in Regression discontinuity designs.
19 | }
20 | \examples{
21 | data(Lee2008)
22 | rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0)
23 | RDDbw_IK(rd)
24 | }
25 | \author{
26 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
27 | }
28 | \references{
29 | Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator,"
30 | Review of Economic Studies (2012) 79, 933-959
31 | }
32 | \seealso{
33 | \code{\link{RDDbw_RSW}} Global bandwidth selector of Ruppert, Sheather and Wand (1995)
34 | }
35 | 
36 | 


--------------------------------------------------------------------------------
/RDDtools/NEWS:
--------------------------------------------------------------------------------
 1 | 
 2 | RDDtools 0.22
 3 | ===========
 4 | Updated on 21/5/14
 5 | 
 6 | * RDDdata: change arg z to covar, add new argument z for sharp, currently unused. 
 7 | 
 8 | * dens_test: work now on RDDreg, return object htest
 9 | 
10 | * Multiple changes in help files
11 | 
12 | * Correct import, suggests, calls to :::
13 | 
14 | RDDtools 0.21
15 | ===========
16 | Updated on 25/7/13
17 | 
18 | * Add new function RDDpred
19 | 
20 | * Add new model.matrix.RDDdata, preparing all output, now used by all RDDreg_np, RDDreg_lm, RDDgenre...
21 | 
22 | * Add method vcov.RDDreg, as.lm.RDDreg
23 | 
24 | * Add enw function vcovCluster2, complement doc, add M Arai, 
25 | 
26 | * Add data STAR_MHE
27 | 
28 | * Many small fixes
29 | 
30 | RDDtools 0.2
31 | ===========
32 | Updated on 16/7/13
33 | 
34 | * Add new option to have separate or same covariates
35 | 
36 | * Add as.nprg, to convert to a np regression from package np
37 | 
38 | * Add RDDcoef, working on multiple models (lm, np, npreg). 
39 | 
40 | * Many fixes...
41 | 
42 | RDDtools 0.1
43 | ===========
44 | Initial commit on 29/04/2013
45 | 
46 | * Initial commit, containing RDDdata, RDDreg_lm, RDDreg_np, plotSensi, plotPlacebo, etc...
47 | 


--------------------------------------------------------------------------------
/RDDtools/R/RDDcoef.R:
--------------------------------------------------------------------------------
 1 | #' RDD coefficient
 2 | #'
 3 | #' Function to access the RDD coefficient in the various regressions
 4 | #' @param object A RDD regression object
 5 | #' @param allInfo whether to return just the coefficients (allInfo=FALSE) or also the se/t stat/pval. 
 6 | #' @param allCo Whether to give only the RDD coefficient (allCo=FALSE) or all coefficients
 7 | #' @param \ldots Further arguments passed to/from specific methods
 8 | #' @return Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate, 
 9 | #' its standard value, t test and p-value and 
10 | #' @export
11 | RDDcoef <- function(object, allInfo=FALSE, allCo=FALSE, ...)
12 |   UseMethod("RDDcoef")
13 | 
14 | #' @rdname RDDcoef
15 | #' @method RDDcoef default
16 | #' @S3method RDDcoef default
17 | RDDcoef.default <- function(object, allInfo=FALSE, allCo=FALSE, ...){
18 |   res <- coef(summary(object))
19 |   if(!allCo) res <- res["D",, drop=FALSE]
20 |   if(!allInfo) res <- res[,"Estimate"]
21 |   res
22 | }
23 | 
24 | #' @rdname RDDcoef
25 | #' @method RDDcoef RDDreg_np
26 | #' @S3method RDDcoef RDDreg_np
27 | RDDcoef.RDDreg_np <- function(object, allInfo=FALSE, allCo=FALSE, ...){
28 |   res<- object$coefMat
29 |   if(!allCo) res <- res["D",, drop=FALSE]
30 |   if(!allInfo) res <- res[,"Estimate"]
31 |   res
32 | }
33 | 
34 | 


--------------------------------------------------------------------------------
/RDDtools/R/Lee2008-data.R:
--------------------------------------------------------------------------------
 1 | #' @name Lee2008
 2 | #' @title Dataset used in Lee (2008)
 3 | #' @description U.S. House elections data
 4 | #' @docType data
 5 | #' @usage Lee2008
 6 | #' @description Dataset described used in Imbens and Kalyamaran (2012), and probably the same dataset used in Lee (2008),
 7 | #' @format A data frame with 6558 observations and two variables:
 8 | #' \describe{
 9 | #' \item{x}{Vote at election t-1}
10 | #' \item{y}{Vote at election t}
11 | #' }
12 | #' @source Guido Imbens webpage: \url{http://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity}
13 | #' @references Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," 
14 | #' Review of Economic Studies (2012) 79, 933-959
15 | #' @references   Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections, 
16 | #' \emph{Journal of Econometrics}, 142, 675-697
17 | #' @examples 
18 | #' data(Lee2008)
19 | #' RDDlee <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0)
20 | #' summary(RDDlee)
21 | #' plot(RDDlee)
22 | 
23 | 
24 | NULL
25 | # Lee2008 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE)
26 | # colnames(Lee2008) <- c("x", "y")
27 | # save(Lee2008, file="/home/mat/Dropbox/HEI/rdd/Rcode/RDDtools/data/Lee2008.rda")


--------------------------------------------------------------------------------
/RDDtools/man/RDDdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDdata}
 3 | \alias{RDDdata}
 4 | \title{Construct RDDdata}
 5 | \usage{
 6 | RDDdata(y, x, covar, cutpoint, z, labels, data)
 7 | }
 8 | \arguments{
 9 | \item{x}{Forcing variable}
10 | 
11 | \item{y}{Output}
12 | 
13 | \item{covar}{Exogeneous variables}
14 | 
15 | \item{cutpoint}{Cutpoint}
16 | 
17 | \item{labels}{Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently.}
18 | 
19 | \item{data}{A data-frame for the \code{x} and \code{y} variables. If this is provided,
20 | the column names can be entered directly for argument \code{x} and \code{y}}
21 | 
22 | \item{z}{Assignment variable for the fuzzy case.}
23 | }
24 | \value{
25 | Object of class \code{RDDdata}, inheriting from \code{data.frame}
26 | }
27 | \description{
28 | Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates.
29 | }
30 | \examples{
31 | data(Lee2008)
32 | rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0)
33 | rd2 <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0)
34 | 
35 | # The print() function is the same as the print.data.frame:
36 | rd
37 | 
38 | # The summary() and plot() function are specific to RDDdata
39 | summary(rd)
40 | plot(rd)
41 | }
42 | \author{
43 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
44 | }
45 | 
46 | 


--------------------------------------------------------------------------------
/RDDtools/misc/test_RDest_t0_RDDreg.R:
--------------------------------------------------------------------------------
 1 | 
 2 | if(FALSE){
 3 | RDDdat_choice <- RDDdata(y=choice_pg, x=xx, cutpoint=30)
 4 | RDDdat_risk <- RDDdata(y=risky_option, x=xx, cutpoint=30)
 5 | 
 6 | RDbw_choice <- RDDbw_IK(RDDdat_choice)
 7 | RDbw_risk <- RDDbw_IK(RDDdat_risk)
 8 | 
 9 | rd_choice <- RDDreg_np(RDDdat_choice)
10 | rd_choice_sam <- RDDreg_np(RDDdat_choice, slope="same")
11 | rd_choice_lm <- RDDreg_lm(RDDdat_choice)
12 | 
13 | rd_risk <- RDDreg_np(RDDdat_risk)
14 | rd_risk_sam <- RDDreg_np(RDDdat_risk, slope="same")
15 | 
16 | 
17 | rd_choice
18 | rd_choice_sam
19 | rd_choice_lm
20 | rdd_choice <- RDestimate(choice_pg~xx, cutpoint=30, bw=RDbw_choice, model=TRUE)
21 | coef(summary(rdd_choice$model[[1]]))
22 | coef(summary(rd_choice))
23 | plot(rd_choice)
24 | 
25 | RDest_t0_RDDreg <- function(RDest){
26 | 
27 |   cutpoint <- getCall(RDest)$cutpoint
28 |   co_RDest <- coef(summary(RDest$model[[1]]))[,1]
29 | 
30 |   coefs <- vector("numeric", 4)
31 |   names(coefs) <- c("(Intercept)", "x", "D", "x_right")
32 |   coefs["x"] <- co_RDest["Xl"]
33 |   coefs["x_right"] <- co_RDest["Xr"]-co_RDest["Xl"]
34 |   coefs["(Intercept)"] <- co_RDest["(Intercept)"] - cutpoint*co_RDest["Xl"]
35 |   coefs["D"] <- co_RDest["Tr"] + cutpoint*(co_RDest["Xl"]-co_RDest["Xr"])
36 | coefs
37 | }
38 | 
39 | RDest_t0_RDDreg(RDest=rdd_choice)
40 | coef(summary(rd_choice))[,1]
41 | 
42 | rd_risk
43 | rd_risk_sam
44 | RDestimate(risky_option~xx, cutpoint=30, bw=RDbw_risk)
45 | 
46 | 
47 | head(model.frame(RDestimate(risky_option~xx, cutpoint=30, model=TRUE, bw=RDbw_risk)$model[[1]]))
48 | 
49 | head(model.frame(rd_choice_sam$model))
50 | 
51 | }


--------------------------------------------------------------------------------
/RDDtools/man/gen_MC_IK.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{gen_MC_IK}
 3 | \alias{gen_MC_IK}
 4 | \title{Generate Monte Carlo simulations of Imbens and Kalyanaraman}
 5 | \usage{
 6 | gen_MC_IK(n = 200, version = 1, sd = 0.1295, output = c("data.frame",
 7 |   "RDDdata"), size)
 8 | }
 9 | \arguments{
10 | \item{n}{The size of sampel to generate}
11 | 
12 | \item{version}{The MC version of Imbens and Kalnayaraman (between 1 and 4).}
13 | 
14 | \item{sd}{The standard deviation of the error term.}
15 | 
16 | \item{output}{Whether to return a data-frame, or already a RDDdata}
17 | 
18 | \item{size}{The size of the effect, this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1}
19 | }
20 | \value{
21 | An data frame with x and y variables.
22 | }
23 | \description{
24 | Generate the simulations reported in Imbens and Kalyanaraman (2012)
25 | }
26 | \examples{
27 | MC1_dat <- gen_MC_IK()
28 | MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0)
29 | 
30 | ## Use np regression:
31 | reg_nonpara <- RDDreg_np(RDDobject=MC1_rdd)
32 | reg_nonpara
33 | 
34 | # Represent the curves:
35 | plotCu <- function(version=1, xlim=c(-0.1,0.1)){
36 |   res <- gen_MC_IK(sd=0.0000001, n=1000, version=version)
37 |   res <- res[order(res$x),]
38 |   ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), "y"))
39 |   plot(res, type="l", xlim=xlim, ylim=ylim, main=paste("DGP", version))
40 |   abline(v=0)
41 |   xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),]
42 |   points(xCut, col=2)
43 | }
44 | layout(matrix(1:4,2, byrow=TRUE))
45 | plotCu(version=1)
46 | plotCu(version=2)
47 | plotCu(version=3)
48 | plotCu(version=4)
49 | layout(matrix(1))
50 | }
51 | \references{
52 | TODO
53 | }
54 | 
55 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDreg_np.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDreg_np}
 3 | \alias{RDDreg_np}
 4 | \title{Parametric polynomial estimator of the regression discontinuity}
 5 | \usage{
 6 | RDDreg_np(RDDobject, covariates = NULL, bw = RDDbw_IK(RDDobject),
 7 |   slope = c("separate", "same"), inference = c("np", "lm"),
 8 |   covar.opt = list(slope = c("same", "separate"), bw = NULL))
 9 | }
10 | \arguments{
11 | \item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}}
12 | 
13 | \item{covariates}{TODO}
14 | 
15 | \item{bw}{A bandwidth to specify the subset on which the parametric regression is estimated}
16 | 
17 | \item{inference}{Type of inference to conduct: non-parametric one (\code{np}) or standard (\code{lm}). See details.}
18 | 
19 | \item{slope}{Whether slopes should be different on left or right (separate), or the same.}
20 | 
21 | \item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).}
22 | }
23 | \value{
24 | An object of class RDDreg_np and class lm, with specific print and plot methods
25 | }
26 | \description{
27 | Compute a parametric polynomial regression of the ATE,
28 | possibly on the range specified by bandwidth
29 | }
30 | \examples{
31 | ## Step 0: prepare data
32 | data(Lee2008)
33 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
34 | ## Step 2: regression
35 | # Simple polynomial of order 1:
36 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
37 | print(reg_nonpara)
38 | plot(reg_nonpara)
39 | }
40 | \references{
41 | TODO
42 | }
43 | \seealso{
44 | \code{\link{RDDbw_IK}} Bandwidth selection using the plug-in bandwidth of Imbens and Kalyanaraman (2012)
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/RDDtools/tests/RDDtools_vs_rdd.R:
--------------------------------------------------------------------------------
 1 | 
 2 | library(rdd)
 3 | library(RDDtools)
 4 | 
 5 | set.seed(1234)
 6 | x<-runif(1000,-1,1)
 7 | cov<-rnorm(1000)
 8 | y<-3+2*x+3*cov+10*(x>=0)+rnorm(1000)
 9 | 
10 | RD <- RDDdata(x=x, y=y, cutpoint=0, covar=cov)
11 | 
12 | ### Simple estimation:
13 | bw <- IKbandwidth(X=x, Y=y, cutpoint=0)
14 | bw
15 | rdd_mod <- RDestimate(y~x, bw=bw, se.type="const", model=TRUE)$model[[1]]
16 | RDDtools_mod <- RDDreg_np(RD, bw=bw, inference="lm")
17 | 
18 | rdd_co <- coef(summary(rdd_mod))
19 | RDDtools_co <- RDDcoef(RDDtools_mod, allCo=TRUE, allInfo=TRUE)
20 | rdd_co
21 | RDDtools_co
22 | 
23 | all.equal(rdd_co[-4,], RDDtools_co[1:3,], check.attributes=FALSE)
24 | all.equal(rdd_co[4,1], sum(RDDtools_co[3:4,1]), check.attributes=FALSE)
25 | 
26 | 
27 | ### Covariate estimation:
28 | rdd_mod_cov <- RDestimate(y~x|cov, kernel="rectangular", bw=5, model=TRUE, se.type="const")$model[[1]]
29 | RDDtools_mod_cov <- RDDreg_lm(RD, bw=5, covariates="cov", covar.opt=list(slope="separate"))
30 | 
31 | rdd_co_cov <- coef(summary(rdd_mod_cov))
32 | RDDtools_co_cov <- RDDcoef(RDDtools_mod_cov, allCo=TRUE, allInfo=TRUE)
33 | rdd_co_cov
34 | RDDtools_co_cov
35 | 
36 | all.equal(rdd_co_cov[-4,], RDDtools_co_cov[-4,], check.attributes=FALSE)
37 | 
38 | ## Fuzzy
39 | set.seed(123)
40 | selec <- rbinom(nrow(RD), 1, prob=ifelse(RD$x<0, 0.1, 0.9))
41 | RD_rdd_ins <- RDDdata(y=RD$y, x=RD$x, z=selec,cutpoint=0)
42 | 
43 | RDDto_reg_fuz <- RDDreg_lm(RD_rdd_ins, bw=0.2)
44 | rdd_reg_fuz <- RDestimate(y~x+selec, data=RD_rdd_ins, kernel="rectangular", bw=0.2, model=TRUE, se.type="const")$model[[2]][[1]]
45 | 
46 | all.equal(RDDcoef(RDDto_reg_fuz),coef(summary(rdd_reg_fuz))[2,1])
47 | all.equal(RDDcoef(RDDto_reg_fuz, allCo=TRUE)[1:3],coef(summary(rdd_reg_fuz))[1:3,1], check.attributes=FALSE)
48 | 
49 | 


--------------------------------------------------------------------------------
/RDDtools/man/clusterInf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{clusterInf}
 3 | \alias{clusterInf}
 4 | \title{Post-inference for clustered data}
 5 | \usage{
 6 | clusterInf(object, clusterVar, vcov. = NULL, type = c("df-adj", "HC"), ...)
 7 | }
 8 | \arguments{
 9 | \item{object}{Object of class lm, from which RDDreg also inherits.}
10 | 
11 | \item{clusterVar}{The variable containing the cluster attributions.}
12 | 
13 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of sandwich}
14 | 
15 | \item{type}{The type of cluster correction to use: either the degrees of freedom, or a HC matrix.}
16 | 
17 | \item{\ldots}{Further arguments passed to coeftest}
18 | }
19 | \value{
20 | The output of the coeftest function, which is itself of class \code{coeftest}
21 | }
22 | \description{
23 | Correct standard-errors to account for clustered data, doing either a degrees of freedom correction or using a heteroskedasticidty-cluster robust covariance matrix
24 | possibly on the range specified by bandwidth
25 | }
26 | \examples{
27 | data(Lee2008)
28 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
29 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
30 | 
31 | # here we just generate randomly a cluster variable:
32 | nlet <- sort(c(outer(letters, letters, paste, sep="")))
33 | clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE)
34 | 
35 | # now do post-inference:
36 | clusterInf(reg_para, clusterVar=clusRandom)
37 | clusterInf(reg_para, clusterVar=clusRandom, type="HC")
38 | }
39 | \references{
40 | Wooldridge (2003) Cluster-sample methods in applied econometrics.
41 | \emph{AmericanEconomic Review}, 93, p. 133-138
42 | }
43 | \seealso{
44 | \code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{cluserInf}
45 | }
46 | 
47 | 


--------------------------------------------------------------------------------
/RDDtools/NAMESPACE:
--------------------------------------------------------------------------------
 1 | S3method("[",RDDdata)
 2 | S3method(RDDcoef,RDDreg_np)
 3 | S3method(RDDcoef,RDDreg_npreg)
 4 | S3method(RDDcoef,default)
 5 | S3method(as.data.frame,RDDdata)
 6 | S3method(as.lm,RDDreg)
 7 | S3method(as.lm,RDDreg_np)
 8 | S3method(bread,RDDreg_np)
 9 | S3method(covarTest_dis,RDDdata)
10 | S3method(covarTest_dis,RDDreg)
11 | S3method(covarTest_mean,RDDdata)
12 | S3method(covarTest_mean,RDDreg)
13 | S3method(estfun,RDDreg_np)
14 | S3method(getCall,RDDreg)
15 | S3method(model.frame,RDDreg_np)
16 | S3method(model.matrix,RDDdata)
17 | S3method(plot,RDDdata)
18 | S3method(plot,RDDreg_lm)
19 | S3method(plot,RDDreg_np)
20 | S3method(plotPlacebo,PlaceboVals)
21 | S3method(plotPlacebo,RDDreg)
22 | S3method(plotPlaceboDens,PlaceboVals)
23 | S3method(plotPlaceboDens,RDDreg)
24 | S3method(plotSensi,RDDreg_lm)
25 | S3method(plotSensi,RDDreg_np)
26 | S3method(print,RDDreg_lm)
27 | S3method(print,RDDreg_np)
28 | S3method(print,summary.RDDreg_np)
29 | S3method(subset,RDDdata)
30 | S3method(summary,RDDdata)
31 | S3method(summary,RDDreg_np)
32 | S3method(vcov,RDDreg_np)
33 | export(RDDbw_IK)
34 | export(RDDbw_RSW)
35 | export(RDDcoef)
36 | export(RDDdata)
37 | export(RDDgenreg)
38 | export(RDDpred)
39 | export(RDDreg_lm)
40 | export(RDDreg_np)
41 | export(ROT_bw)
42 | export(as.lm)
43 | export(as.npreg)
44 | export(as.npregbw)
45 | export(clusterInf)
46 | export(computePlacebo)
47 | export(covarTest_dis)
48 | export(covarTest_mean)
49 | export(dens_test)
50 | export(gen_MC_IK)
51 | export(plotPlacebo)
52 | export(plotPlaceboDens)
53 | export(plotSensi)
54 | export(vcovCluster)
55 | export(vcovCluster2)
56 | import(Formula)
57 | import(KernSmooth)
58 | import(ggplot2)
59 | import(lmtest)
60 | import(methods)
61 | import(np)
62 | import(rdd)
63 | import(sandwich)
64 | importFrom(AER,ivreg)
65 | importFrom(locpol,gaussK)
66 | importFrom(locpol,locpol)
67 | importFrom(stats,getCall)
68 | 


--------------------------------------------------------------------------------
/RDDtools/tests/simple_MC.R:
--------------------------------------------------------------------------------
 1 | 
 2 | library(RDDtools)
 3 | library(rdd)
 4 | 
 5 | ## simple MC:
 6 | set.seed(123)
 7 | 
 8 | MC_simple <- function(n=200, CATE=0.3, HATE=0.1){
 9 |   x <- rnorm(n, mean=20, sd=5)
10 |   D <- x>= 20
11 |   y <- 0.8 + CATE*D+ 0.3*x+HATE*x*D+rnorm(n, sd=0.1)
12 |   cat("effect", CATE+HATE*20, "\n")
13 |   RDDdata(x=x, y=y, cutpoint=20)
14 | 
15 | }
16 | 
17 | input_mc <- MC_simple(n=1000, CATE=0.4)
18 | plot(input_mc)
19 | 
20 | RDD_bw <- RDDbw_IK(input_mc)
21 | 
22 | RDD_np_sep <- RDDreg_np(input_mc, bw=RDD_bw)
23 | RDD_np_same <- RDDreg_np(input_mc, slope="same", bw=RDD_bw)
24 | RDD_np_sep_inflm <- RDDreg_np(input_mc, bw=RDD_bw, inf="lm")
25 | RDD_np_same_inflm <- RDDreg_np(input_mc, slope="same", bw=RDD_bw, inf="lm")
26 | RDD_lm_sep <- RDDreg_lm(input_mc, bw=RDD_bw)
27 | RDD_lm_same <- RDDreg_lm(input_mc, slope="same", bw=RDD_bw)
28 | rdd_RDe <- RDestimate(y~x, data=input_mc, cutpoint=20, model=TRUE, bw=RDD_bw)
29 | 
30 | 
31 | printCoefmat(coef(summary(RDD_np_sep_inflm$RDDslot$model)))
32 | printCoefmat(coef(summary(RDD_np_same_inflm$RDDslot$model)))
33 | printCoefmat(coef(summary(RDD_lm_sep)))
34 | printCoefmat(coef(summary(RDD_lm_same)))
35 | printCoefmat(coef(summary(rdd_RDe $model[[1]])))
36 | 
37 | 
38 | ## few checks:
39 | plse <- plotSensi(RDD_np_sep, from=5, to=20, by=0.5)
40 | plotPlacebo(RDD_np_sep)
41 | 
42 | plotSensi(RDD_np_same, from=5, to=20, by=0.5)
43 | plotPlacebo(RDD_np_same)
44 | 
45 | a<-plotSensi(RDD_lm_sep, from=5, to=20, by=0.5)
46 | plotPlacebo(RDD_lm_sep)
47 | 
48 | plotSensi(RDD_lm_same, from=5, to=20, by=0.5)
49 | plotPlacebo(RDD_lm_same)
50 | 
51 | #### Other MCs:
52 | set.seed(123)
53 | head(gen_MC_IK())
54 | 
55 | set.seed(123)
56 | head(gen_MC_IK(output="RDDdata"))
57 | 
58 | set.seed(123)
59 | head(gen_MC_IK(version=2))
60 | 
61 | set.seed(123)
62 | head(gen_MC_IK(version=3))
63 | 
64 | set.seed(123)
65 | head(gen_MC_IK(version=4))
66 | 


--------------------------------------------------------------------------------
/RDDtools/man/plot.RDDdata.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{plot.RDDdata}
 3 | \alias{plot.RDDdata}
 4 | \title{Plot RDDdata}
 5 | \usage{
 6 | \method{plot}{RDDdata}(x, h, nbins = NULL, xlim = range(object$x, na.rm =
 7 |   TRUE), cex = 0.7, nplot = 1, device = c("base", "ggplot"), ...)
 8 | }
 9 | \arguments{
10 | \item{x}{Object of class RDDdata}
11 | 
12 | \item{h}{The binwidth parameter (note this differs from the bandwidth parameter!)}
13 | 
14 | \item{nbins}{Alternative to h, the total number of bins in the plot.}
15 | 
16 | \item{xlim}{The range of the x data}
17 | 
18 | \item{cex}{Size of the points, see \code{\link{par}}}
19 | 
20 | \item{nplot}{Number of plot to draw}
21 | 
22 | \item{device}{Type of device used. Currently not used.}
23 | 
24 | \item{\ldots}{Further arguments passed to the \code{\link{plot}} function.}
25 | }
26 | \value{
27 | A plot
28 | }
29 | \description{
30 | Binned plot of the forcing and outcome variable
31 | }
32 | \details{
33 | Produces a simple binned plot averaging values within each interval. The length of the intervals
34 | is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth
35 | argument, that gives half of the length of the kernel window.
36 | When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{RDDbw_RSW}}.
37 | }
38 | \examples{
39 | data(Lee2008)
40 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
41 | plot(Lee2008_rdd)
42 | 
43 | ## Specify manually the bandwidth:
44 | plot(Lee2008_rdd, h=0.2)
45 | 
46 | ## Show three plots with different bandwidth:
47 | plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3)
48 | 
49 | ## Specify instead of the bandwidth, the final number of bins:
50 | plot(Lee2008_rdd, nbins=22)
51 | 
52 | ## If the specified number of bins is odd, the larger number is given to side with largest range
53 | plot(Lee2008_rdd, nbins=21)
54 | }
55 | \author{
56 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
57 | }
58 | 
59 | 


--------------------------------------------------------------------------------
/RDDtools/man/vcovCluster.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{vcovCluster}
 3 | \alias{vcovCluster}
 4 | \alias{vcovCluster2}
 5 | \title{Cluster Heteroskedasticity-consistent estimation of the covariance matrix.}
 6 | \usage{
 7 | vcovCluster(object, clusterVar)
 8 | 
 9 | vcovCluster2(object, clusterVar1, clusterVar2)
10 | }
11 | \arguments{
12 | \item{object}{Object of class lm, from which RDDreg also inherits.}
13 | 
14 | \item{clusterVar}{The variable containing the cluster attributions.}
15 | 
16 | \item{clusterVar1,clusterVar2}{The two cluster variables for the 2-cluster case.}
17 | }
18 | \value{
19 | A matrix containing the covariance matrix estimate.
20 | }
21 | \description{
22 | Offer a cluster variant of the usual Heteroskedasticity-consistent
23 | }
24 | \examples{
25 | data(STAR_MHE)
26 | if(all(c(require(sandwich), require(lmtest)))){
27 | 
28 | # Run simple regression:
29 | reg_krug <- lm(pscore~cs, data=STAR_MHE)
30 | 
31 | # Row 1 of Table 8.2.1, inference with standard vcovHC:
32 | coeftest(reg_krug,vcov.=vcovHC(reg_krug, "HC1"))[2,2]
33 | 
34 | # Row 4 of Table 8.2.1, inference with cluster vcovHC:
35 | coeftest(reg_krug,vcov.=vcovCluster(reg_krug, clusterVar=STAR_MHE$classid))[2,2]
36 | }
37 | }
38 | \author{
39 | Mahmood Arai, see \url{http://people.su.se/~ma/econometrics.html}
40 | }
41 | \references{
42 | Cameron, C.,  Gelbach, J. and Miller, D. (2011) Robust Inference With Multiway Clustering,
43 | \emph{Journal of Business and Economic Statistics},  vol. 29(2), pages 238-249.
44 | #'
45 | 
46 | Wooldridge (2003) Cluster-sample methods in applied econometrics.
47 | \emph{AmericanEconomic Review}, 93, p. 133-138
48 | 
49 | Arai, M. (2011) Cluster-robust standard errors using R, Note available \url{http://people.su.se/~ma/clustering.pdf}.
50 | }
51 | \seealso{
52 | \code{\link{clusterInf}} for a direct function, allowing also alternative cluster inference methods.
53 | See also \code{\link[rms]{robcov}} from package \code{rms} for another implementation of the cluster robust.
54 | }
55 | 
56 | 


--------------------------------------------------------------------------------
/RDDtools/R/qplot_experim.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | gplot <- function(x,  h, xlim=range(object$x, na.rm=TRUE), cex=0.7, nplot=3,type=c("base", "ggplot"),...){
 4 |   object <- x
 5 |   cutpoint <- getCutpoint(object)
 6 | 
 7 | ## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill)
 8 |   if(missing(h)) {
 9 |     if(!all(xlim==range(object$x, na.rm=TRUE))){
10 |       object <- subset(object, object$x> min(xlim) & object$x< max(xlim))
11 |     }
12 |     h <- RDDbw_RSW(object) 
13 |     if(is.even(nplot)) {
14 |       se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2)
15 |     } else {
16 |       se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2)
17 |     }
18 |     hs <- if(nplot==1) h else se *h
19 |   } else {
20 |     if(length(h)==1){
21 |       if(is.even(nplot)) {
22 | 	se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2)
23 |       } else {
24 | 	se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2)
25 |       }
26 |       hs <- if(nplot==1) h else se *h
27 |     } else {
28 |       if(length(h==nplot)){
29 | 	hs <- h
30 |       } else {
31 | 	stop("Length of h should be either one or equal to nplot (", nplot, ")")
32 |       }
33 |     }
34 |   }
35 | 
36 | 
37 |   
38 | 
39 | ## plot
40 |   if(type=="base"){
41 |     par_orig <- par()
42 |     par(mfrow=c(nplot,1))
43 |     for(i in 1:nplot){
44 |       plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], xlim=xlim, cex=cex)
45 |     }
46 |   } else {
47 | 
48 |     plotBin_out <- plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[1], xlim=xlim, cex=cex, plot=FALSE)
49 |     plotBin_out$h <- rep(hs[1], nrow(plotBin_out))
50 |     for(i in 2:nplot){
51 |       new <- plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], xlim=xlim, cex=cex)
52 |       new$h <- rep(hs[i], nrow(new))
53 |       plotBin_out <- rbind(plotBin_out, new)
54 |     }
55 | 
56 |     plotBin_out$h <- round(plotBin_out$h,4)
57 |     qplot(x=x, y=y, data=plotBin_out)+facet_grid(h~.)
58 | 
59 |   }
60 | 
61 | }
62 | 


--------------------------------------------------------------------------------
/RDDtools/man/as.npregbw.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{as.npregbw}
 3 | \alias{as.npreg}
 4 | \alias{as.npregbw}
 5 | \title{Convert an RDDreg object to a  \code{npreg} object}
 6 | \usage{
 7 | as.npregbw(x, ...)
 8 | 
 9 | as.npreg(x, ...)
10 | }
11 | \arguments{
12 | \item{x}{Object of class \code{RDDreg} created by \code{\link{RDDreg_np}} or \code{\link{RDDreg_lm}}}
13 | 
14 | \item{\ldots}{Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}}}
15 | }
16 | \value{
17 | An object of class \code{npreg} or \code{npregbw}
18 | }
19 | \description{
20 | Convert an RDDobject to a non-parametric regression \code{npreg} from package \code{np}
21 | }
22 | \details{
23 | This function converts an RDDreg object into an \code{npreg} object from package \code{np}
24 | Note that the output won't be the same, since \code{npreg} does not offer a triangualr kernel, but a gaussian or Epanechinkov one.
25 | Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while RDDreg
26 | proceeds as if the kernerl was univariate. A simple solution to make the multivariate kernel similar to the  univariate one
27 | is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel.
28 | }
29 | \examples{
30 | # Estimate ususal RDDreg:
31 |  data(Lee2008)
32 |  Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
33 |  reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
34 | 
35 | ## Convert to npreg:
36 |  reg_nonpara_np <- as.npreg(reg_nonpara)
37 |  reg_nonpara_np
38 |  RDDcoef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE)
39 | 
40 | ## Compare with result obtained with a Gaussian kernel:
41 |  bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara))
42 |  reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm)
43 |  all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np))
44 | }
45 | \seealso{
46 | \code{\link{as.lm}} which converts \code{RDDreg} objects into \code{lm}.
47 | }
48 | 
49 | 


--------------------------------------------------------------------------------
/RDDtools/man/STAR_MHE.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \docType{data}
 3 | \name{STAR_MHE}
 4 | \alias{STAR_MHE}
 5 | \title{Transformation of the STAR dataset as used in Angrist and Pischke (2008)}
 6 | \format{A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset,
 7 | all other are created by Angrist and Pischke STAT code.
 8 | \describe{
 9 | \item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})}
10 | \item{pscore}{The propensity score  (computed by A & P)}
11 | \item{classid}{The id of the class (computed by A & P)}
12 | \item{cs}{Class size (computed by A & P)}
13 | \item{female, nwhite}{Various covariates (computed by A & P)}
14 | }}
15 | \source{
16 | Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website
17 | \url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}, retrieved on 26 November 2012.
18 | }
19 | \usage{
20 | STAR_MHE
21 | }
22 | \description{
23 | Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008)
24 | }
25 | \details{
26 | ). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio.
27 | The full dataset is described and available in package AER, \code{\link[AER]{STAR}}.
28 | The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website
29 | (\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}), on the webstar.dta.
30 | }
31 | \examples{
32 | data(STAR_MHE)
33 | 
34 | # Compute the group means:
35 | STAR_MHE_means <- aggregate(STAR_MHE[, c("classid", "pscore", "cs")], by=list(STAR_MHE$classid), mean)
36 | 
37 | # Regression of means, with weighted average:
38 | reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs)
39 | coef(summary(reg_krug_gls))[2,2]
40 | }
41 | \references{
42 | Krueger, A. (1999) "Experimental Estimates Of Education Production Functions,"
43 | \emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May.
44 | 
45 | Angrist, A. ad  Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion},
46 | Princeton University press
47 | }
48 | \seealso{
49 | \code{\link[AER]{STAR}} for the original dataset.
50 | }
51 | 
52 | 


--------------------------------------------------------------------------------
/RDDtools/man/covarTest_dis.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{covarTest_dis}
 3 | \alias{covarTest_dis}
 4 | \alias{covarTest_dis.RDDdata}
 5 | \alias{covarTest_dis.RDDreg}
 6 | \title{Testing for balanced covariates: equality of distribution}
 7 | \usage{
 8 | covarTest_dis(object, bw, exact = NULL, p.adjust = c("none", "holm", "BH",
 9 |   "BY", "hochberg", "hommel", "bonferroni"))
10 | 
11 | \method{covarTest_dis}{RDDdata}(object, bw = NULL, exact = FALSE,
12 |   p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel",
13 |   "bonferroni"))
14 | 
15 | \method{covarTest_dis}{RDDreg}(object, bw = NULL, exact = FALSE,
16 |   p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel",
17 |   "bonferroni"))
18 | }
19 | \arguments{
20 | \item{object}{object of class RDDdata}
21 | 
22 | \item{bw}{a bandwidth}
23 | 
24 | \item{exact}{Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed.}
25 | 
26 | \item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function}
27 | 
28 | \item{\ldots}{currently not used}
29 | }
30 | \value{
31 | A data frame  with, for each covariate, the K-S statistic and its p-value.
32 | }
33 | \description{
34 | Tests equality of distribution with a Kolmogorov-Smirnov for each covariates, between the two full groups or around the discontinuity threshold
35 | }
36 | \examples{
37 | data(Lee2008)
38 | 
39 | ## Add randomly generated covariates
40 | set.seed(123)
41 | n_Lee <- nrow(Lee2008)
42 | Z <- data.frame(z1 = rnorm(n_Lee, sd=2),
43 |                 z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)),
44 |                 z3 = sample(letters, size = n_Lee, replace = TRUE))
45 | Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
46 | 
47 | ## Kolmogorov-Smirnov test of equality in distribution:
48 | covarTest_dis(Lee2008_rdd_Z, bw=0.3)
49 | 
50 | ## Can also use function covarTest_dis() for a t-test for equality of means around cutoff:
51 | covarTest_mean(Lee2008_rdd_Z, bw=0.3)
52 | ## covarTest_dis works also on regression outputs (bw will be taken from the model)
53 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z)
54 | covarTest_dis(reg_nonpara)
55 | }
56 | \author{
57 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
58 | }
59 | \seealso{
60 | \code{\link{covarTest_mean}} for the t-test of equality of means
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/RDDtools/R/plotBin.R:
--------------------------------------------------------------------------------
 1 | #' Bin plotting
 2 | #' 
 3 | #' Do a "scatterplot bin smoothing"
 4 | #' 
 5 | #' @param x Forcing variable
 6 | #' @param y Output
 7 | #' @param h the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)})
 8 | #' @param cutpoint Cutpoint
 9 | #' @param plot Logical. Whether to plot or only returned silently
10 | #' @param type Whether returns the y averages, or the x frequencies
11 | #' @param xlim,cex,main,xlab,ylab Usual parameters passed to plot(), see \code{\link{par}}
12 | #' @param \ldots further arguments passed to plot. 
13 | #' @return Returns silently values
14 | #' @references McCrary, Justin. 
15 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}>
16 | #' @keywords internal
17 | 
18 | plotBin <-  function(x, y, h=0.05, nbins=NULL, cutpoint=0,  plot=TRUE, type=c("value", "number"),xlim=range(x, na.rm=TRUE), cex=0.9,main=NULL, xlab, ylab, ...){
19 | 
20 |   type <- match.arg(type)
21 |   x_name <- if(missing(xlab)) deparse(substitute(x)) else xlab
22 |   y_name <- if(missing(ylab)) deparse(substitute(y)) else ylab
23 | 
24 | 
25 | ## Set intervals and midpoints
26 |   min_x <- min(xlim)
27 |   max_x <- max(xlim)
28 | 
29 |   if(!is.null(nbins)) h <- diff(xlim)/nbins
30 | 
31 |   K0 <- ceiling((cutpoint-min_x)/h) # Number of cells on left
32 |   K1 <- ceiling((cutpoint+max_x)/h) # Number of cells on right
33 |   K <- K0+K1
34 |   if(!is.null(nbins) && K!=nbins) {
35 |     ranges <- c(cutpoint-min_x, cutpoint+max_x)
36 |     if(which.min(ranges)==1) {
37 |       K0 <- K0-1 
38 |     } else {
39 |       K1 <- K1-1 
40 |     }
41 |     K <- K0+K1
42 |   }
43 | 
44 |   b_k <- cutpoint - (K0-c(1:K)+1)*h # Lee and Lemieux (2010) p. 308
45 |   mid_points_bk <- b_k+h/2
46 |   n_bins <- length(mid_points_bk)
47 |   brk <- c(b_k,cutpoint + (K1+2)*h)
48 | 
49 | ## compute output (mean of count)
50 |   intervs <- cut(x, breaks=brk, include.lowest=TRUE)
51 |   table_intervs <- table(intervs)
52 |   n_non0_intervs <- sum(table_intervs!=0)
53 | 
54 |   y2 <- switch(type, 
55 | 		"value" =tapply(y, intervs, mean, na.rm=TRUE),
56 | 		"number" =table_intervs)
57 | 
58 | 
59 | ## plot
60 |   if(plot){
61 |     plot(mid_points_bk, as.numeric(y2), pch=19, cex=cex, xlab=x_name, ylab=y_name, xlim=xlim,...)
62 |     title(main=main, sub=paste("h=", round(h,4), ",\\tn bins=", n_non0_intervs, sep=""))
63 |     abline(v=cutpoint, lty=2)
64 |   }
65 | 
66 | ## return invisible result
67 |   res <- data.frame(x=mid_points_bk,y=y2)
68 |   invisible(res)
69 | }
70 | 
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDgenreg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDgenreg}
 3 | \alias{RDDgenreg}
 4 | \title{General polynomial estimator of the regression discontinuity}
 5 | \usage{
 6 | RDDgenreg(RDDobject, fun = glm, covariates = NULL, order = 1, bw = NULL,
 7 |   slope = c("separate", "same"), covar.opt = list(strategy = c("include",
 8 |   "residual"), slope = c("same", "separate"), bw = NULL), weights, ...)
 9 | }
10 | \arguments{
11 | \item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}}
12 | 
13 | \item{covariates}{Formula to include covariates}
14 | 
15 | \item{order}{Order of the polynomial regression.}
16 | 
17 | \item{bw}{A bandwidth to specify the subset on which the kernel weighted regression is estimated}
18 | 
19 | \item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}}
20 | 
21 | \item{slope}{Whether slopes should be different on left or right (separate), or the same.}
22 | 
23 | \item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).}
24 | 
25 | \item{fun}{The function to estimate the parameters}
26 | 
27 | \item{\ldots}{Further arguments passed to fun. See the example.}
28 | }
29 | \value{
30 | An object of class RDDreg_lm and class lm, with specific print and plot methods
31 | }
32 | \description{
33 | Compute RDD estimate allowing a locally kernel weighted version of any estimation function
34 | possibly on the range specified by bandwidth
35 | }
36 | \details{
37 | This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}.
38 | It is assumed that the custom funciton has following behaviour:
39 | \enumerate{
40 |   \item A formula interface, together with a \code{data} argument
41 |   \item A \code{weight} argument
42 |   \item A coef(summary(x)) returning a data-frame containing a column Estimate
43 | }
44 | Note that for the last requirement, this can be accomodated by writing a specific \code{\link{RDDcoef}}
45 | function for the class of the object returned by \code{fun}.
46 | }
47 | \examples{
48 | ## Step 0: prepare data
49 | data(Lee2008)
50 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
51 | 
52 | ## Estimate a local probit:
53 | Lee2008_rdd$y <- with(Lee2008_rdd, ifelse(y<quantile(y, 0.25), 0,1))
54 | reg_bin_glm <- RDDgenreg(RDDobject=Lee2008_rdd, fun= glm, family=binomial(link="probit"))
55 | print(reg_bin_glm)
56 | summary(reg_bin_glm)
57 | }
58 | \references{
59 | TODO
60 | }
61 | 
62 | 


--------------------------------------------------------------------------------
/RDDtools/R/dens_test.R:
--------------------------------------------------------------------------------
 1 | #' Run the McCracy test for manipulation of the forcing variable
 2 | #' 
 3 | #' Calls the \code{\link[rdd]{DCdensity}} test from package \code{rdd} on a \code{RDDobject}.
 4 | #' 
 5 | #' @param RDDobject object of class RDDdata
 6 | #' @param bin Argument of the \code{\link{DCdensity}} function, the binwidth
 7 | #' @param bw Argument of the \code{\link{DCdensity}} function, the bandwidth
 8 | #' @param plot Whether to return a plot. Logical, default ot TRUE. 
 9 | #' @param \ldots Further arguments passed to \code{\link[rdd]{DCdensity}}. 
10 | #' @export
11 | #' @import rdd
12 | #' @examples
13 | #' library(RDDtools)
14 | #' data(Lee2008)
15 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
16 | #' dens_test(Lee2008_rdd)
17 | 
18 | 
19 | 
20 | dens_test <- function(RDDobject, bin=NULL, bw=NULL, plot=TRUE,...){
21 |   checkIsRDD(RDDobject)
22 |   cutpoint <- getCutpoint(RDDobject)
23 |   x <- getOriginalX(RDDobject)
24 |   test <- try(DCdensity(runvar=x, cutpoint=cutpoint, bin = bin, bw = bw, plot=plot, ext.out=TRUE, ...), silent=TRUE)
25 |   if(inherits(test, "try-error")){
26 |     warning("Error in computing the density, returning a simple histogram", if(is.null(bin)) " with arbitrary bin" else NULL)
27 |     if(is.null(bin)) {
28 |       test <- try(DCdensity(RDDobject$x, cutpoint, bin = bin, bw = 0.2, ext.out=TRUE, plot=FALSE), silent=TRUE)
29 |       bin <- test$binsize
30 |     }
31 |     max_x <- max(RDDobject$x, na.rm=TRUE)
32 |     seq_breaks <- seq(from=min(RDDobject$x, na.rm=TRUE), to=max_x, by=bin)
33 |     if(max_x>max(seq_breaks)) seq_breaks <- c(seq_breaks, max_x+0.001)
34 |     hist(RDDobject$x, breaks=seq_breaks)
35 |     abline(v=cutpoint, col=2, lty=2)
36 |   }
37 |   
38 |   test.htest <- list()
39 |   test.htest$statistic <- c("z-val"=test$z)
40 |   test.htest$p.value <- test$p
41 |   test.htest$data.name <- deparse(substitute(RDDobject))
42 |   test.htest$method <- "McCrary Test for no discontinuity of density around cutpoint"
43 |   test.htest$alternative <- "Density is discontinuous around cutpoint"
44 |   test.htest$estimate <- c(Discontinuity=test$theta)
45 |   test.htest$test.output <- test
46 |   class(test.htest) <- "htest"
47 |   return(test.htest)
48 | }
49 | 
50 | # print.MCcraryTest <- function(x,...){
51 | #   cat("#### MC Crary Test of no discontinuity in density\n\n")
52 | #   cat("Estimate of discontinuity:\t", x$theta, "\n")
53 | #   cat("z-value:\t", x$z, "\t p-value:\t", x$p, "\n")
54 | # }
55 | 
56 | if(FALSE){
57 | 
58 | library(RDDtools)
59 | data(Lee2008)
60 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
61 | 
62 | dens_test(Lee2008_rdd)
63 | 
64 | }


--------------------------------------------------------------------------------
/RDDtools/man/covarTest_mean.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{covarTest_mean}
 3 | \alias{covarTest_mean}
 4 | \alias{covarTest_mean.RDDdata}
 5 | \alias{covarTest_mean.RDDreg}
 6 | \title{Testing for balanced covariates: equality of means with t-test}
 7 | \usage{
 8 | covarTest_mean(object, bw = NULL, paired = FALSE, var.equal = FALSE,
 9 |   p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel",
10 |   "bonferroni"))
11 | 
12 | \method{covarTest_mean}{RDDdata}(object, bw = NULL, paired = FALSE,
13 |   var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg",
14 |   "hommel", "bonferroni"))
15 | 
16 | \method{covarTest_mean}{RDDreg}(object, bw = NULL, paired = FALSE,
17 |   var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg",
18 |   "hommel", "bonferroni"))
19 | }
20 | \arguments{
21 | \item{object}{object of class RDDdata}
22 | 
23 | \item{bw}{a bandwidth}
24 | 
25 | \item{paired}{Argument of the \code{\link{t.test}} function: logical indicating whether you want paired t-tests.}
26 | 
27 | \item{var.equal}{Argument of the \code{\link{t.test}} function:  logical variable indicating whether to treat the two variances as being equal}
28 | 
29 | \item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function}
30 | 
31 | \item{\ldots}{currently not used}
32 | }
33 | \value{
34 | A data frame with, for each covariate, the mean on each size, the difference, t-stat and ts p-value.
35 | }
36 | \description{
37 | Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold
38 | }
39 | \examples{
40 | data(Lee2008)
41 | 
42 | ## Add randomly generated covariates
43 | set.seed(123)
44 | n_Lee <- nrow(Lee2008)
45 | Z <- data.frame(z1 = rnorm(n_Lee, sd=2),
46 |                 z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)),
47 |                 z3 = sample(letters, size = n_Lee, replace = TRUE))
48 | Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
49 | 
50 | ## test for equality of means around cutoff:
51 | covarTest_mean(Lee2008_rdd_Z, bw=0.3)
52 | 
53 | ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test:
54 | covarTest_dis(Lee2008_rdd_Z, bw=0.3)
55 | 
56 | ## covarTest_mean works also on regression outputs (bw will be taken from the model)
57 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z)
58 | covarTest_mean(reg_nonpara)
59 | }
60 | \author{
61 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
62 | }
63 | \seealso{
64 | \code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution
65 | }
66 | 
67 | 


--------------------------------------------------------------------------------
/RDDtools/man/plotSensi.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{plotSensi}
 3 | \alias{plotSensi}
 4 | \alias{plotSensi.RDDreg_lm}
 5 | \alias{plotSensi.RDDreg_np}
 6 | \title{Plot the sensitivity to the bandwidth}
 7 | \usage{
 8 | plotSensi(RDDregobject, from, to, by = 0.01, level = 0.95,
 9 |   output = c("data", "ggplot"), plot = TRUE, ...)
10 | 
11 | \method{plotSensi}{RDDreg_np}(RDDregobject, from, to, by = 0.05,
12 |   level = 0.95, output = c("data", "ggplot"), plot = TRUE,
13 |   device = c("ggplot", "base"), vcov. = NULL, ...)
14 | 
15 | \method{plotSensi}{RDDreg_lm}(RDDregobject, from, to, by = 0.05,
16 |   level = 0.95, output = c("data", "ggplot"), plot = TRUE, order,
17 |   type = c("colour", "facet"), ...)
18 | }
19 | \arguments{
20 | \item{RDDregobject}{object of a RDD regression, from either \code{\link{RDDreg_lm}} or \code{\link{RDDreg_np}}}
21 | 
22 | \item{from}{First bandwidth point. Default value is max(1e-3, bw-0.1)}
23 | 
24 | \item{to}{Last bandwidth point. Default value is bw+0.1}
25 | 
26 | \item{by}{Increments in the \code{from} \code{to} sequence}
27 | 
28 | \item{level}{Level of the confidence interval}
29 | 
30 | \item{order}{For parametric models (from \code{\link{RDDreg_lm}}), the order of the polynomial.}
31 | 
32 | \item{type}{For parametric models (from \code{\link{RDDreg_lm}}) whether different orders are represented as different colour or as different facets.}
33 | 
34 | \item{device}{Whether to draw a base or a ggplot graph.}
35 | 
36 | \item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object}
37 | 
38 | \item{plot}{Whether to actually plot the data.}
39 | 
40 | \item{\ldots}{Further arguments passed to specific methods}
41 | 
42 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}}
43 | }
44 | \value{
45 | A data frame containing the bandwidths and corresponding estimates and confidence intervals.
46 | }
47 | \description{
48 | Draw a plot showing the LATE estimates depending on multiple bandwidths
49 | }
50 | \examples{
51 | data(Lee2008)
52 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
53 | 
54 | #Non-parametric estimate
55 | bw_ik <- RDDbw_IK(Lee2008_rdd)
56 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik)
57 | plotSensi(reg_nonpara)
58 | plotSensi(reg_nonpara, device="base")
59 | 
60 | #Parametric estimate:
61 | reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4, bw=bw_ik)
62 | plotSensi(reg_para_ik)
63 | plotSensi(reg_para_ik, type="facet")
64 | }
65 | \author{
66 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
67 | }
68 | 
69 | 


--------------------------------------------------------------------------------
/RDDtools/man/plotPlacebo.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{plotPlacebo}
 3 | \alias{computePlacebo}
 4 | \alias{plotPlacebo}
 5 | \alias{plotPlacebo.RDDreg}
 6 | \alias{plotPlaceboDens}
 7 | \alias{plotPlaceboDens.RDDreg}
 8 | \title{Draw a (density) plot of placebo tests}
 9 | \usage{
10 | plotPlacebo(object, device = c("ggplot", "base"), ...)
11 | 
12 | \method{plotPlacebo}{RDDreg}(object, device = c("ggplot", "base"),
13 |   from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE,
14 |   vcov. = NULL, plot = TRUE, output = c("data", "ggplot"), ...)
15 | 
16 | plotPlaceboDens(object, device = c("ggplot", "base"), ...)
17 | 
18 | \method{plotPlaceboDens}{RDDreg}(object, device = c("ggplot", "base"),
19 |   from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE,
20 |   vcov. = NULL, ...)
21 | 
22 | computePlacebo(object, from = 0.25, to = 0.75, by = 0.1, level = 0.95,
23 |   same_bw = FALSE, vcov. = NULL)
24 | }
25 | \arguments{
26 | \item{object}{the output of an RDD regression}
27 | 
28 | \item{device}{Whether to draw a base or a ggplot graph.}
29 | 
30 | \item{\ldots}{Further arguments passed to specific methods.}
31 | 
32 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}.}
33 | 
34 | \item{plot}{Whether to actually plot the data.}
35 | 
36 | \item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object}
37 | 
38 | \item{from}{Starting point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint}
39 | 
40 | \item{to}{Ending   point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint}
41 | 
42 | \item{by}{Increments of the from-to sequence}
43 | 
44 | \item{level}{Level of the confidence interval shown}
45 | 
46 | \item{same_bw}{Whether to re-estimate the bandwidth at each point}
47 | }
48 | \value{
49 | A data frame containing the cutpoints, their corresponding estimates and confidence intervals.
50 | }
51 | \description{
52 | Draw a plot of placebo tests, estimating the impact on fake cutpoints
53 | }
54 | \examples{
55 | data(Lee2008)
56 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
57 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
58 | plotPlacebo(reg_nonpara)
59 | 
60 | # Use with another vcov function; cluster case
61 | reg_nonpara_lminf <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm")
62 | # need to be a function applied to updated object!
63 | vc <- function(x) vcovCluster(x, clusterVar=model.frame(x)$x)
64 | plotPlacebo(reg_nonpara_lminf, vcov. = vc)
65 | }
66 | \author{
67 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}>
68 | }
69 | 
70 | 


--------------------------------------------------------------------------------
/RDDtools/R/model.matrix.RDD.R:
--------------------------------------------------------------------------------
 1 | #' @S3method model.matrix RDDdata
 2 | 
 3 | model.matrix.RDDdata <- function(object, covariates=NULL, order=1, bw=NULL, slope=c("separate", "same"), covar.opt=list(strategy=c("include", "residual"), slope=c("same", "separate"), bw=NULL), covar.strat=c("include", "residual"), ...){
 4 | 
 5 |   checkIsRDD(object)
 6 |   RDDobject <- object
 7 |   type <- getType(object)
 8 | 
 9 |   if(!missing(covar.strat)) warning("covar.strat is (soon) deprecated arg!")
10 | 
11 |   slope <- match.arg(slope)
12 |   covar.strat <- match.arg(covar.opt$strategy, choices=c("include", "residual"))
13 |   covar.slope <- match.arg(covar.opt$slope, choices=c("same", "separate"))
14 |   
15 |   cutpoint <- getCutpoint(RDDobject)
16 |   if(!is.null(covariates) & !hasCovar(RDDobject))  stop("Arg 'covariates' was specified, but no covariates found in 'RDDobject'.")
17 | 
18 | ## Construct data
19 |   dat <- as.data.frame(RDDobject)
20 | 
21 |   dat_step1 <- dat[, c("y", "x")]
22 |   dat_step1$x <- dat_step1$x -cutpoint
23 | 
24 |   L <- ifelse(dat_step1$x>= 0, 1,0)
25 |   dat_step1$D <- if(type=="Sharp") L else object$z
26 | 
27 |   if(order>0){
28 |     polys <- poly(dat_step1$x, degree=order, raw=TRUE)
29 |     colnames(polys) <- paste("x", 1:order, sep="^")
30 |     dat_step1  <- cbind(dat_step1[,c("y", "D")],polys)
31 |     if(slope=="separate") {
32 |       polys2 <- polys*L
33 |       colnames(polys2) <- paste(colnames(polys), "right", sep="_")
34 |       dat_step1  <- cbind(dat_step1,polys2)
35 |     }
36 |   } else {
37 |     dat_step1$x <- NULL
38 |   }
39 | 
40 | ## Covariates
41 |   if(!is.null(covariates)){
42 |     covar <- getCovar(RDDobject)
43 |     formu.cova <- covariates
44 | 
45 |     if(grepl("\\.", formu.cova)) formu.cova <- paste(colnames(covar), collapse=" + ")
46 |     if(covar.slope=="separate") { 
47 |       formu.cova <- paste(formu.cova, "+", paste("D*(", formu.cova,")", sep=""), sep=" ")
48 |       covar$D <- dat_step1$D
49 |     } 
50 | 
51 |     formula.cova <- as.formula(paste("~", formu.cova))
52 |     mf <- model.frame(formula.cova, covar, na.action=na.pass)
53 |     M_covar <- model.matrix(formula.cova, data=mf)
54 | 
55 |     if(covar.strat=="residual"){
56 |       M_covar <- data.frame(y=dat_step1$y, M_covar)
57 |       first_stage <- lm(y~., data=M_covar) ## regress y on covariates only
58 |       dat_step1$y <- residuals(first_stage) ## change in original data
59 |     } else {
60 |       rem <- switch(covar.slope, "separate"="^D$|(Intercept)", "same" ="(Intercept)")
61 |       M_covar <- M_covar[,-grep(rem, colnames(M_covar)), drop=FALSE	]
62 |       dat_step1 <- cbind(dat_step1, M_covar) ## add covar as regressors
63 |     }
64 |   }
65 | 
66 | ## Colnames cleaning
67 |   colnames(dat_step1) <- gsub("x\\^1", "x", colnames(dat_step1))
68 | 
69 | ## 
70 |  if(type=="Fuzzy") dat_step1$ins <- L
71 | 
72 | ## return results: 
73 |   dat_step1
74 | }


--------------------------------------------------------------------------------
/RDDtools/misc/locpoly.f:
--------------------------------------------------------------------------------
 1 | c  Part of R package KernSmooth
 2 | c  Copyright (C) 1995  M. P. Wand
 3 | c
 4 | c  Unlimited use and distribution (see LICENCE).
 5 | 
 6 | cccccccccc FORTRAN subroutine locpol.f cccccccccc
 7 | 
 8 | c For computing an binned approximation to a
 9 | c local bandwidth local polynomial kernel regression estimator
10 | c of an arbitrary derivative of a regression function.
11 | c LINPACK is used for matrix inversion.
12 | 
13 | c Last changed: 10/02/95
14 | 
15 |       subroutine locpol(xcnts,ycnts,idrv,delta,hdisc,Lvec,indic,
16 |      +                  midpts,M,iQ,fkap,ipp,ippp,ss,tt,Smat,Tvec,
17 |      +                  ipvt,cvest)
18 |       integer i,j,k,ii,Lvec(*),M,iQ,mid,indic(*),midpts(*),ipvt(*),
19 |      +        info,idrv,ipp,ippp,indss
20 |       double precision xcnts(*),ycnts(*),fkap(*),hdisc(*),
21 |      +                 cvest(*),delta,ss(M,ippp),tt(M,ipp),
22 |      +                 Smat(ipp,ipp),Tvec(ipp),fac
23 | 
24 | c Obtain kernel weights
25 | 
26 |       PRINT *,"Hello"
27 |       mid = Lvec(1) + 1
28 |       do 10 i=1,(iQ-1)
29 |          midpts(i) = mid
30 |          fkap(mid) = 1.0d0
31 |          do 20 j=1,Lvec(i)
32 |             fkap(mid+j) = exp(-(delta*j/hdisc(i))**2/2)
33 |             fkap(mid-j) = fkap(mid+j)
34 | 20       continue
35 |          mid = mid + Lvec(i) + Lvec(i+1) + 1
36 | 10    continue
37 |       midpts(iQ) = mid
38 |       fkap(mid) = 1.0d0
39 |       do 30 j=1,Lvec(iQ)
40 |          fkap(mid+j) = exp(-(delta*j/hdisc(iQ))**2/2)
41 |          fkap(mid-j) = fkap(mid+j)
42 | 30    continue
43 | 
44 | c Combine kernel weights and grid counts
45 | 
46 |       do 40 k = 1,M
47 |          if (xcnts(k).ne.0) then
48 |             do 50 i = 1,iQ
49 |                do 60 j = max(1,k-Lvec(i)),min(M,k+Lvec(i))
50 |                   if (indic(j).eq.i) then
51 |                      fac = 1.0d0
52 |                      ss(j,1) = ss(j,1) + xcnts(k)*fkap(k-j+midpts(i))
53 |                      tt(j,1) = tt(j,1) + ycnts(k)*fkap(k-j+midpts(i))
54 |                      do 70 ii = 2,ippp
55 |                         fac = fac*delta*(k-j)
56 |                         ss(j,ii) = ss(j,ii)
57 |      +                  + xcnts(k)*fkap(k-j+midpts(i))*fac
58 |                         if (ii.le.ipp) then
59 |                            tt(j,ii) = tt(j,ii)
60 |      +                     + ycnts(k)*fkap(k-j+midpts(i))*fac
61 |                         endif
62 | 70                   continue
63 |                   endif
64 | 60             continue
65 | 50          continue
66 |          endif
67 | 40    continue
68 | 
69 |       do 80 k = 1,M
70 |          do 90 i = 1,ipp
71 |             do 100 j = 1,ipp
72 |                indss = i + j - 1
73 |                Smat(i,j) = ss(k,indss)
74 | 100         continue
75 |          Tvec(i) = tt(k,i)
76 | 90       continue
77 | 
78 |         call dgefa(Smat,ipp,ipp,ipvt,info)
79 |         call dgesl(Smat,ipp,ipp,ipvt,Tvec,0)
80 | 
81 |         cvest(k) = Tvec(idrv+1)
82 | 
83 | 80    continue
84 | 
85 |       return
86 |       end
87 | 
88 | cccccccccc End of locpol.f cccccccccc
89 | 


--------------------------------------------------------------------------------
/RDDtools/R/bw_ROT.R:
--------------------------------------------------------------------------------
 1 | #' Bandwidth selector
 2 | #' 
 3 | #' implements dpill
 4 | #' 
 5 | #' @param object object of class RDDdata
 6 | #' @references McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714. \url{http://dx.doi.org/10.1016/j.jeconom.2007.05.005}
 7 | #' @include plotBin.R
 8 | #' @export
 9 | #' @author Drew Dimmery <\email{drewd@@nyu.edu}>
10 | #' @examples
11 | #' #No discontinuity
12 | 
13 | ### Crary bw
14 | 
15 | ROT_bw <- function(object){
16 | 
17 |   if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects")
18 |   cutpoint <- getCutpoint(object)
19 |   x <- object$x
20 |   y <- object$y
21 | 
22 | ##### first step
23 |   n <- length(y)
24 |   sd_x <- sd(x, na.rm=TRUE)
25 |   bw_pilot <- (2*sd_x)/sqrt(n)
26 | 
27 | ## hist
28 |   his <- plotBin(x=x, y=y, h=bw_pilot, cutpoint=cutpoint,plot=FALSE, type="number")
29 | #   his2 <- hist(x, breaks=c(min(x), his[["x"]], max(x)))
30 |   x1 <- his$x
31 |   y1 <- his[,"y.Freq"]
32 | 
33 | ##### second step
34 | 
35 | ## regs:
36 |  reg_left  <- lm(y1 ~ poly(x1, degree=4, raw=TRUE), subset=x1<cutpoint)
37 |  reg_right <- lm(y1 ~ poly(x1, degree=4, raw=TRUE), subset=x1>=cutpoint)
38 | 
39 | 
40 | 
41 | }
42 | 
43 | 
44 | #' Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth}
45 | #' 
46 | #' Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995) 
47 | #' either to the whole function, or to the functions below and above the cutpoint. 
48 | #' 
49 | #' @param object object of class RDDdata created by \code{\link{RDDdata}}
50 | #' @param type Whether to choose a global bandwidth for the whole function (\code{global}) 
51 | #' or for each side (\code{sided})
52 | #' @return One (or two for \code{sided}) bandwidth value. 
53 | #' @references See \code{\link[KernSmooth]{dpill}}
54 | #' @include plotBin.R
55 | #' @seealso \code{\link{RDDbw_IK}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012)
56 | #' @import KernSmooth
57 | #' @export
58 | #' @examples
59 | #' data(Lee2008)
60 | #' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0)
61 | #' RDDbw_RSW(rd)
62 | 
63 | 
64 | ####
65 | RDDbw_RSW <- function(object, type=c("global", "sided")){
66 | 
67 |   type <- match.arg(type)
68 | 
69 |   if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects")
70 |   cutpoint <- getCutpoint(object)
71 |   x <- object$x
72 |   y <- object$y
73 | 
74 | ##
75 |   if(type=="global"){
76 |     bw <- dpill(x=x, y=y)
77 |   } else {
78 |     dat_left  <- subset(object, x<cutpoint)
79 |     dat_right <- subset(object, x>=cutpoint)
80 | 
81 |     bw_left  <- dpill(x=dat_left$x, y=dat_left$y)
82 |     bw_right <- dpill(x=dat_right$x, y=dat_right$y)
83 |     bw <- c(bw_left, bw_right)
84 |   }
85 | 
86 | ## result
87 |   bw
88 | }
89 | 
90 | 
91 | if(FALSE){
92 | #   lee_dat4 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE)
93 | #   head(lee_dat4)
94 | #   a<-RDDdata(y=lee_dat4[,2], x=lee_dat4[,1], cutpoint=0)
95 | # ROT_bw(object=a)
96 | # RDDbw_RSW(object=a)
97 | RDDbw_RSW(object=a, type="sided")
98 | }
99 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDreg_lm.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDreg_lm}
 3 | \alias{RDDreg_lm}
 4 | \title{Parametric polynomial estimator of the regression discontinuity}
 5 | \usage{
 6 | RDDreg_lm(RDDobject, covariates = NULL, order = 1, bw = NULL,
 7 |   slope = c("separate", "same"), covar.opt = list(strategy = c("include",
 8 |   "residual"), slope = c("same", "separate"), bw = NULL),
 9 |   covar.strat = c("include", "residual"), weights)
10 | }
11 | \arguments{
12 | \item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}}
13 | 
14 | \item{covariates}{Formula to include covariates}
15 | 
16 | \item{order}{Order of the polynomial regression.}
17 | 
18 | \item{bw}{A bandwidth to specify the subset on which the parametric regression is estimated}
19 | 
20 | \item{covar.strat}{DEPRECATED, use covar.opt instead.}
21 | 
22 | \item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).}
23 | 
24 | \item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}}
25 | 
26 | \item{slope}{Whether slopes should be different on left or right (separate), or the same.}
27 | }
28 | \value{
29 | An object of class RDDreg_lm and class lm, with specific print and plot methods
30 | }
31 | \description{
32 | Compute a parametric polynomial regression of the ATE,
33 | possibly on the range specified by bandwidth
34 | }
35 | \details{
36 | This function estimates the standard \emph{discontinuity regression}:
37 | \deqn{Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon}
38 | with \eqn{\tau} the main parameter of interest. Several versions of the regression can be estimated, either restricting the slopes to be the same,
39 | i.e \eqn{\beta_{1}=\beta_{2}} (argument \code{slope}). The order of the polynomial in \eqn{X-c} can also be adjusted with argument \code{order}.
40 | Note that a value of zero can be used, which corresponds to the simple \emph{difference in means}, that one would use if the samples were random.
41 | Covariates can also be added in the regression, according to the two strategies discussed in Lee and Lemieux (2010, sec 4.5), through argument \code{covar.strat}:
42 | \describe{
43 | \item{include}{Covariates are simply added as supplementary regressors in the RD equation}
44 | \item{residual}{The dependent variable is first regressed on the covariates only, then the RDD equation is applied on the residuals from this first step}}
45 | The regression can also be estimated in a neighborhood of the cutpoint with the argument \code{bw}. This make the parametric regression resemble
46 | the non-parametric local kernel \code{\link{RDDreg_np}}. Similarly, weights can also be provided (but not simultaneously to \code{bw}).
47 | 
48 | The returned object is a classical \code{lm} object, augmented with a \code{RDDslot}, so usual methods can be applied. As is done in general in R,
49 | heteroskeadsticity-robust inference can be done later on with the usual function from package \pkg{sandwich}. For the case of clustered observations
50 | a specific function \code{\link{clusterInf}} is provided.
51 | }
52 | \examples{
53 | ## Step 0: prepare data
54 | data(Lee2008)
55 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
56 | ## Step 2: regression
57 | # Simple polynomial of order 1:
58 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
59 | print(reg_para)
60 | plot(reg_para)
61 | 
62 | # Simple polynomial of order 4:
63 | reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4)
64 | reg_para4
65 | plot(reg_para4)
66 | 
67 | # Restrict sample to bandwidth area:
68 | bw_ik <- RDDbw_IK(Lee2008_rdd)
69 | reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4)
70 | reg_para_ik
71 | plot(reg_para_ik)
72 | }
73 | \references{
74 | TODO
75 | }
76 | 
77 | 


--------------------------------------------------------------------------------
/RDDtools/man/RDDpred.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2 (4.0.1): do not edit by hand
 2 | \name{RDDpred}
 3 | \alias{RDDpred}
 4 | \title{RDD coefficient prediction}
 5 | \usage{
 6 | RDDpred(object, covdata, se.fit = TRUE, vcov. = NULL, newdata,
 7 |   stat = c("identity", "sum", "mean"), weights)
 8 | }
 9 | \arguments{
10 | \item{object}{A RDD regression object}
11 | 
12 | \item{covdata}{New data.frame specifying the values of the covariates, can have multiple rows.}
13 | 
14 | \item{se.fit}{A switch indicating if standard errors are required.}
15 | 
16 | \item{vcov.}{Specific covariance function (see package sandwich ), by default uses the \code{\link{vcov}}}
17 | 
18 | \item{newdata}{Another data on which to evaluate the x/D variables. Useful in very few cases.}
19 | 
20 | \item{stat}{The statistic to use if there are multiple predictions, 'identity' just returns the single values, 'mean' averages them}
21 | 
22 | \item{weights}{Eventual weights for the averaging of the predicted values.}
23 | }
24 | \value{
25 | Returns the predicted value(s), and, if se.fit=TRUE, their standard errors.
26 | }
27 | \description{
28 | Function to predict the RDD coefficient in presence of covariate (without covariates, returns the same than \code{\link{RDDcoef}})
29 | }
30 | \details{
31 | The function \code{RDDpred} does a simple prediction of the RDD effect
32 |  \deqn{RDDeffect= \mu(x, z, D=1) - \mu(x, z, D=0)}
33 | When there are no covariates (and z is irrelevant in the equation above), this amounts exactly to the usual RDD coefficient,
34 | shown in the outputs, or obtained with \code{\link{RDDcoef}}. If there were covariates, and if these covariates were estimated using the
35 | \dQuote{include} \emph{strategy} and with different coefficients left and right to the cutoff (i.e.
36 | had argument \emph{slope} = \dQuote{separate}), than the RDD effect is also dependent on the value of the covariate(s).
37 | \code{RDDpred} allows to set the value of the covariate(s) at which to evaluate the RDD effect, by providing a data.frame with
38 | the values for the covariates. Note that the effect can be evaluated at multiple points, if you provide multiple rows of \code{covdata}.
39 | 
40 | In pressence of covariate-specific RDD effect, one may wish to estimate an average effect. This can be done by setting the argument \code{stat="mean"}.
41 | Weights can additionally be added, with the argument \code{weights}, to obtain a weighted-average of the predictions. Note however that in most cases,
42 | this will be equivalent to provide covariates at their (weighted) mean value, which will be much faster also!
43 | 
44 | Standard errors, obtained setting the argument \code{se.fit=TRUE}, are computed using following formula:
45 |  \deqn{x_i \Omega x_i^{'}}
46 | where \eqn{\Omega} is the estimated variance-covariance matrix ( by default \eqn{\sigma^2(X^{'}X)^{-1}} using \code{\link{vcov}}) and
47 | \eqn{x_i} is the input data (a mix of covdata and input data). If one wishes individual predictions, standard errors are simply obtained
48 | as the square of that diagonal matrix, whereas for mean/sum, covariances are taken into account.
49 | }
50 | \examples{
51 | ## Load data, add (artificial) covariates:
52 |   data(Lee2008)
53 |   n_Lee <- nrow(Lee2008)
54 |   z1 <- runif(n_Lee)
55 |   Lee2008_rdd <- RDDdata(y=y, x=x, data=Lee2008, covar=z1, cutpoint=0)
56 | 
57 | ## estimation without covariates: RDDpred is the same than RDDcoef:
58 |   reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
59 | 
60 |   RDDpred(reg_para)
61 |   RDDcoef(reg_para, allInfo=TRUE)
62 | 
63 | ## estimation with covariates:
64 |   reg_para_cov <- RDDreg_lm(RDDobject=Lee2008_rdd, covariates="z1", covar.opt=list(slope="separate"))
65 |   RDDpred(reg_para_cov, covdata=data.frame(z1=0)) ## should obtain same result than with RDestimate
66 |   RDDpred(reg_para_cov, covdata=data.frame(z1=0.5)) #evaluate at mean of z1 (as comes from uniform)
67 | }
68 | \references{
69 | Froehlich (2007) Regression discontinuity design with covariates, IZA discussion paper 3024
70 | }
71 | 
72 | 


--------------------------------------------------------------------------------
/RDDtools/R/get_methods.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | # checkIsRDD <- function(object)  if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects")
  4 | # checkIsAnyRDD <- function(object)  if(!inherits(object, c("RDDdata", "RDDreg_np"))) stop("Only works for RDDdata objects")
  5 | 
  6 | # function(object)  if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects")
  7 | checkIsAnyRDD <- checkIsRDD <- function(object)  {
  8 |   classesOk <- c("RDDdata", "RDDreg_np", "RDDreg_lm")
  9 |   if(!inherits(object, classesOk)) stop("Only works for RDDdata objects")
 10 | }
 11 | 
 12 | getType <- function(object){
 13 |   checkIsRDD(object)
 14 |   attr(object, "type")
 15 | }
 16 | 
 17 | isFuzzy <- function(object){
 18 |   checkIsRDD(object)
 19 |   attr(object, "type")=="Fuzzy"
 20 | }
 21 | 
 22 | getCutpoint <- function(object){
 23 |   
 24 |   checkIsRDD(object)
 25 |   attr(object, "cutpoint")
 26 | }
 27 | 
 28 | getOrder <- function(object){
 29 | 
 30 |   checkIsRDD(object)
 31 |   attr(object, "PolyOrder")
 32 | }
 33 | 
 34 | getSlope <- function(object){
 35 | 
 36 |   checkIsRDD(object)
 37 |   attr(object, "slope")
 38 | }
 39 | 
 40 | getBW <- function(object, force.na=FALSE){
 41 | 
 42 |   checkIsAnyRDD(object)
 43 |   res <- attr(object, "bw")
 44 |   if(force.na) if(is.null(res)) res <- NA
 45 |   res
 46 | }
 47 | 
 48 | 
 49 | 
 50 | ## return the type of inference used by RDDreg_np
 51 | infType <- function(x) {
 52 |   if(is.null(getCall(x)$inference)) "se" else getCall(x)$inference
 53 | }
 54 | 
 55 | 
 56 | hasCovar <- function(object)
 57 |   UseMethod("hasCovar")
 58 | 
 59 | hasCovar.RDDdata <- function(object)  attr(object, "hasCovar")
 60 | 
 61 | hasCovar.RDDreg <- function(object) { 
 62 |   call <- getCall(object)
 63 |   !is.null(call$covariates)
 64 | }
 65 | 
 66 | getCovar <- function(object){
 67 |   if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects")
 68 |   if(!hasCovar(object)) stop("object has no covariates")
 69 |   
 70 |   rem <- if(isFuzzy(object)) 1:3 else  1:2
 71 |   res <- object[,-rem, drop=FALSE]
 72 |   as.data.frame(res)
 73 | }
 74 | 
 75 | getCovarNames <- function(object){
 76 |   if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects")
 77 |   if(!hasCovar(object)) stop("object has no covariates")
 78 |   
 79 |   rem <- if(isFuzzy(object)) 1:3 else  1:2
 80 |   colnames(object)[-rem]
 81 | }
 82 | 
 83 | getOriginalX <- function(object){
 84 | 
 85 |   cutpoint <- getCutpoint(object)
 86 |   x <- object$model[,"x"]
 87 |   if(cutpoint!=0)  x <- x+cutpoint
 88 |   x
 89 | }
 90 | 
 91 | getOriginalX <- function(object)
 92 |   UseMethod("getOriginalX")
 93 | 
 94 | 
 95 | getOriginalX.RDDreg <- function(object){
 96 |   object$RDDslot$RDDdata[, "x"]
 97 | }
 98 | 
 99 | getOriginalX.RDDdata <- function(object){
100 |   object[, "x"]
101 | }
102 | 
103 | # getOriginalX.RDDreg_np <- function(object){
104 | # 
105 | #   cutpoint <- getCutpoint(object)
106 | #   Xnam <- getXname(object) 
107 | #   x <- object$model[,Xnam]
108 | #   if(cutpoint!=0)  x <- x+cutpoint
109 | #   x
110 | # }
111 | 
112 | 
113 | getOriginalData <- function(object, na.rm=TRUE, classRDD=TRUE)
114 |   UseMethod("getOriginalData")
115 | 
116 | # getOriginalData.RDDreg_np <- function(object, na.rm=TRUE){
117 | # 
118 | #   cutpoint <- getCutpoint(object)
119 | #   Xnam <- getXname(object) 
120 | #   dat <- object$model[,c("y",Xnam)]
121 | #   if(cutpoint!=0)  dat[,Xnam] <- dat[,Xnam] +cutpoint
122 | #   if(na.rm) dat <- dat[apply(dat, 1, function(x) all(!is.na(x))),] # remove na rows
123 | #   dat
124 | # }
125 | 
126 | 
127 | 
128 | getOriginalData.RDDreg <- function(object, na.rm=TRUE, classRDD=TRUE){
129 |   res <- object$RDDslot$RDDdata
130 |   if(na.rm) res <- res[apply(res, 1, function(x) all(!is.na(x))),] # remove na rows
131 |   if(!classRDD) res <- as.data.frame(res)
132 |   res
133 | }
134 | 
135 | 
136 | 
137 | #' @importFrom stats getCall
138 | #' @S3method getCall RDDreg
139 | getCall.RDDreg <- function(x,...) attr(x, "RDDcall")
140 | 
141 | 
142 | #format(Sys.Date(), "%A %Y-%m-%d")
143 | 
144 | 


--------------------------------------------------------------------------------
/RDDtools/misc/locpoly.R:
--------------------------------------------------------------------------------
  1 | locpoly2 <- function(x, y, drv = 0L, degree, kernel = "normal",
  2 |                     bandwidth, gridsize = 401L, bwdisc = 25, range.x,
  3 |                     binned = FALSE, truncate = TRUE)
  4 | 
  5 | {
  6 |     ## Install safeguard against non-positive bandwidths:
  7 |     if (!missing(bandwidth) && bandwidth <= 0)
  8 |         stop("'bandwidth' must be strictly positive")
  9 | 
 10 |     drv <- as.integer(drv)
 11 |     if (missing(degree)) degree <- drv + 1L
 12 |     else degree <- as.integer(degree)
 13 | 
 14 |     if (missing(range.x) && !binned)
 15 |         if (missing(y)) {
 16 |             extra <- 0.05*(max(x) - min(x))
 17 |             range.x <- c(min(x)-extra,  max(x)+extra)
 18 |         } else range.x <- c(min(x), max(x))
 19 | 
 20 |     ## Rename common variables
 21 |     M <- gridsize
 22 |     Q <- as.integer(bwdisc)
 23 |     a <- range.x[1L]
 24 |     b <- range.x[2L]
 25 |     pp <- degree + 1L
 26 |     ppp <- 2L*degree + 1L
 27 |     tau <- 4
 28 | 
 29 |     ## Decide whether a density estimate or regressionestimate is required.
 30 | 
 31 |     if (missing(y))  {  # obtain density estimate
 32 |         y <- NULL
 33 |         n <- length(x)
 34 |         gpoints <- seq(a, b, length = M)
 35 |         xcounts <- linbin(x, gpoints, truncate)
 36 |         ycounts <- (M-1)*xcounts/(n*(b-a))
 37 |         xcounts <- rep(1, M)
 38 |     } else {            # obtain regression estimate
 39 |         ## Bin the data if not already binned
 40 |         if (!binned) {
 41 |             gpoints <- seq(a, b, length = M)
 42 |             out <- rlbin(x, y, gpoints, truncate)
 43 |             xcounts <- out$xcounts
 44 |             ycounts <- out$ycounts
 45 |         } else {
 46 |             xcounts <- x
 47 |             ycounts <- y
 48 |             M <- length(xcounts)
 49 |             gpoints <- seq(a, b, length = M)
 50 |         }
 51 |     }
 52 | 
 53 |     ## Set the bin width
 54 |     delta <- (b-a)/(M-1L)
 55 | 
 56 |     ## Discretise the bandwidths
 57 |     if (length(bandwidth) == M) {
 58 |         hlow <- sort(bandwidth)[1L]
 59 |         hupp <- sort(bandwidth)[M]
 60 |         hdisc <- exp(seq(log(hlow), log(hupp), length = Q))
 61 | 
 62 |         ## Determine value of L for each member of "hdisc"
 63 |         Lvec <- floor(tau*hdisc/delta)
 64 | 
 65 |         ## Determine index of closest entry of "hdisc"
 66 |         ## to each member of "bandwidth"
 67 |         indic <- if (Q > 1L) {
 68 |             lhdisc <- log(hdisc)
 69 |             gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1)
 70 |             if (gap == 0) rep(1, M)
 71 |             else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1)
 72 |         } else rep(1, M)
 73 |     } else if (length(bandwidth) == 1L) {
 74 |         indic <- rep(1, M)
 75 |         Q <- 1L
 76 |         Lvec <- rep(floor(tau*bandwidth/delta), Q)
 77 |         hdisc <- rep(bandwidth, Q)
 78 |     } else
 79 |         stop("'bandwidth' must be a scalar or an array of length 'gridsize'")
 80 | 
 81 |     if (min(Lvec) == 0)
 82 |         stop("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'")
 83 | 
 84 |     ## Allocate space for the kernel vector and final estimate
 85 | 
 86 |     dimfkap <- 2L * sum(Lvec) + Q
 87 |     fkap <- rep(0, dimfkap)
 88 |     curvest <- rep(0, M)
 89 |     midpts <- rep(0, Q)
 90 |     ss <- matrix(0, M, ppp)
 91 |     tt <- matrix(0, M, pp)
 92 |     Smat <- matrix(0, pp, pp)
 93 |     Tvec <- rep(0, pp)
 94 |     ipvt <- rep(0, pp)
 95 | 
 96 | 
 97 | print(Q)
 98 | print(Lvec)
 99 | print(indic)
100 | print(dimfkap)
101 |     ## Call FORTRAN routine "locpol"
102 | 
103 |     out <- .Fortran(F_locpol, as.double(xcounts), as.double(ycounts),
104 |                     as.integer(drv), as.double(delta), as.double(hdisc),
105 |                     as.integer(Lvec), as.integer(indic), as.integer(midpts),
106 |                     as.integer(M), as.integer(Q), as.double(fkap), as.integer(pp),
107 |                     as.integer(ppp), as.double(ss), as.double(tt),
108 |                     as.double(Smat), as.double(Tvec), as.integer(ipvt),
109 |                     as.double(curvest))
110 | 
111 | # print(out)
112 |     curvest <- gamma(drv+1) * out[[19L]]
113 | 
114 |     list(x = gpoints, y = curvest)
115 | }
116 | 
117 | 
118 | environment(locpoly2) <- environment(locpoly)
119 | y <- geyser$waiting
120 | plot(x, y)
121 | fit <- locpoly2(x, y, bandwidth = 0.25)
122 | # fit <- locpoly2(x, y, bandwidth = rep(0.25, 401))
123 | lines(fit)


--------------------------------------------------------------------------------
/RDDtools/R/Waldci.R:
--------------------------------------------------------------------------------
  1 | #' Confint allowing vcov
  2 | #' 
  3 | #' Version of vcov allowing for confint
  4 | #' @param x Object of class lm or else
  5 | #' @param parm  specification of which parameters are to be given confidence intervals, see confint
  6 | #' @param level the confidence level required, see confint()
  7 | #' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich
  8 | #' @param df Degrees of freedom
  9 | #' @param \ldots Further argument
 10 | #' @keywords internal
 11 | 
 12 | waldci <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...)
 13 | {
 14 |   UseMethod("waldci")
 15 | }
 16 | 
 17 | waldci.default <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...)
 18 | {
 19 |   ## use S4 methods if loaded
 20 |   coef0 <- if("stats4" %in% loadedNamespaces()) stats4::coef else coef
 21 |   vcov0 <- if("stats4" %in% loadedNamespaces()) stats4::vcov else vcov
 22 | 
 23 |   ## extract coefficients and standard errors
 24 |   est <- coef0(x)
 25 |   if(is.null(vcov.)) se <- vcov0(x) else {
 26 |       if(is.function(vcov.)) se <- vcov.(x)
 27 |         else se <- vcov.
 28 |   }
 29 |   se <- sqrt(diag(se))
 30 | 
 31 |   ## match using names and compute t/z statistics
 32 |   if(!is.null(names(est)) && !is.null(names(se))) {
 33 |     anames <- names(est)[names(est) %in% names(se)]
 34 |     est <- est[anames]
 35 |     se <- se[anames]
 36 |   }
 37 |   
 38 |   ## process level
 39 |   a <- (1 - level)/2
 40 |   a <- c(a, 1 - a)
 41 |   
 42 |   ## get quantile from central limit theorem
 43 |   if(is.null(df)) {
 44 |     df <- try(df.residual(x), silent = TRUE)
 45 |     if(inherits(df, "try-error")) df <- NULL
 46 |   }
 47 |   if(is.null(df)) df <- 0
 48 |   fac <- if(is.finite(df) && df > 0) qt(a, df = df) else qnorm(a)
 49 | 
 50 |   ## set up confidence intervals
 51 |   ci <- cbind(est + fac[1] * se, est + fac[2] * se)
 52 |   colnames(ci) <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3L), "%")
 53 |   
 54 |   ## process parm
 55 |   if(is.null(parm)) parm <- seq_along(est)
 56 | #   if(is.character(parm)) parm <- which(parm %in% names(est))
 57 | if(is.character(parm)) parm <- which(names(est)%in% parm )
 58 |   ci <- ci[parm, , drop = FALSE]
 59 |   return(ci)
 60 | } 
 61 | 
 62 | 
 63 | ## copy of stats:::format.perc
 64 | format.perc <- function (probs, digits) 
 65 |   paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), 
 66 |         "%")
 67 | 
 68 | waldci.RDDreg_np <- function(x, level = 0.95, vcov. = NULL, df = Inf, ...){
 69 | 
 70 |   inf_met <- infType(x) ## def in Misc.R
 71 |   if(inf_met=="se"){
 72 |     if(!is.null(vcov.)|!is.infinite(df)) {warning("Arg 'vcov.' and 'df' only work for RDDreg with inf='lm'")
 73 |     }
 74 |     ## code recycled from stats::confint.default
 75 |     co <- RDDcoef(x, allInfo=TRUE)
 76 |     a <- (1 - level)/2
 77 |     a <- c(a, 1 - a)
 78 |     fac <- qnorm(a)
 79 |     pct <- format.perc(a, 3) ## import!!
 80 |     ci <- array(NA, dim = c(1, 2L), dimnames = list("D",   pct))
 81 |     ci[] <- co[,"Estimate"] + co[,"Std. Error"] %o% fac
 82 |     return(ci)
 83 |   } else {
 84 |     waldci.default(x$RDDslot$model, parm = "D", level = level, vcov. = vcov., df = df, ...)  
 85 |   }
 86 | }
 87 | 
 88 | 
 89 | 
 90 | 
 91 | waldci.glm <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...)
 92 |   waldci.default(x, parm = parm, level = level, vcov. = vcov., df = df, ...)  
 93 | 
 94 | waldci.mlm <- function(x, parm=NULL, level = 0.95, vcov. = NULL, df = NULL, ...)
 95 | {
 96 |   ## obtain vcov
 97 |   v <- if(is.null(vcov.)) vcov(x) else if(is.function(vcov.)) vcov.(x) else vcov.
 98 | 
 99 |   ## nasty hack: replace coefficients so that their names match the vcov() method
100 |   x$coefficients <- structure(as.vector(x$coefficients), .Names = colnames(vcov(x)))
101 | 
102 |   ## call default method
103 |   waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...)
104 | }
105 | 
106 | waldci.survreg <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...)
107 | {
108 |   if(is.null(vcov.)) v <- vcov(x) else {
109 |       if(is.function(vcov.)) v <- vcov.(x)
110 |   	else v <- vcov.
111 |   }
112 |   if(length(x$coefficients) < NROW(x$var)) {
113 |     x$coefficients <- c(x$coefficients, "Log(scale)" = log(x$scale))
114 |   }
115 |   waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...)  
116 | } 
117 | 
118 | 
119 | if(FALSE){
120 | 
121 | library(sandwich)
122 | library(lmtest)
123 | 
124 | reg <- lm(freeny)
125 | 
126 | ### Regular
127 | all(confint(reg)==waldci(reg))
128 | confint(reg)
129 | co_reg <- coeftest(reg)
130 | co_reg[,1] + qnorm(0.975)*co_reg[,2]
131 | co_reg[,1] + qt(0.975, df=reg[["df.residual"]] )*co_reg[,2]
132 | 
133 | ## vcovHC
134 | waldci(reg, vcov.=vcovHC)
135 | co <- coeftest(reg, vcov.=vcovHC)
136 | co[,1] + qnorm(0.975)*co[,2]
137 | co[,1] + qt(0.975, df=reg[["df.residual"]] )*co[,2]
138 | 
139 | }


--------------------------------------------------------------------------------
/RDDtools/R/gen_MC_IK.R:
--------------------------------------------------------------------------------
  1 | #' Generate Monte Carlo simulations of Imbens and Kalyanaraman
  2 | #' 
  3 | #' Generate the simulations reported in Imbens and Kalyanaraman (2012)
  4 | #' @param n The size of sampel to generate
  5 | #' @param version The MC version of Imbens and Kalnayaraman (between 1 and 4).
  6 | #' @param sd The standard deviation of the error term.
  7 | #' @param output Whether to return a data-frame, or already a RDDdata
  8 | #' @param size The size of the effect, this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1
  9 | #' @return An data frame with x and y variables. 
 10 | #' @references TODO
 11 | #' @export
 12 | #' @examples
 13 | #' MC1_dat <- gen_MC_IK()
 14 | #' MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0)
 15 | #' 
 16 | #' ## Use np regression:
 17 | #' reg_nonpara <- RDDreg_np(RDDobject=MC1_rdd)
 18 | #' reg_nonpara
 19 | #' 
 20 | #' # Represent the curves:
 21 | #' plotCu <- function(version=1, xlim=c(-0.1,0.1)){
 22 | #'   res <- gen_MC_IK(sd=0.0000001, n=1000, version=version)
 23 | #'   res <- res[order(res$x),]
 24 | #'   ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), "y"))
 25 | #'   plot(res, type="l", xlim=xlim, ylim=ylim, main=paste("DGP", version))
 26 | #'   abline(v=0)
 27 | #'   xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),]
 28 | #'   points(xCut, col=2)
 29 | #' }
 30 | #' layout(matrix(1:4,2, byrow=TRUE))
 31 | #' plotCu(version=1)
 32 | #' plotCu(version=2)
 33 | #' plotCu(version=3)
 34 | #' plotCu(version=4)
 35 | #' layout(matrix(1))
 36 | 
 37 | gen_MC_IK <- function(n=200, version=1, sd=0.1295, output=c("data.frame", "RDDdata"), size){
 38 |  
 39 |   output <- match.arg(output)
 40 |   if(!version%in% c(1:4) |length(version) !=1) stop("arg 'version' should be between 1 and 4")
 41 | 
 42 |   foo <- switch(version, 
 43 | 			"1"=gen_MC_IK_1,
 44 | 			"2"=gen_MC_IK_2,
 45 | 			"3"=gen_MC_IK_3,
 46 | 			"4"=gen_MC_IK_4)
 47 |   if(missing(size)) {
 48 |     size <- switch(version, 
 49 | 		    "1"=0.04,
 50 | 		    "2"=0,
 51 | 		    "3"=0.1,
 52 | 		    "4"=0.1)
 53 |   }
 54 |   res <- foo(n=n, sd=sd, size=size)
 55 |   if(output=="RDDdata"){
 56 |     res <- RDDdata(x=res$x, y=res$y, cutpoint=0)
 57 |   }
 58 |   res
 59 | }
 60 | 
 61 | 
 62 | ####################################
 63 | ######### MC 1
 64 | ####################################
 65 | 
 66 | gen_MC_IK_1 <- function(n=200,  sd=0.1295, size=0.04){
 67 | 
 68 | ## Regressor:
 69 |   Z <- rbeta(n, shape1=2, shape2=4, ncp = 0)
 70 |   X <- 2*Z-1
 71 |   error <- rnorm(n, sd=sd)
 72 | 
 73 | ## Prepare variables:
 74 |   Y <- vector("numeric", length=n)
 75 |   ind_below <- X<0
 76 |   X_low <- X[ind_below]
 77 |   X_up  <- X[!ind_below]
 78 | 
 79 | ## Compute Y variables:
 80 |   Y[ind_below]  <- 0.48 +  1.27*X_low + 7.18*X_low^2 + 20.21* X_low^3 +21.54*X_low^4 +7.33*X_low^5 + error[ind_below]
 81 |   Y[!ind_below] <- 0.48+size +  0.84*X_up - 3*   X_up^2 +  7.99* X_up^3 - 9.01*X_up^4 +3.56*X_up^5 + error[!ind_below]
 82 | 
 83 | ## Result:
 84 |   res <- data.frame(x=X, y=Y)
 85 |   return(res)
 86 | }
 87 | 
 88 | ####################################
 89 | ######### MC 2
 90 | ####################################
 91 | 
 92 | gen_MC_IK_2 <- function(n=200,  sd=0.1295, size=0){
 93 | 
 94 | #   if(!missing(size) && !is.null(size)) warning("Argument 'size' ignored for gen_MC_IK_2")
 95 | ## Regressor:
 96 |   Z <- rbeta(n, shape1=2, shape2=4, ncp = 0)
 97 |   X <- 2*Z-1
 98 |   error <- rnorm(n, sd=sd)
 99 | 
100 | ## Compute Y variables:
101 |   Y <- ifelse(X<0, 3*X^2, 4*X^2+size) + error
102 | 
103 | ## Result:
104 |   res <- data.frame(x=X, y=Y)
105 |   return(res)
106 | }
107 | 
108 |   
109 | ####################################
110 | ######### MC 3
111 | ####################################
112 | 
113 | gen_MC_IK_3 <- function(n=200,  sd=0.1295, size=0.1){
114 | 
115 | ## Regressor:
116 |   Z <- rbeta(n, shape1=2, shape2=4, ncp = 0)
117 |   X <- 2*Z-1
118 |   error <- rnorm(n, sd=sd)
119 | 
120 | ## Compute Y variables:
121 |   Y <- 0.42 + ifelse(X<0, 0, size) + 0.84*X - 3*X^2 +7.99 * X^3-9.01*X^4+3.56*X^5 + error
122 | 
123 | ## Result:
124 |   res <- data.frame(x=X, y=Y)
125 |   return(res)
126 | }
127 | 
128 | ####################################
129 | ######### MC 4
130 | ####################################
131 | 
132 | gen_MC_IK_4 <- function(n=200,  sd=0.1295, size=0.1){
133 | 
134 | ## Regressor:
135 |   Z <- rbeta(n, shape1=2, shape2=4, ncp = 0)
136 |   X <- 2*Z-1
137 |   error <- rnorm(n, sd=sd)
138 | 
139 | ## Compute Y variables:
140 |   Y <- 0.42 + ifelse(X<0, 0, size) + 0.84*X  +7.99 * X^3-9.01*X^4+3.56*X^5 + error
141 | 
142 | ## Result:
143 |   res <- data.frame(x=X, y=Y)
144 |   return(res)
145 | }
146 | 
147 | 
148 | ####################################
149 | ######### MC simple
150 | ####################################
151 | 
152 | gen_MC_simple <- function(n=200, LATE=0.3){
153 | 
154 | ## Regressor:
155 |   x <- rnorm(n)
156 |   D <- x>= 0
157 |   y <- 0.8 + LATE*D+ 0.3*x+0.1*x*D+rnorm(n)
158 |   RDDdata(x=x, y=y, cutpoint=0)
159 | 
160 | }


--------------------------------------------------------------------------------
/RDDtools/vignettes/RDD_refs.bib:
--------------------------------------------------------------------------------
  1 | % This file was created with JabRef 2.7b.
  2 | % Encoding: UTF-8
  3 | 
  4 | @TECHREPORT{CalonicoCattaneoEtAl2012,
  5 |   author = {Sebastian Calonico and Matias D. Cattaneo and Rocio Titiunik},
  6 |   title = {Robust Nonparametric Bias-Corrected Inference in the Regression Discontinuity
  7 | 	Design},
  8 |   year = {2012},
  9 |   owner = {mat},
 10 |   timestamp = {2013.04.17}
 11 | }
 12 | 
 13 | @ARTICLE{ChengFanEtAl1997,
 14 |   author = {Cheng, M.-Y. and Fan, J. and Marron, J. S.},
 15 |   title = {On Automatic Boundary Corrections},
 16 |   journal = {Annals of Statistics},
 17 |   year = {1997},
 18 |   volume = {25},
 19 |   pages = {1691-1708},
 20 |   owner = {mat},
 21 |   timestamp = {2013.04.17}
 22 | }
 23 | 
 24 | @BOOK{FanGijbels1996,
 25 |   title = {Local Polynomial Modeling and its Implications},
 26 |   publisher = {Boca Raton: Chapman and Hall/CRC, Monographs on Statistics and Applied
 27 | 	Probability no. 66},
 28 |   year = {1996},
 29 |   author = {Fan, J. and Gijbels, I.},
 30 |   owner = {mat},
 31 |   timestamp = {2013.04.17}
 32 | }
 33 | 
 34 | @ARTICLE{FanGijbels1992,
 35 |   author = {Fan, J. and Gijbels, I.},
 36 |   title = {Variable Bandwidth and Local Linear Regression Smoothers},
 37 |   journal = {Annals of Statistics},
 38 |   year = {1992},
 39 |   volume = {20},
 40 |   pages = {2008-2036},
 41 |   owner = {mat},
 42 |   timestamp = {2013.04.17}
 43 | }
 44 | 
 45 | @ARTICLE{ImbensKalyanaraman2012,
 46 |   author = {Guido Imbens And Karthik Kalyanaraman},
 47 |   title = {Optimal Bandwidth Choice for the Regression Discontinuity Estimator},
 48 |   journal = {Review of Economic Studies},
 49 |   year = {2012},
 50 |   volume = {79},
 51 |   pages = {933-959},
 52 |   owner = {mat},
 53 |   timestamp = {2013.04.17}
 54 | }
 55 | 
 56 | @ARTICLE{ImbensLemieux2008,
 57 |   author = {Imbens, Guido W. and Lemieux, Thomas},
 58 |   title = {Regression discontinuity designs: A guide to practice},
 59 |   journal = {Journal of Econometrics},
 60 |   year = {2008},
 61 |   volume = {142},
 62 |   pages = {615-635},
 63 |   number = {2},
 64 |   month = {February},
 65 |   abstract = {In Regression Discontinuity (RD) designs for evaluating causal effects
 66 | 	of interventions, assignment to a treatment is determined at least
 67 | 	partly by the value of an observed covariate lying on either side
 68 | 	of a fixed threshold. These designs were first introduced in the
 69 | 	evaluation literature by Thistlewaite and Campbell (1960). With the
 70 | 	exception of a few unpublished theoretical papers, these methods
 71 | 	did not attract much attention in the economics literature until
 72 | 	recently. Starting in the late 1990s, there has been a large number
 73 | 	of studies in economics applying and extending RD methods. In this
 74 | 	paper we review some of the practical and theoretical issues involved
 75 | 	in the implementation of RD methods.<P>(This abstract was borrowed
 76 | 	from another version of this item.)},
 77 |   owner = {matifou},
 78 |   timestamp = {2014.05.21},
 79 |   url = {http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html}
 80 | }
 81 | 
 82 | @ARTICLE{Lee2008,
 83 |   author = {David S. Lee},
 84 |   title = {Randomized experiments from non-random selection in U.S. House elections},
 85 |   journal = {Journal of Econometrics},
 86 |   year = {2008},
 87 |   volume = {142},
 88 |   pages = {675-697},
 89 |   owner = {mat},
 90 |   timestamp = {2013.04.17}
 91 | }
 92 | 
 93 | @ARTICLE{LeeLemieux2010,
 94 |   author = {Lee, David S. and Thomas Lemieux},
 95 |   title = {Regression Discontinuity Designs in Economics},
 96 |   journal = {Journal of Economic Literature},
 97 |   year = {2010},
 98 |   volume = {48(2)},
 99 |   pages = {281-355},
100 |   owner = {mat},
101 |   timestamp = {2012.11.19}
102 | }
103 | 
104 | @ARTICLE{McCrary2008,
105 |   author = {McCrary, Justin},
106 |   title = {Manipulation of the Running Variable in the Regression Discontinuity
107 | 	Design: A Density Test},
108 |   journal = {Journal of Econometrics},
109 |   year = {2008},
110 |   volume = {142},
111 |   pages = {698-714},
112 |   owner = {mat},
113 |   timestamp = {2013.04.17}
114 | }
115 | 
116 | @TECHREPORT{Porter2003,
117 |   author = {Porter, Jack},
118 |   title = {Estimation in the Regression Discontinuity Model},
119 |   institution = {University of Wisconsin, Madison, Department of Economics},
120 |   year = {2003},
121 |   owner = {mat},
122 |   timestamp = {2013.04.17}
123 | }
124 | 
125 | @ARTICLE{RuppertSheatherEtAl1995,
126 |   author = {Ruppert, D. and Sheather, S. J. and Wand, M. P.},
127 |   title = {An effective bandwidth selector for local least squares regression},
128 |   journal = {Journal of the American Statistical Association},
129 |   year = {1995},
130 |   volume = {90},
131 |   pages = {1257-1270},
132 |   owner = {mat},
133 |   timestamp = {2013.04.17}
134 | }
135 | 
136 | @comment{jabref-meta: selector_publisher:}
137 | 
138 | @comment{jabref-meta: selector_author:}
139 | 
140 | @comment{jabref-meta: selector_journal:}
141 | 
142 | @comment{jabref-meta: selector_keywords:}
143 | 
144 | 


--------------------------------------------------------------------------------
/RDDtools/tests/RDDtools_vs_rdd.Rout.save:
--------------------------------------------------------------------------------
  1 | 
  2 | R version 2.15.2 (2012-10-26) -- "Trick or Treat"
  3 | Copyright (C) 2012 The R Foundation for Statistical Computing
  4 | ISBN 3-900051-07-0
  5 | Platform: x86_64-pc-linux-gnu (64-bit)
  6 | 
  7 | R is free software and comes with ABSOLUTELY NO WARRANTY.
  8 | You are welcome to redistribute it under certain conditions.
  9 | Type 'license()' or 'licence()' for distribution details.
 10 | 
 11 |   Natural language support but running in an English locale
 12 | 
 13 | R is a collaborative project with many contributors.
 14 | Type 'contributors()' for more information and
 15 | 'citation()' on how to cite R or R packages in publications.
 16 | 
 17 | Type 'demo()' for some demos, 'help()' for on-line help, or
 18 | 'help.start()' for an HTML browser interface to help.
 19 | Type 'q()' to quit R.
 20 | 
 21 | > 
 22 | > library(rdd)
 23 | Loading required package: sandwich
 24 | Loading required package: lmtest
 25 | Loading required package: zoo
 26 | 
 27 | Attaching package: 'zoo'
 28 | 
 29 | The following objects are masked from 'package:base':
 30 | 
 31 |     as.Date, as.Date.numeric
 32 | 
 33 | Loading required package: AER
 34 | Loading required package: car
 35 | Loading required package: survival
 36 | Loading required package: splines
 37 | Loading required package: Formula
 38 | > library(RDDtools)
 39 | KernSmooth 2.23 loaded
 40 | Copyright M. P. Wand 1997-2009
 41 | 
 42 | RDDtools 0.22
 43 | PLEASE NOTE THIS is currently only a development version. 
 44 | Run vignette('RDDtools') for the documentation
 45 | > 
 46 | > set.seed(1234)
 47 | > x<-runif(1000,-1,1)
 48 | > cov<-rnorm(1000)
 49 | > y<-3+2*x+3*cov+10*(x>=0)+rnorm(1000)
 50 | > 
 51 | > RD <- RDDdata(x=x, y=y, cutpoint=0, covar=cov)
 52 | > 
 53 | > ### Simple estimation:
 54 | > bw <- IKbandwidth(X=x, Y=y, cutpoint=0)
 55 | > bw
 56 | [1] 0.6442702
 57 | > rdd_mod <- RDestimate(y~x, bw=bw, se.type="const", model=TRUE)$model[[1]]
 58 | > RDDtools_mod <- RDDreg_np(RD, bw=bw, inference="lm")
 59 | > 
 60 | > rdd_co <- coef(summary(rdd_mod))
 61 | > RDDtools_co <- RDDcoef(RDDtools_mod, allCo=TRUE, allInfo=TRUE)
 62 | > rdd_co
 63 |               Estimate Std. Error    t value      Pr(>|t|)
 64 | (Intercept)  2.3870103  0.3039002  7.8545857  1.670299e-14
 65 | Tr          10.8995093  0.4071983 26.7670789 7.187232e-107
 66 | Xl           0.3076565  1.1003584  0.2795966  7.798762e-01
 67 | Xr           1.0007232  1.0724028  0.9331599  3.510850e-01
 68 | > RDDtools_co
 69 |               Estimate Std. Error    t value      Pr(>|t|)
 70 | (Intercept)  2.3870103  0.3039002  7.8545857  1.670299e-14
 71 | D           10.8995093  0.4071983 26.7670789 7.187232e-107
 72 | x            0.3076565  1.1003584  0.2795966  7.798762e-01
 73 | x_right      0.6930668  1.5365013  0.4510681  6.520914e-01
 74 | > 
 75 | > all.equal(rdd_co[-4,], RDDtools_co[1:3,], check.attributes=FALSE)
 76 | [1] TRUE
 77 | > all.equal(rdd_co[4,1], sum(RDDtools_co[3:4,1]), check.attributes=FALSE)
 78 | [1] TRUE
 79 | > 
 80 | > 
 81 | > ### Covariate estimation:
 82 | > rdd_mod_cov <- RDestimate(y~x|cov, kernel="rectangular", bw=5, model=TRUE, se.type="const")$model[[1]]
 83 | > RDDtools_mod_cov <- RDDreg_lm(RD, bw=5, covariates="cov", covar.opt=list(slope="separate"))
 84 | > 
 85 | > rdd_co_cov <- coef(summary(rdd_mod_cov))
 86 | > RDDtools_co_cov <- RDDcoef(RDDtools_mod_cov, allCo=TRUE, allInfo=TRUE)
 87 | > rdd_co_cov
 88 |                Estimate Std. Error    t value      Pr(>|t|)
 89 | (Intercept)  2.90737195 0.09660411 30.0957385 5.649434e-142
 90 | Tr          10.20606095 0.13062887 78.1302094  0.000000e+00
 91 | Xl           1.81515024 0.16640546 10.9079970  3.027120e-26
 92 | Xr           1.86396889 0.15068992 12.3695656  8.602692e-33
 93 | cov          3.04154403 0.05189778 58.6064361  0.000000e+00
 94 | Tr:cov      -0.03728164 0.06948406 -0.5365496  5.916988e-01
 95 | > RDDtools_co_cov
 96 |                Estimate Std. Error    t value      Pr(>|t|)
 97 | (Intercept)  2.90737195 0.09660411 30.0957385 5.649434e-142
 98 | D           10.20606095 0.13062887 78.1302094  0.000000e+00
 99 | x            1.81515024 0.16640546 10.9079970  3.027120e-26
100 | x_right      0.04881865 0.22449550  0.2174594  8.278950e-01
101 | cov          3.04154403 0.05189778 58.6064361  0.000000e+00
102 | `cov:D`     -0.03728164 0.06948406 -0.5365496  5.916988e-01
103 | > 
104 | > all.equal(rdd_co_cov[-4,], RDDtools_co_cov[-4,], check.attributes=FALSE)
105 | [1] TRUE
106 | > 
107 | > ## Fuzzy
108 | > set.seed(123)
109 | > selec <- rbinom(nrow(RD), 1, prob=ifelse(RD$x<0, 0.1, 0.9))
110 | > RD_rdd_ins <- RDDdata(y=RD$y, x=RD$x, z=selec,cutpoint=0)
111 | > 
112 | > RDDto_reg_fuz <- RDDreg_lm(RD_rdd_ins, bw=0.2)
113 | > rdd_reg_fuz <- RDestimate(y~x+selec, data=RD_rdd_ins, kernel="rectangular", bw=0.2, model=TRUE, se.type="const")$model[[2]][[1]]
114 | > 
115 | > all.equal(RDDcoef(RDDto_reg_fuz),coef(summary(rdd_reg_fuz))[2,1])
116 | [1] TRUE
117 | > all.equal(RDDcoef(RDDto_reg_fuz, allCo=TRUE)[1:3],coef(summary(rdd_reg_fuz))[1:3,1], check.attributes=FALSE)
118 | [1] TRUE
119 | > 
120 | > 
121 | > proc.time()
122 | utilisateur     système      écoulé 
123 |       1.248       0.076       1.325 
124 | 


--------------------------------------------------------------------------------
/RDDtools/R/RDDdata_methods.R:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | ### SUMMARY method
  4 | #' @S3method summary RDDdata
  5 | summary.RDDdata <- function(object, ...){
  6 | 
  7 |   cutpoint <- getCutpoint(object)
  8 |   hasCovar_eng <- ifelse(hasCovar(object), "yes", "no")
  9 |   cat("### RDDdata object ###\n")
 10 |   cat("\nCutpoint:", cutpoint, "\n")
 11 |   cat("Sample size:",
 12 | 	"\n\t-Full :", nrow(object), 
 13 | 	"\n\t-Left :", sum(object$x<cutpoint), 
 14 | 	"\n\t-Right:", sum(object$x>=cutpoint))
 15 |   cat("\nCovariates:", hasCovar_eng, "\n")
 16 | }
 17 | 
 18 | #' Plot RDDdata
 19 | #' 
 20 | #' Binned plot of the forcing and outcome variable
 21 | #' 
 22 | #' @param x Object of class RDDdata
 23 | #' @param h The binwidth parameter (note this differs from the bandwidth parameter!)
 24 | #' @param nbins Alternative to h, the total number of bins in the plot.
 25 | #' @param xlim The range of the x data
 26 | #' @param cex Size of the points, see \code{\link{par}}
 27 | #' @param nplot Number of plot to draw
 28 | #' @param device Type of device used. Currently not used.
 29 | #' @param \ldots Further arguments passed to the \code{\link{plot}} function.
 30 | #' @return A plot
 31 | #' @details Produces a simple binned plot averaging values within each interval. The length of the intervals
 32 | #' is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth
 33 | #' argument, that gives half of the length of the kernel window. 
 34 | #' When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{RDDbw_RSW}}. 
 35 | #' 
 36 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}>
 37 | #' @examples
 38 | #' data(Lee2008)
 39 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
 40 | #' plot(Lee2008_rdd)
 41 | #' 
 42 | #' ## Specify manually the bandwidth:
 43 | #' plot(Lee2008_rdd, h=0.2)
 44 | #' 
 45 | #' ## Show three plots with different bandwidth:
 46 | #' plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3)
 47 | #' 
 48 | #' ## Specify instead of the bandwidth, the final number of bins:
 49 | #' plot(Lee2008_rdd, nbins=22)
 50 | #'
 51 | #' ## If the specified number of bins is odd, the larger number is given to side with largest range
 52 | #' plot(Lee2008_rdd, nbins=21)
 53 | #' @method plot RDDdata
 54 | #' @S3method plot RDDdata
 55 | 
 56 | 
 57 | ### PLOT method
 58 | plot.RDDdata <- function(x, h, nbins=NULL, xlim=range(object$x, na.rm=TRUE), cex=0.7, nplot=1, device=c("base", "ggplot"),...){
 59 | 
 60 |   object <- x
 61 |   cutpoint <- getCutpoint(object)
 62 |   device <- match.arg(device)
 63 | 
 64 | ## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill)
 65 |   if(missing(h) & is.null(nbins)) {
 66 |     if(!all(xlim==range(object$x, na.rm=TRUE))){
 67 |       object <- subset(object, x> min(xlim) & x< max(xlim))
 68 |     }
 69 |     h <- RDDbw_RSW(object) 
 70 |     if(is.even(nplot)) {
 71 |       se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2)
 72 |     } else {
 73 |       se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2)
 74 |     }
 75 |     hs <- if(nplot==1) h else se *h
 76 |   } else if(!missing(h) & is.null(nbins)){
 77 |     if(length(h)==1){
 78 |       if(is.even(nplot)) {
 79 | 	se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2)
 80 |       } else {
 81 | 	se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2)
 82 |       }
 83 |       hs <- if(nplot==1) h else se *h
 84 |     } else {
 85 |       if(length(h==nplot)){
 86 | 	hs <- h
 87 |       } else {
 88 | 	stop("Length of h should be either one or equal to nplot (", nplot, ")")
 89 |       }
 90 |     }
 91 |   } else if(!is.null(nbins)){
 92 |     hs <- rep(0.05, nplot)
 93 |     if(length(nbins)!=nplot){
 94 |       stop("Length of nbins should be equal to nplot (", nplot, ")")
 95 |     }
 96 |   }
 97 | 
 98 | 
 99 |   
100 | 
101 | ## plot
102 | 
103 |   par_orig <- par()
104 |   par(mfrow=c(nplot,1))
105 |   for(i in 1:nplot){
106 |     plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], nbins=nbins[i], xlim=xlim, cex=cex,...)
107 |   }
108 |   par(mfrow=c(1,1))
109 | 
110 | 
111 | 
112 | ## invisible return:
113 |   invisible(object)
114 | }
115 | 
116 | 
117 | 
118 | #' Convert a rdd object to lm
119 | #' @param x An object to convert to lm
120 | #' @return An object of class \code{lm}
121 | #' @seealso \code{\link{as.npreg}} which converts \code{RDDreg} objects into \code{npreg} from package \code{np}.
122 | #' @examples 
123 | #' data(Lee2008)
124 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
125 | #' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
126 | #' reg_para_lm <- as.lm(reg_para)
127 | #' reg_para_lm
128 | #' plot(reg_para_lm, which=4)
129 | #' @export
130 | as.lm <- function(x)
131 |   UseMethod("as.lm")
132 | 
133 | 
134 | as.lm_RDD <- function(x){
135 | 
136 |   at_x <- attributes(x)
137 |   at_x[names(at_x)!="names"] <- NULL
138 |   class(x) <- "lm"
139 | 
140 |   x
141 | }
142 | 
143 | #' @S3method as.lm RDDreg_np
144 | as.lm.RDDreg_np <- function(x) as.lm_RDD(x)
145 | 
146 | #' @S3method as.lm RDDreg
147 | as.lm.RDDreg <- function(x) as.lm_RDD(x)
148 | 
149 | 
150 | 
151 | 
152 | # subset.RDDdata <- function(x,...){
153 | # 
154 | #   res <- subset.data.frame(x,...)
155 | #   attributes(res) <- attributes(x)
156 | #   res
157 | # }
158 | 
159 | 
160 | ### EXAMPLE
161 | if(FALSE){
162 |   library(RDDtools)
163 | #   data(Lee2008)
164 | 
165 | 
166 |   environment(plot.RDDdata) <- environment(RDDdata)
167 | 
168 |   Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
169 |   plot(Lee2008_rdd)
170 | 
171 |   plot(Lee2008_rdd, h=0.2)
172 |   plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3)
173 | 
174 |   plot(Lee2008_rdd, nbins=21)
175 | 
176 | }
177 | 


--------------------------------------------------------------------------------
/RDDtools/R/STAR_MHE-data.R:
--------------------------------------------------------------------------------
  1 | #' @name STAR_MHE
  2 | #' @title Transformation of the STAR dataset as used in Angrist and Pischke (2008)
  3 | #' @description Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008) 
  4 | #' @docType data
  5 | #' @usage STAR_MHE
  6 | #' @seealso \code{\link[AER]{STAR}} for the original dataset.
  7 | #' @format A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset, 
  8 | #' all other are created by Angrist and Pischke STAT code.
  9 | #' \describe{
 10 | #' \item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})}
 11 | #' \item{pscore}{The propensity score  (computed by A & P)}
 12 | #' \item{classid}{The id of the class (computed by A & P)}
 13 | #' \item{cs}{Class size (computed by A & P)}
 14 | #' \item{female, nwhite}{Various covariates (computed by A & P)}
 15 | #' }
 16 | #' @details ). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio. 
 17 | #' The full dataset is described and available in package AER, \code{\link[AER]{STAR}}. 
 18 | #' The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website 
 19 | #' (\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}), on the webstar.dta.
 20 | #' @references Krueger, A. (1999) "Experimental Estimates Of Education Production Functions," 
 21 | #' \emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May.
 22 | #' @references Angrist, A. ad  Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion}, 
 23 | #' Princeton University press
 24 | #' @source Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website 
 25 | #' \url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}, retrieved on 26 November 2012.
 26 | #' @examples 
 27 | #' data(STAR_MHE)
 28 | #' 
 29 | #' # Compute the group means:
 30 | #' STAR_MHE_means <- aggregate(STAR_MHE[, c("classid", "pscore", "cs")], by=list(STAR_MHE$classid), mean)
 31 | #' 
 32 | #' # Regression of means, with weighted average:
 33 | #' reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs)
 34 | #' coef(summary(reg_krug_gls))[2,2]
 35 | 
 36 | NULL
 37 | 
 38 | 
 39 | ##### Quick R code used on the output data:
 40 | # STAR_MHE <- read.csv(".../abuelita.csv")
 41 | # STAR_MHE$female <- as.factor(STAR_MHE$female)
 42 | # STAR_MHE$nwhite <- as.factor(STAR_MHE$nwhite)
 43 | # STAR_MHE$n <- NULL
 44 | # 
 45 | # save(STAR_MHE, file="STAR_MHE.rda")
 46 | 
 47 | 
 48 | ##### STATA code krueger.do (retrieved 26 November 2012 on http://economics.mit.edu/faculty/angrist/data1/mhe/krueger)
 49 | # version 9
 50 | # set more 1
 51 | # capture log close
 52 | # log using krueger, text replace
 53 | # 
 54 | # /* create Krueger scaled scores */
 55 | #   
 56 | #   /* reading score */
 57 | #   
 58 | #   clear
 59 | # use webstar
 60 | # 
 61 | # keep if cltypek > 1  		/* regular classes */
 62 | #   keep if treadssk ~= .
 63 | # 
 64 | # sort treadssk
 65 | # gen pread0 = 100*_n/_N
 66 | # 
 67 | # egen pread = mean(pread0), by(treadssk)	/* percentile score in reg. classes */
 68 | #   
 69 | #   keep treadssk pread
 70 | # sort tread
 71 | # keep if tread ~= tread[_n-1]
 72 | # save tempr, replace
 73 | # 
 74 | # /* math score */
 75 | #   
 76 | #   use webstar
 77 | # 
 78 | # keep if cltypek > 1			/* regular classes */
 79 | #   keep if tmathssk ~= .
 80 | # 
 81 | # sort tmathssk
 82 | # gen pmath0 = 100*_n/_N
 83 | # egen pmath = mean(pmath0), by(tmathssk)
 84 | # 
 85 | # keep tmathssk pmath
 86 | # sort tmath
 87 | # keep if tmath ~= tmath[_n-1]
 88 | # save tempm, replace
 89 | # 
 90 | # /* merge percentile scores back on */
 91 | #   
 92 | #   use webstar
 93 | # 
 94 | # keep if stark == 1
 95 | # 
 96 | # sort treadssk
 97 | # merge treadssk using tempr
 98 | # ipolate pread treadssk, gen(pr) epolate
 99 | # drop _merge
100 | # 
101 | # sort tmathssk
102 | # merge tmathssk using tempm
103 | # ipolate pmath tmathssk, gen(pm) epolate
104 | # replace pm = 0 if pm < 0
105 | # drop _merge
106 | # 
107 | # egen pscore = rowmean(pr pm)
108 | # 
109 | # /* make class ids */
110 | #   
111 | #   egen classid1 = group(schidkn cltypek)
112 | # egen cs1 = count(classid1), by(classid1)
113 | # 
114 | # egen classid2 = group(classid1 totexpk hdegk cladk) if cltypek==1 & cs >= 20
115 | # egen classid3 = group(classid1 totexpk hdegk cladk) if cltypek>1 & cs >= 30
116 | # 
117 | # gen temp = classid1*100
118 | # egen classid = rowtotal(temp classid2 classid3)
119 | # egen cs = count(classid), by(classid)
120 | # 
121 | # gen female = ssex == 2
122 | # gen nwhite = srace >= 2 & srace <= 6 if srace ~= .
123 | # 
124 | # keep if cs <= 27 & pscore ~= .
125 | # keep pscore cs schidkn classid female nwhite
126 | # gen n = 1
127 | # 
128 | # save temp, replace
129 | # 
130 | # reg pscore cs, robust
131 | # local se = _se[cs]
132 | # local t = _b[cs]/`se'
133 | # predict r, res
134 | # loneway r classid
135 | # local rho = r(rho)
136 | # 
137 | # collapse cs, by(classid)
138 | # sum cs
139 | # 
140 | # dis r(Var)
141 | # local m = 1 + (r(Var)/r(mean) + r(mean) - 1)*`rho'
142 | # dis `m'
143 | # dis sqrt(`m')
144 | # dis `se'
145 | # dis sqrt(`m')*`se'
146 | # dis `t'/sqrt(`m')
147 | # 
148 | # 
149 | # use temp, clear
150 | # 
151 | # reg pscore cs, robust
152 | # moulton pscore cs, cluster(classid) moulton
153 | # moulton pscore cs, cluster(classid)
154 | # reg pscore cs, cluster(classid)
155 | # brl pscore cs, cluster(classid)
156 | # 
157 | # 
158 | # 
159 | # set seed 123456789
160 | # bootstrap "reg pscore cs" _b, reps(1000) cluster(classid)
161 | # 
162 | # areg pscore, absorb(classid)
163 | # predict hat
164 | # gen ry = pscore - hat + _b[_cons]
165 | # collapse (mean) ry cs (sum) n, by(classid)
166 | # 
167 | # reg ry cs [aw=n]
168 | # 
169 | # 
170 | # log close
171 | # set more 0
172 | 


--------------------------------------------------------------------------------
/RDDtools/R/as.npreg.R:
--------------------------------------------------------------------------------
  1 | #' Convert an RDDreg object to a  \code{npreg} object
  2 | #' 
  3 | #' Convert an RDDobject to a non-parametric regression \code{npreg} from package \code{np}
  4 | #' @param x Object of class \code{RDDreg} created by \code{\link{RDDreg_np}} or \code{\link{RDDreg_lm}}
  5 | #' @param \ldots Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}}
  6 | #' @details This function converts an RDDreg object into an \code{npreg} object from package \code{np}
  7 | #' Note that the output won't be the same, since \code{npreg} does not offer a triangualr kernel, but a gaussian or Epanechinkov one. 
  8 | #' Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while RDDreg 
  9 | #' proceeds as if the kernerl was univariate. A simple solution to make the multivariate kernel similar to the  univariate one 
 10 | #' is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel. 
 11 | #' @export
 12 | #' @return An object of class \code{npreg} or \code{npregbw}
 13 | #' @seealso \code{\link{as.lm}} which converts \code{RDDreg} objects into \code{lm}.
 14 | #' @examples
 15 | #' # Estimate ususal RDDreg:
 16 | #'  data(Lee2008)
 17 | #'  Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
 18 | #'  reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
 19 | #' 
 20 | #' ## Convert to npreg:
 21 | #'  reg_nonpara_np <- as.npreg(reg_nonpara)
 22 | #'  reg_nonpara_np
 23 | #'  RDDcoef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE)
 24 | #' 
 25 | #' ## Compare with result obtained with a Gaussian kernel:
 26 | #'  bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara))
 27 | #'  reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm)
 28 | #'  all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np)) 
 29 | 
 30 | 
 31 | 
 32 | 
 33 | 
 34 | 
 35 | as.npregbw <- function(x,...){
 36 |   res <- as.npregbw_low(x=x, npreg=FALSE,...)
 37 |   res
 38 | }
 39 | 
 40 | #' @rdname as.npregbw
 41 | #' @export
 42 | as.npreg <- function(x,...){
 43 |   res <- as.npregbw_low(x=x, npreg=TRUE,...)
 44 |   res
 45 | }
 46 | 
 47 | 
 48 | as.npregbw_low <- function(x, npreg=FALSE, adjustIK_bw=TRUE, ...){
 49 | 
 50 |   dat <- getOriginalData(x)
 51 |   bw <- getBW(x)
 52 |   cutpoint <- getCutpoint(x)
 53 | 
 54 | ## Specify inputs to npregbw:
 55 | 
 56 |   ## data:
 57 |   x <- dat$x
 58 |   dat_np <- data.frame(y=dat$y, x=x,  D=ifelse(x>=cutpoint,1,0), Dx=ifelse(x>=cutpoint,x,0))
 59 |   dataPoints <- data.frame(x=c(cutpoint,cutpoint), D=c(0,1), Dx=c(0,cutpoint))
 60 | 
 61 |   ## bw:
 62 |   range.x <- range(dat$x, na.rm=TRUE, finite=TRUE)
 63 |   if(adjustIK_bw ){ ## & names(bw) =="h_opt"
 64 |     bw <- RDDbw_IK(dat, kernel="Normal")
 65 |   }
 66 |   bw_other <- 9999*diff(range.x)
 67 |   bws <- c(bw, rep(bw_other, 2))
 68 |   
 69 | 
 70 | ## start npregbw
 71 |   res <- npregbw(bws=bws, formula=y~x+D+Dx, data= dat_np,  regtype = "ll",
 72 | 			eval=dataPoints, bandwidth.compute=FALSE, gradients=TRUE,...)
 73 |   class(res) <- c("RDDreg_npregbw", class(res))
 74 | 
 75 | ## if npreg, return instead model_np <- npreg(bw_np, newdata=dataPoints, gradients=TRUE)
 76 |   if(npreg) {
 77 |     options(np.messages = TRUE) ## otherwise got warnings messages... probably because comes only if loaded!
 78 |     res <- npreg(res, newdata=dataPoints, gradients=TRUE,...)
 79 |     class(res) <- c("RDDreg_npreg", class(res))
 80 |   }
 81 |   attr(res, "RDDdf") <- dat_np
 82 |   attr(res, "cutpoint") <- cutpoint
 83 |   res
 84 | }
 85 | 
 86 | 
 87 | #' @S3method RDDcoef RDDreg_npreg
 88 | RDDcoef.RDDreg_npreg <- function(object, allInfo=FALSE, allCo=FALSE, ...){
 89 | 
 90 |   co <- diff(object$mean)
 91 |   if(allInfo) {
 92 |     se <- sum(object$merr)
 93 |     zval <- co/se
 94 |     pval <- 2 * pnorm(abs(zval), lower.tail = FALSE)
 95 |     res <- cbind(co, se, zval, pval)
 96 |     colnames(res) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)")
 97 |     rownames(res) <- "D"
 98 |   } else {
 99 |     res <- co
100 |   }
101 | 
102 |   if(allCo){
103 |     cos <- c(object$mean[1], object$grad)
104 |     ses <- c(object$merr[1], object$gerr)
105 | 
106 |     ## X_right:
107 |     dataPoints_Xr <- data.frame(x=0, D=0, Dx=c(0,1))
108 |     Xr <- diff(predict(object, newdata=dataPoints_Xr))
109 | 
110 |     estimates <- c(cos[1], co, cos[2], Xr)
111 |     
112 |     if(allInfo){
113 |       zvals <- cos/ses
114 |       pvals <- 2 * pnorm(abs(zvals), lower.tail = FALSE)
115 |       res <- data.frame("Estimate"   = estimates,
116 | 			"Std. Error" = c(ses[1], se, ses[2:3]),
117 | 			"z value"    = c(zvals[1], zval, zvals[2:3]),
118 | 			"Pr(>|z|)"   = c(pvals[1], pval, pvals[2:3]),
119 | 			check.names=FALSE)
120 |       rownames(res) <- c("(Intercept)", "D", "x_left", "x_right")
121 |     } else {
122 |       res <- estimates
123 |     }
124 |   }
125 | 
126 |   res 
127 | }
128 | 
129 | 
130 | if(FALSE){
131 |   library(RDDtools)
132 |   data(Lee2008)
133 |   Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
134 |   reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
135 | 
136 | #   environment(as.npregbw_low) <- environment(RDDdata)
137 |   reg_nonpara_npbw <- as.npregbw(reg_nonpara)
138 |   reg_nonpara_npbw
139 | class(reg_nonpara_npbw)
140 | RDDcoef(reg_nonpara_npbw)
141 | 
142 |   reg_nonpara_np <- as.npreg(reg_nonpara, adjustIK_bw=FALSE)
143 |   reg_nonpara_np
144 | class(reg_nonpara_np)
145 | RDDcoef(reg_nonpara_np)
146 | RDDcoef(reg_nonpara_np, allInfo=TRUE)
147 | RDDcoef(reg_nonpara_np, allInfo=TRUE, allCo=TRUE)
148 | 
149 | ## manual predict:
150 | 
151 | cutpoint <- 0
152 | dataPoints <- data.frame(x=c(cutpoint,cutpoint), D=c(0,1), Dx=c(0,cutpoint))
153 | dataPoints2 <- data.frame(x=0, D=c(0,1), Dx=0)
154 | dataPoints3 <- data.frame(x=c(0,1), D=0, Dx=0)
155 | dataPoints3 <- data.frame(x=0, D=0, Dx=c(0,1))
156 | 
157 | diff(predict(reg_nonpara_np, newdata=dataPoints))
158 | diff(predict(reg_nonpara_np, newdata=dataPoints2))
159 | 
160 | diff(predict(reg_nonpara_np, newdata=dataPoints3))
161 | RDDcoef(reg_nonpara_gaus, allCo=TRUE)
162 | 
163 | ## compare:
164 |   bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara))
165 |   reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm)
166 |   all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np))
167 |   all.equal(RDDcoef(reg_nonpara_gaus, allCo=TRUE),RDDcoef(reg_nonpara_np, allCo=TRUE), check=FALSE)
168 | 
169 | 
170 | }
171 | 


--------------------------------------------------------------------------------
/RDDtools/tests/simple_MC.Rout.save:
--------------------------------------------------------------------------------
  1 | 
  2 | R version 2.15.2 (2012-10-26) -- "Trick or Treat"
  3 | Copyright (C) 2012 The R Foundation for Statistical Computing
  4 | ISBN 3-900051-07-0
  5 | Platform: x86_64-pc-linux-gnu (64-bit)
  6 | 
  7 | R is free software and comes with ABSOLUTELY NO WARRANTY.
  8 | You are welcome to redistribute it under certain conditions.
  9 | Type 'license()' or 'licence()' for distribution details.
 10 | 
 11 |   Natural language support but running in an English locale
 12 | 
 13 | R is a collaborative project with many contributors.
 14 | Type 'contributors()' for more information and
 15 | 'citation()' on how to cite R or R packages in publications.
 16 | 
 17 | Type 'demo()' for some demos, 'help()' for on-line help, or
 18 | 'help.start()' for an HTML browser interface to help.
 19 | Type 'q()' to quit R.
 20 | 
 21 | > 
 22 | > library(RDDtools)
 23 | Loading required package: AER
 24 | Loading required package: car
 25 | Loading required package: lmtest
 26 | Loading required package: zoo
 27 | 
 28 | Attaching package: 'zoo'
 29 | 
 30 | The following objects are masked from 'package:base':
 31 | 
 32 |     as.Date, as.Date.numeric
 33 | 
 34 | Loading required package: sandwich
 35 | Loading required package: survival
 36 | Loading required package: splines
 37 | KernSmooth 2.23 loaded
 38 | Copyright M. P. Wand 1997-2009
 39 | 
 40 | RDDtools 0.22
 41 | PLEASE NOTE THIS is currently only a development version. 
 42 | Run vignette('RDDtools') for the documentation
 43 | > library(rdd)
 44 | Loading required package: Formula
 45 | > 
 46 | > ## simple MC:
 47 | > set.seed(123)
 48 | > 
 49 | > MC_simple <- function(n=200, CATE=0.3, HATE=0.1){
 50 | +   x <- rnorm(n, mean=20, sd=5)
 51 | +   D <- x>= 20
 52 | +   y <- 0.8 + CATE*D+ 0.3*x+HATE*x*D+rnorm(n, sd=0.1)
 53 | +   cat("effect", CATE+HATE*20, "\n")
 54 | +   RDDdata(x=x, y=y, cutpoint=20)
 55 | + 
 56 | + }
 57 | > 
 58 | > input_mc <- MC_simple(n=1000, CATE=0.4)
 59 | effect 2.4 
 60 | > plot(input_mc)
 61 | > 
 62 | > RDD_bw <- RDDbw_IK(input_mc)
 63 | > 
 64 | > RDD_np_sep <- RDDreg_np(input_mc, bw=RDD_bw)
 65 | > RDD_np_same <- RDDreg_np(input_mc, slope="same", bw=RDD_bw)
 66 | > RDD_np_sep_inflm <- RDDreg_np(input_mc, bw=RDD_bw, inf="lm")
 67 | > RDD_np_same_inflm <- RDDreg_np(input_mc, slope="same", bw=RDD_bw, inf="lm")
 68 | > RDD_lm_sep <- RDDreg_lm(input_mc, bw=RDD_bw)
 69 | > RDD_lm_same <- RDDreg_lm(input_mc, slope="same", bw=RDD_bw)
 70 | > rdd_RDe <- RDestimate(y~x, data=input_mc, cutpoint=20, model=TRUE, bw=RDD_bw)
 71 | > 
 72 | > 
 73 | > printCoefmat(coef(summary(RDD_np_sep_inflm$RDDslot$model)))
 74 |              Estimate Std. Error t value  Pr(>|t|)    
 75 | (Intercept) 6.7943125  0.0074768 908.722 < 2.2e-16 ***
 76 | D           2.4175554  0.0106230 227.578 < 2.2e-16 ***
 77 | x           0.2984534  0.0022980 129.876 < 2.2e-16 ***
 78 | x_right     0.1007346  0.0032831  30.683 < 2.2e-16 ***
 79 | ---
 80 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
 81 | > printCoefmat(coef(summary(RDD_np_same_inflm$RDDslot$model)))
 82 |              Estimate Std. Error t value  Pr(>|t|)    
 83 | (Intercept) 6.9205374  0.0088024  786.21 < 2.2e-16 ***
 84 | D           2.4225702  0.0149756  161.77 < 2.2e-16 ***
 85 | x           0.3478051  0.0023140  150.31 < 2.2e-16 ***
 86 | ---
 87 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
 88 | > printCoefmat(coef(summary(RDD_lm_sep)))
 89 |              Estimate Std. Error t value  Pr(>|t|)    
 90 | (Intercept) 6.7962504  0.0079252 857.552 < 2.2e-16 ***
 91 | D           2.4109453  0.0112070 215.129 < 2.2e-16 ***
 92 | x           0.2992111  0.0017938 166.802 < 2.2e-16 ***
 93 | x_right     0.1018062  0.0025548  39.849 < 2.2e-16 ***
 94 | ---
 95 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
 96 | > printCoefmat(coef(summary(RDD_lm_same)))
 97 |              Estimate Std. Error t value  Pr(>|t|)    
 98 | (Intercept) 6.9762180  0.0106354  655.95 < 2.2e-16 ***
 99 | D           2.4137377  0.0183016  131.89 < 2.2e-16 ***
100 | x           0.3494005  0.0020859  167.51 < 2.2e-16 ***
101 | ---
102 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
103 | > printCoefmat(coef(summary(rdd_RDe $model[[1]])))
104 |              Estimate Std. Error t value  Pr(>|t|)    
105 | (Intercept) 6.7943125  0.0074768  908.72 < 2.2e-16 ***
106 | Tr          2.4175554  0.0106230  227.58 < 2.2e-16 ***
107 | Xl          0.2984534  0.0022980  129.88 < 2.2e-16 ***
108 | Xr          0.3991880  0.0023448  170.24 < 2.2e-16 ***
109 | ---
110 | Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 
111 | > 
112 | > 
113 | > ## few checks:
114 | > plse <- plotSensi(RDD_np_sep, from=5, to=20, by=0.5)
115 | > plotPlacebo(RDD_np_sep)
116 | > 
117 | > plotSensi(RDD_np_same, from=5, to=20, by=0.5)
118 | > plotPlacebo(RDD_np_same)
119 | > 
120 | > a<-plotSensi(RDD_lm_sep, from=5, to=20, by=0.5)
121 | > plotPlacebo(RDD_lm_sep)
122 | > 
123 | > plotSensi(RDD_lm_same, from=5, to=20, by=0.5)
124 | > plotPlacebo(RDD_lm_same)
125 | > 
126 | > #### Other MCs:
127 | > set.seed(123)
128 | > head(gen_MC_IK())
129 |            x         y
130 | 1 -0.5604223 0.0192401
131 | 2 -0.4325322 0.2071696
132 | 3  0.4824464 0.8091620
133 | 4 -0.3013330 0.4993961
134 | 5 -0.2740911 0.4570206
135 | 6 -0.1112708 0.3558237
136 | > 
137 | > set.seed(123)
138 | > head(gen_MC_IK(output="RDDdata"))
139 |            x         y
140 | 1 -0.5604223 0.0192401
141 | 2 -0.4325322 0.2071696
142 | 3  0.4824464 0.8091620
143 | 4 -0.3013330 0.4993961
144 | 5 -0.2740911 0.4570206
145 | 6 -0.1112708 0.3558237
146 | > 
147 | > set.seed(123)
148 | > head(gen_MC_IK(version=2))
149 |            x            y
150 | 1 -0.5604223  0.775848845
151 | 2 -0.4325322  0.486922823
152 | 3  0.4824464  1.011047103
153 | 4 -0.3013330  0.416130145
154 | 5 -0.2740911  0.317010484
155 | 6 -0.1112708 -0.009950054
156 | > 
157 | > set.seed(123)
158 | > head(gen_MC_IK(version=3))
159 |            x          y
160 | 1 -0.5604223 -3.6512588
161 | 2 -0.4325322 -1.5947076
162 | 3  0.4824464  0.8091620
163 | 4 -0.3013330 -0.2635494
164 | 5 -0.2740911 -0.1648652
165 | 6 -0.1112708  0.2298459
166 | > 
167 | > set.seed(123)
168 | > head(gen_MC_IK(version=4))
169 |            x            y
170 | 1 -0.5604223 -2.709039228
171 | 2 -0.4325322 -1.033455253
172 | 3  0.4824464  1.507425459
173 | 4 -0.3013330  0.008855458
174 | 5 -0.2740911  0.060512581
175 | 6 -0.1112708  0.266989475
176 | > 
177 | > proc.time()
178 | utilisateur     système      écoulé 
179 |        1.23        0.07        1.46 
180 | 


--------------------------------------------------------------------------------
/RDDtools/R/deprecated.R:
--------------------------------------------------------------------------------
  1 | 
  2 | plotPlacebo_OLD<- function(RDDregobject, from, to, by=0.1, level=0.95, same_bw=FALSE){
  3 | 
  4 |   object <- RDDregobject
  5 |   bw <- getBW(object)
  6 |   cutpoint <- getCutpoint(object)
  7 |   forc_var <- object$model[,"x^1"]
  8 | 
  9 | ## set grid:
 10 |   if(missing(from)) from <- median(forc_var[forc_var<cutpoint])
 11 |   if(missing(to))   to <-   median(forc_var[forc_var>=cutpoint])
 12 | 
 13 |   seqi <- sort(c(cutpoint,seq(from=from, to=to, by=by)))
 14 |   n_seqi <- length(seqi)
 15 | 
 16 | ## set matrix for results:
 17 |   seq_vals <- matrix(NA, nrow=n_seqi, ncol=4, dimnames=list(seqi, c("LATE", "se", "CI_low", "CI_high")))
 18 | 
 19 | ## get call:
 20 |   object_call <- attr(object, "RDDcall")
 21 | 
 22 | ## original dataset:
 23 |   dat_orig <- eval(object_call$RDDobject)
 24 | 
 25 | ## run each time:
 26 |   for(i in seq_along(seqi)){
 27 |     attr(dat_orig, "cutpoint") <- seqi[i]
 28 |     bw_reg <- if(same_bw) bw else RDDbw_IK(dat_orig)
 29 |     object_new <- RDDreg_np(dat_orig, bw=bw_reg)
 30 |     if(!inherits(object_new, "try-error")){
 31 |       co <- coef(summary(object_new))[2,, drop=FALSE]
 32 |       seq_vals[i,"LATE"] <- co[,1]
 33 |       seq_vals[i,"se"] <- co[,2]
 34 |     }
 35 |   }
 36 | 
 37 | ## compute intervals:
 38 |   probs <- (1 - level)/2
 39 |   probs <- c(probs, 1 - probs)
 40 |   quants <- qnorm(probs)
 41 |   seq_vals[,"CI_low"] <- seq_vals[,"LATE"] +quants[1]*seq_vals[,"se"]
 42 |   seq_vals[,"CI_high"] <- seq_vals[,"LATE"] +quants[2]*seq_vals[,"se"]
 43 | 
 44 | 
 45 | ## plot results:
 46 |   ra <- range(seq_vals[,c("CI_low", "CI_high")], na.rm=TRUE)
 47 |   plot(seqi, seq_vals[,"LATE"], type="l", ylab="LATE", xlab="Cutpoints", ylim=ra)
 48 |   title("Placebo test")
 49 | 
 50 |   lines(seqi, seq_vals[,"CI_low"], lty=2)
 51 |   lines(seqi, seq_vals[,"CI_high"], lty=2) #
 52 |   abline(h=0)
 53 |   
 54 | ## add optim in case: 
 55 |   est <- RDDcoef(object)
 56 |   points(cutpoint, RDDcoef(RDDregobject), col=2)
 57 |   segments(cutpoint,ra[1]-1, cutpoint, est, col="red", lty=2)
 58 |   segments(min(seqi,na.rm=TRUE)-1, est, cutpoint, est, col="red", lty=2)
 59 | 
 60 | ## export (silently) results:
 61 |   invisible(seq_vals)
 62 | }
 63 | 
 64 | 
 65 | plotPlacebo_OTHER_OLD <- function(RDDregobject, from=0.25, to=0.75, by=0.1, level=0.95, same_bw=FALSE, device=c("ggplot", "base")){
 66 | 
 67 |   device <- match.arg(device)
 68 |   object <- RDDregobject
 69 |   bw <- getBW(object)
 70 |   cutpoint <- getCutpoint(object)
 71 |   forc_var <- getOriginalX(RDDregobject)
 72 | 
 73 | ## set grid:
 74 |   quants_left  <- quantile(forc_var[forc_var<cutpoint], probs=c(from, to))
 75 |   quants_right <- quantile(forc_var[forc_var>=cutpoint], probs=c(from, to))
 76 | 
 77 |   seqi_left  <- seq(from=quants_left[1], to=quants_left[2], by=by)
 78 |   seqi_right <- seq(from=quants_right[1], to=quants_right[2], by=by)
 79 |   seqi <- c(seqi_left, seqi_right)
 80 | 
 81 |   n_seqi_left <- length(seqi_left)
 82 |   n_seqi_right <- length(seqi_right)
 83 |   n_seqi <- length(seqi)
 84 | 
 85 | ## set matrix for results:
 86 |   seq_vals <- matrix(NA, nrow=n_seqi, ncol=6)
 87 |   colnames(seq_vals) <- c("cutpoint", "position", "LATE", "se", "CI_low", "CI_high")
 88 |   seq_vals[, "cutpoint"] <- seqi
 89 | 
 90 | ## get call:
 91 |   object_call <- attr(object, "RDDcall")
 92 | 
 93 | ## original dataset:
 94 |   dat_orig <- eval(object_call$RDDobject)
 95 | 
 96 | ## run each time:
 97 |   for(i in seq_along(seqi)){
 98 | 
 99 |     ## select sample:
100 |     if(seqi[i]<cutpoint){
101 |       dat_sides <- subset(dat_orig, x<cutpoint)
102 |     } else {
103 |       dat_sides <- subset(dat_orig, x>cutpoint) ## exclude x>cutpoint
104 |     }
105 | 
106 |     ## change the cutpoint
107 |     attr(dat_sides, "cutpoint") <- seqi[i]
108 | 
109 |     ## Re-estimate model and eventually bw
110 |     bw_reg <- if(same_bw) bw else RDDbw_IK(dat_sides)
111 |     object_new <- RDDreg_np(dat_sides, bw=bw_reg)
112 | 
113 |     ## assign results (LATE and se)
114 |     if(!inherits(object_new, "try-error")){
115 |       co <- coef(summary(object_new))[2,, drop=FALSE]
116 |       seq_vals[i,"LATE"] <- co[,1]
117 |       seq_vals[i,"se"] <- co[,2]
118 |     }
119 |   }
120 | 
121 | ## compute intervals:
122 |   probs <- (1 - level)/2
123 |   probs <- c(probs, 1 - probs)
124 |   quants <- qnorm(probs)
125 |   seq_vals[,"CI_low"] <- seq_vals[,"LATE"] +quants[1]*seq_vals[,"se"]
126 |   seq_vals[,"CI_high"] <- seq_vals[,"LATE"] +quants[2]*seq_vals[,"se"]
127 | 
128 | 
129 | ## plot results:
130 |   # prepare df:
131 |   seq_vals <- as.data.frame(seq_vals)
132 |   seq_vals$position <- ifelse(seq_vals$cutpoint < cutpoint, "left", "right")
133 | 
134 |   # get estimates at true cutpoint :
135 |   est <- RDDcoef(object)
136 |   est_conf <- confint(RDDregobject, level=level)["D",]
137 | 
138 |   if(device=="base"){
139 |     ra <- range(seq_vals[,c("CI_low", "CI_high")], est_conf, na.rm=TRUE)
140 |     xlims <- c(quants_left[1], quants_right[2])
141 | #     ylims <- range(seq_vals[, c("LATE", "CI_low", "CI_high")], est_conf)
142 |     plot(seqi_left, seq_vals[1:n_seqi_left,"LATE"], type="l", ylab="LATE", xlab="Cutpoints", ylim=ra, xlim=xlims)
143 |     title("Placebo test")
144 |     abline(h=0)
145 | 
146 |     # left CI
147 |     lines(seqi_left, seq_vals[1:n_seqi_left,"CI_low"], lty=2)
148 |     lines(seqi_left, seq_vals[1:n_seqi_left,"CI_high"], lty=2) 
149 | 
150 |     # right values:
151 |     lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"LATE"], lty=1)
152 |     lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"CI_low"], lty=2)
153 |     lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"CI_high"], lty=2)
154 | 
155 |     # add estimate at true cutoff 
156 |     points(cutpoint, est, col=2)
157 |     segments(cutpoint,ra[1]-1, cutpoint, est, col="red", lty=2)
158 |     segments(min(seqi,na.rm=TRUE)-1, est, cutpoint, est, col="red", lty=2)
159 |   } else {
160 | 
161 |     est_df <- data.frame(cutpoint=cutpoint, LATE=est, position="middle", CI_low=est_conf[1], CI_high=est_conf[2])
162 | 
163 |     # hack for decent width of error bar:
164 |     last_left <- nrow(subset(seq_vals, position=="left"))
165 |     W <- diff(seq_vals[c(last_left, last_left+1), "cutpoint"])/5
166 | 
167 |     pl <- qplot(x=cutpoint, y=LATE, data=seq_vals, geom="line", colour=position)+
168 | 	  geom_smooth(aes(ymin=CI_low, ymax=CI_high), data=seq_vals, stat="identity")+
169 | 	  theme(legend.position="none")+geom_hline(yintercept=0)+
170 | 	  geom_point(aes(x=cutpoint, y=LATE), data=est_df)+
171 | 	  geom_errorbar(aes(ymin=CI_low, ymax=CI_high), data=est_df, width=W)
172 |     print(pl)
173 |   }
174 | 
175 | ## export (silently) results:
176 |   invisible(seq_vals)
177 | }
178 | 


--------------------------------------------------------------------------------
/RDDtools/R/reg_gen.R:
--------------------------------------------------------------------------------
  1 | #' General polynomial estimator of the regression discontinuity
  2 | #' 
  3 | #' Compute RDD estimate allowing a locally kernel weighted version of any estimation function
  4 | #' possibly on the range specified by bandwidth
  5 | #' @param RDDobject Object of class RDDdata created by \code{\link{RDDdata}}
  6 | #' @param covariates Formula to include covariates
  7 | #' @param order Order of the polynomial regression. 
  8 | #' @param bw A bandwidth to specify the subset on which the kernel weighted regression is estimated
  9 | #' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}
 10 | #' @param slope Whether slopes should be different on left or right (separate), or the same.
 11 | #' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). 
 12 | #' @param fun The function to estimate the parameters
 13 | #' @param \ldots Further arguments passed to fun. See the example. 
 14 | #' @details This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}. 
 15 | #' It is assumed that the custom funciton has following behaviour:
 16 | #' \enumerate{
 17 | #'   \item A formula interface, together with a \code{data} argument
 18 | #'   \item A \code{weight} argument
 19 | #'   \item A coef(summary(x)) returning a data-frame containing a column Estimate
 20 | #' }
 21 | #' Note that for the last requirement, this can be accomodated by writing a specific \code{\link{RDDcoef}} 
 22 | #' function for the class of the object returned by \code{fun}. 
 23 | #' @return An object of class RDDreg_lm and class lm, with specific print and plot methods
 24 | #' @references TODO
 25 | #' @include plotBin.R
 26 | #' @export RDDgenreg 
 27 | #' @examples
 28 | #' ## Step 0: prepare data
 29 | #' data(Lee2008)
 30 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
 31 | #' 
 32 | #' ## Estimate a local probit:
 33 | #' Lee2008_rdd$y <- with(Lee2008_rdd, ifelse(y<quantile(y, 0.25), 0,1))
 34 | #' reg_bin_glm <- RDDgenreg(RDDobject=Lee2008_rdd, fun= glm, family=binomial(link="probit"))
 35 | #' print(reg_bin_glm)
 36 | #' summary(reg_bin_glm)
 37 | #'
 38 | 
 39 | RDDgenreg <- function(RDDobject, fun=glm, covariates=NULL, order=1, bw=NULL, slope=c("separate", "same"), covar.opt=list(strategy=c("include", "residual"), slope=c("same", "separate"), bw=NULL), weights, ...){
 40 | 
 41 |   checkIsRDD(RDDobject)
 42 |   cutpoint <- getCutpoint(RDDobject)
 43 | 
 44 |   slope <- match.arg(slope)
 45 | 
 46 |   if(!missing(weights)&!is.null(bw)) stop("Cannot give both 'bw' and 'weights'")
 47 | 
 48 | 
 49 |   ## Subsetting
 50 |   dat <- as.data.frame(RDDobject)
 51 | 
 52 |   if(!is.null(bw)){
 53 |     weights <- ifelse(dat$x >= cutpoint -bw & dat$x <= cutpoint +bw, 1, 0)
 54 |   } else if(!missing(weights)){
 55 |     weights <- weights
 56 |   } else {
 57 |     weights <- NULL
 58 |   }
 59 | 
 60 | ## Construct data
 61 |   if(missing(weights)) weights <- NULL
 62 |   dat_step1 <- model.matrix(RDDobject, covariates=covariates, order=order, bw=bw, 
 63 | 			    slope=slope, covar.opt=covar.opt)
 64 | 
 65 | ## Regression
 66 |   reg <- fun(y~., data=dat_step1, weights=weights,...)
 67 |   
 68 |   ##Return
 69 |   RDDslot <- list()
 70 |   RDDslot$RDDdata <- RDDobject
 71 |   reg$RDDslot <- RDDslot 
 72 |   class(reg) <- c("RDDreg_lm", "RDDreg", class(reg))
 73 |   attr(reg, "PolyOrder") <- order
 74 |   attr(reg, "cutpoint") <- cutpoint
 75 |   attr(reg, "slope") <- slope
 76 |   attr(reg, "RDDcall") <- match.call()
 77 |   attr(reg, "bw") <- bw
 78 |   reg
 79 | }
 80 | 
 81 | RDDgenreg_old <- function(RDDobject, covariates=".", bw=RDDbw_IK(RDDobject), slope=c("separate", "same"), fun=glm, ...){
 82 | 
 83 |   slope <- match.arg(slope)
 84 |   checkIsRDD(RDDobject)
 85 |   if(!is.function(fun)) stop("Arg 'fun' should be a function")
 86 |   cutpoint <- getCutpoint(RDDobject)
 87 | 
 88 | ## Construct data
 89 |   dat <- as.data.frame(RDDobject)
 90 | 
 91 |   dat_step1 <- dat[, c("y", "x")]
 92 |   dat_step1$x <- dat_step1$x -cutpoint
 93 |   dat_step1$D <- ifelse(dat_step1$x >= 0, 1,0)
 94 |   if(slope=="separate") {
 95 |     dat_step1$x_right <- dat_step1$x*dat_step1$D 
 96 |   }
 97 | 
 98 | ### Weights
 99 |   kernel_w <- Kernel_tri(dat_step1[,"x"], center=0, bw=bw)
100 | 
101 | ## Regression
102 |   reg <- fun(y~., data=dat_step1, weights=kernel_w,...)
103 | 
104 | ##Return
105 |   class(reg) <- c("RDDreg_gen", "RDDreg", class(reg))
106 |   attr(reg, "RDDcall") <- match.call()
107 |   attr(reg, "cutpoint") <- cutpoint
108 |   attr(reg, "bw") <- bw
109 |   reg
110 | }
111 | 
112 | 
113 | if(FALSE){
114 | 
115 |   library(RDDtools)
116 |   data(Lee2008)
117 |   Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
118 | 
119 |   reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
120 |   environment(RDDgenreg) <- environment(RDDdata)
121 |   reg_glm_norm <- RDDgenreg(RDDobject=Lee2008_rdd)
122 | 
123 | reg_nonpara
124 | reg_glm_norm
125 | plot(reg_glm_norm) 
126 | 
127 | 
128 | ### Binary example:
129 | 
130 | ## gen from latent model:
131 | gen_MC_binom <- function(n=200, LATE=0.3){
132 |   x <- rnorm(n)
133 |   D <- x>= 0
134 |   y <- 0.8 + LATE*D+ 0.3*x+0.1*x*D+rnorm(n)
135 |   y <- as.integer(ifelse(y> -0.5, 1, 0))
136 |   if(mean(y==1)<0.04) y[sample(c(0,1), prob=c(0.1, 0.9), replace=TRUE, size=n)] <- 1
137 |   RDDdata(x=x, y=y, cutpoint=0)
138 | }
139 | 
140 | mc <- gen_MC_binom()
141 | environment(RDDgenreg) <- environment(RDDdata)
142 | reg_bin_glm <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="probit"))
143 |   
144 | ## quantile:
145 |   library(quantreg)
146 |   MC1_dat <- gen_MC_IK()
147 |   MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0)
148 |   
149 |   RDDcoef.rq <- function(object, allInfo=FALSE, ...){
150 |     res <- coef(summary(object))["D",, drop=FALSE]
151 |     if(!allInfo) res <- res[,"coefficients"]
152 |     res
153 |   }
154 |   
155 |   reg_bin_rq1 <- RDDgenreg(RDDobject=MC1_rdd,  fun=rq, tau=0.5, bw=0.5)
156 |   reg_bin_rq1
157 |   coef(reg_bin_rq1)
158 |   RDDcoef(reg_bin_rq1)
159 |   RDDcoef(reg_bin_rq1, allInfo=TRUE)
160 |   summary(reg_bin_rq1)
161 |   
162 |   pl_rq <- plotSensi(reg_bin_rq1, order=1, from=0.1, to=1)
163 |   pl_rq
164 |   
165 | 
166 |   
167 |   
168 |   
169 | ## Monte Carlo
170 |   
171 | doEs<- function(n){
172 | mc <- gen_MC_binom()
173 |   reg_bin_np <- RDDreg_np(RDDobject=mc)
174 |   environment(RDDgenreg) <- environment(RDDdata)
175 |   reg_bin_glm <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="probit"))
176 |   reg_bin_glm_log <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="logit"))
177 | 
178 | a<- RDDtools:::RDDcoef(reg_bin_glm)/2.5
179 | b<- RDDtools:::RDDcoef(reg_bin_glm_log)/4
180 | d<- RDDtools:::RDDcoef(reg_bin_np)
181 | 
182 | res <- c(a, b, d)
183 | names(res) <- c("Probit", "Logit", "LPM")
184 | res
185 | }
186 | 
187 | MC_logs <- replicate(500, doEs())
188 | 
189 | MC_logs2 <- t(MC_logs)
190 | colMeans(MC_logs2)
191 | 
192 | colMeans(MC_logs2-0.2)
193 | apply(MC_logs2, 2, sd)
194 | 
195 | colMeans(MC_logs2-0.2)^2+apply(MC_logs2, 2, var)
196 | colMeans(MC_logs2-0.2)^2+apply(MC_logs2, 2, sd)
197 | 
198 | head(MC_logs)
199 | 
200 | reg_bin_glm
201 | reg_bin_np
202 | 
203 | fav <- mean(dnorm(predict(reg_bin_glm, type = "link")))
204 | fav * coef(swiss_probit)
205 | 
206 | 
207 | }
208 | 


--------------------------------------------------------------------------------
/RDDtools/R/reg_lm.R:
--------------------------------------------------------------------------------
  1 | #' Parametric polynomial estimator of the regression discontinuity
  2 | #' 
  3 | #' Compute a parametric polynomial regression of the ATE, 
  4 | #' possibly on the range specified by bandwidth
  5 | #' @param RDDobject Object of class RDDdata created by \code{\link{RDDdata}}
  6 | #' @param covariates Formula to include covariates
  7 | #' @param order Order of the polynomial regression. 
  8 | #' @param bw A bandwidth to specify the subset on which the parametric regression is estimated
  9 | #' @param covar.strat DEPRECATED, use covar.opt instead. 
 10 | #' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). 
 11 | #' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}
 12 | #' @param slope Whether slopes should be different on left or right (separate), or the same.
 13 | #' @return An object of class RDDreg_lm and class lm, with specific print and plot methods
 14 | #' @details This function estimates the standard \emph{discontinuity regression}:
 15 | #' \deqn{Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon}
 16 | #' with \eqn{\tau} the main parameter of interest. Several versions of the regression can be estimated, either restricting the slopes to be the same, 
 17 | #' i.e \eqn{\beta_{1}=\beta_{2}} (argument \code{slope}). The order of the polynomial in \eqn{X-c} can also be adjusted with argument \code{order}. 
 18 | #' Note that a value of zero can be used, which corresponds to the simple \emph{difference in means}, that one would use if the samples were random. 
 19 | #' Covariates can also be added in the regression, according to the two strategies discussed in Lee and Lemieux (2010, sec 4.5), through argument \code{covar.strat}:
 20 | #' \describe{
 21 | #' \item{include}{Covariates are simply added as supplementary regressors in the RD equation}
 22 | #' \item{residual}{The dependent variable is first regressed on the covariates only, then the RDD equation is applied on the residuals from this first step}}
 23 | #' The regression can also be estimated in a neighborhood of the cutpoint with the argument \code{bw}. This make the parametric regression resemble 
 24 | #' the non-parametric local kernel \code{\link{RDDreg_np}}. Similarly, weights can also be provided (but not simultaneously to \code{bw}). 
 25 | #'
 26 | #' The returned object is a classical \code{lm} object, augmented with a \code{RDDslot}, so usual methods can be applied. As is done in general in R, 
 27 | #' heteroskeadsticity-robust inference can be done later on with the usual function from package \pkg{sandwich}. For the case of clustered observations
 28 | #' a specific function \code{\link{clusterInf}} is provided.  
 29 | #' @references TODO
 30 | #' @include plotBin.R
 31 | #' @import Formula
 32 | #' @importFrom AER ivreg
 33 | #' @export
 34 | #' @examples
 35 | #' ## Step 0: prepare data
 36 | #' data(Lee2008)
 37 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
 38 | #' ## Step 2: regression
 39 | #' # Simple polynomial of order 1:
 40 | #' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
 41 | #' print(reg_para)
 42 | #' plot(reg_para)
 43 | #'
 44 | #' # Simple polynomial of order 4:
 45 | #' reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4)
 46 | #' reg_para4
 47 | #' plot(reg_para4)
 48 | #'
 49 | #' # Restrict sample to bandwidth area:
 50 | #' bw_ik <- RDDbw_IK(Lee2008_rdd)
 51 | #' reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4)
 52 | #' reg_para_ik
 53 | #' plot(reg_para_ik)
 54 | 
 55 | 
 56 | RDDreg_lm <- function(RDDobject, covariates=NULL, order=1, bw=NULL, slope=c("separate", "same"), covar.opt=list(strategy=c("include", "residual"), slope=c("same", "separate"), bw=NULL), covar.strat=c("include", "residual"), weights){
 57 | 
 58 |   checkIsRDD(RDDobject)
 59 |   cutpoint <- getCutpoint(RDDobject)
 60 |   type <- getType(RDDobject)
 61 | 
 62 |   slope <- match.arg(slope)
 63 | 
 64 |   if(!missing(covar.strat)) warning("covar.strat is (soon) deprecated arg!")  
 65 |   if(!missing(weights)&!is.null(bw)) stop("Cannot give both 'bw' and 'weights'")
 66 | 
 67 | ## Subsetting
 68 |   dat <- as.data.frame(RDDobject)
 69 | 
 70 |   if(!is.null(bw)){
 71 |     weights <- ifelse(dat$x >= cutpoint -bw & dat$x <= cutpoint +bw, 1, 0)
 72 |   } else if(!missing(weights)){
 73 |     weights <- weights
 74 |   } else {
 75 |     weights <- NULL
 76 |   }
 77 | 
 78 | ## Construct data
 79 |   if(missing(weights)) weights <- NULL
 80 |   dat_step1 <- model.matrix(RDDobject, covariates=covariates, order=order, bw=bw, 
 81 | 			    slope=slope, covar.opt=covar.opt)
 82 | 
 83 | ## Regression
 84 |   if(type=="Sharp"){
 85 |     reg <- lm(y~., data=dat_step1, weights=weights)
 86 |     class_reg <- "lm"
 87 |   } else {
 88 |     if(!is.null(covariates)) stop("Covariates currently not implemented for Fuzzy case")
 89 |     reg <- ivreg(y~.-ins|.-D, data=dat_step1, weights=weights)
 90 |     class_reg <- "ivreg"
 91 |   }
 92 |   
 93 | 
 94 | ##Return
 95 |   RDDslot <- list()
 96 |   RDDslot$RDDdata <- RDDobject
 97 |   reg$RDDslot <- RDDslot 
 98 |   class(reg) <- c("RDDreg_lm", "RDDreg", class_reg)
 99 |   attr(reg, "PolyOrder") <- order
100 |   attr(reg, "cutpoint") <- cutpoint
101 |   attr(reg, "slope") <- slope
102 |   attr(reg, "RDDcall") <- match.call()
103 |   attr(reg, "bw") <- bw
104 |   reg
105 | }
106 | 
107 | 
108 | #' @S3method print RDDreg_lm
109 | print.RDDreg_lm <- function(x,...) {
110 | 
111 |   order <- getOrder(x)
112 |   cutpoint <- getCutpoint(x)
113 |   slope <- getSlope(x)
114 |   bw <- getBW(x)
115 |   hasBw <- !is.null(bw)
116 |   bw2 <- if(hasBw) bw else Inf
117 | 
118 |   x_var <- getOriginalX(x)
119 |   n_left  <- sum(x_var >= cutpoint -bw2 & x_var < cutpoint)
120 |   n_right <- sum(x_var >= cutpoint & x_var <= cutpoint+bw2)
121 | 
122 |   cat("### RDD regression: parametric ###\n")
123 |   cat("\tPolynomial order: ", order, "\n")
124 |   cat("\tSlopes: ", slope, "\n")
125 |   if(hasBw)   cat("\tBandwidth: ", bw, "\n")
126 |   cat("\tNumber of obs: ", sum(n_left+n_right), " (left: ", n_left, ", right: ", n_right, ")\n", sep="")
127 | 
128 |   cat("\n\tCoefficient:\n")
129 | 
130 |   printCoefmat(coef(summary(x))[2,, drop=FALSE])
131 | 
132 | }
133 | 
134 | #' @S3method plot RDDreg_lm
135 | plot.RDDreg_lm <- function(x,...) {
136 | 
137 | ## data
138 |   dat <- getOriginalData(x)
139 |   subw <-   if(!is.null(x$weights)) x$weights>0 else rep(TRUE, nrow(dat))
140 |   pred <- data.frame(x=dat$x,y=fitted(x))[subw,]
141 |   
142 | ##plot
143 |   plotBin(dat$x, dat$y, ...)
144 |   lines(pred[order(pred$x),])
145 | }
146 | 
147 | 
148 | 
149 | if(FALSE){
150 | 
151 |   library(RDDtools)
152 |   data(Lee2008)
153 | 
154 |   Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
155 | 
156 | 
157 |   reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
158 |   print(x=reg_para )
159 |   summary(reg_para )
160 | 
161 |   reg_para_same <- RDDreg_lm(RDDobject=Lee2008_rdd, slope="same")
162 |   print(x=reg_para_same )
163 |   summary(reg_para_same )
164 | 
165 |   reg_para2 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2)
166 |   reg_para2
167 |   summary(reg_para2)
168 |   plot(reg_para2)
169 | 
170 |   reg_para2_same <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2, slope="same")
171 |   reg_para2_same
172 |   summary(reg_para2_same)
173 |   plot(reg_para2)
174 | 
175 |   bw_ik <- RDDbw_IK(Lee2008_rdd)
176 |   reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik)
177 |   print(x=reg_para_ik)
178 |   plot(x=reg_para_ik)
179 | 
180 | }


--------------------------------------------------------------------------------
/RDDtools/R/clusterInf.R:
--------------------------------------------------------------------------------
  1 | #' Post-inference for clustered data
  2 | #' 
  3 | #' Correct standard-errors to account for clustered data, doing either a degrees of freedom correction or using a heteroskedasticidty-cluster robust covariance matrix
  4 | #' possibly on the range specified by bandwidth
  5 | #' @param object Object of class lm, from which RDDreg also inherits.
  6 | #' @param clusterVar The variable containing the cluster attributions. 
  7 | #' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich
  8 | #' @param type The type of cluster correction to use: either the degrees of freedom, or a HC matrix. 
  9 | #' @param \ldots Further arguments passed to coeftest
 10 | #' @return The output of the coeftest function, which is itself of class \code{coeftest}
 11 | #' @seealso \code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{cluserInf}
 12 | #' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. 
 13 | #' \emph{AmericanEconomic Review}, 93, p. 133-138
 14 | #' @export
 15 | #' @import sandwich
 16 | #' @import lmtest
 17 | #' @examples
 18 | #' data(Lee2008)
 19 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
 20 | #' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
 21 | #' 
 22 | #' # here we just generate randomly a cluster variable:
 23 | #' nlet <- sort(c(outer(letters, letters, paste, sep="")))
 24 | #' clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE)
 25 | #'
 26 | #' # now do post-inference:
 27 | #' clusterInf(reg_para, clusterVar=clusRandom)
 28 | #' clusterInf(reg_para, clusterVar=clusRandom, type="HC")
 29 | 
 30 | 
 31 | clusterInf <- function(object, clusterVar, vcov. = NULL, type=c("df-adj", "HC"), ...){
 32 |   
 33 |   if(is.null(clusterVar)) stop("clusterVar seems to be NULL?")
 34 |   type <- match.arg(type)
 35 | 
 36 |   if(type=="df-adj"){
 37 |     nClus <- if(is.factor(clusterVar)) nlevels(clusterVar) else length(unique(clusterVar))
 38 |     res <- coeftest(object, vcov. = vcov., df = nClus, ...)
 39 |   } else {
 40 |     if(!is.null(vcov.)) warning("arg 'vcov.' not used when 'type=HC' (default vcovCluster used)")
 41 |     res <- coeftest(object, vcov. = function(x) vcovCluster(x, clusterVar=clusterVar), ...)
 42 |   }
 43 | 
 44 |   return(res)
 45 | }
 46 | 
 47 | #' @S3method estfun RDDreg_np
 48 | estfun.RDDreg_np <- function(x,...){
 49 |   inf_met <- infType(x) ## def in Misc.R
 50 |   if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators")
 51 |   estfun(x$RDDslot$model)
 52 | }
 53 | 
 54 | #' @S3method bread RDDreg_np
 55 | bread.RDDreg_np <- function(x,...){
 56 |   inf_met <- infType(x) ## def in Misc.R
 57 |   if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators")
 58 |   bread(x$RDDslot$model)
 59 | } 
 60 | 
 61 | 
 62 | # sandwich.RDDreg_np <- function (x, bread. = bread, meat. = meat, ...){
 63 | #   inf_met <- infType(x) ## def in Misc.R
 64 | #   if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators")
 65 | #   sandwich(x$RDDslot$model, bread.=bread., meat.=meat., ...)
 66 | # }
 67 | 
 68 | #' @S3method model.frame RDDreg_np
 69 | model.frame.RDDreg_np <- function (formula, ...) 
 70 |   model.frame(formula$RDDslot$model)
 71 | 
 72 | #' Cluster Heteroskedasticity-consistent estimation of the covariance matrix. 
 73 | #' 
 74 | #' Offer a cluster variant of the usual Heteroskedasticity-consistent 
 75 | #' @param object Object of class lm, from which RDDreg also inherits.
 76 | #' @param clusterVar The variable containing the cluster attributions. 
 77 | #' @return A matrix containing the covariance matrix estimate.
 78 | #' @author Mahmood Arai, see \url{http://people.su.se/~ma/econometrics.html}
 79 | #' @references Cameron, C.,  Gelbach, J. and Miller, D. (2011) Robust Inference With Multiway Clustering,
 80 | #' \emph{Journal of Business and Economic Statistics},  vol. 29(2), pages 238-249.
 81 | #' #' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. 
 82 | #' \emph{AmericanEconomic Review}, 93, p. 133-138
 83 | #' @references Arai, M. (2011) Cluster-robust standard errors using R, Note available \url{http://people.su.se/~ma/clustering.pdf}. 
 84 | #' @export
 85 | #' @seealso \code{\link{clusterInf}} for a direct function, allowing also alternative cluster inference methods. 
 86 | #' See also \code{\link[rms]{robcov}} from package \code{rms} for another implementation of the cluster robust. 
 87 | #' @examples
 88 | #' data(STAR_MHE)
 89 | #' if(all(c(require(sandwich), require(lmtest)))){
 90 | #' 
 91 | #' # Run simple regression:
 92 | #' reg_krug <- lm(pscore~cs, data=STAR_MHE)
 93 | #' 
 94 | #' # Row 1 of Table 8.2.1, inference with standard vcovHC:
 95 | #' coeftest(reg_krug,vcov.=vcovHC(reg_krug, "HC1"))[2,2]
 96 | #' 
 97 | #' # Row 4 of Table 8.2.1, inference with cluster vcovHC:
 98 | #' coeftest(reg_krug,vcov.=vcovCluster(reg_krug, clusterVar=STAR_MHE$classid))[2,2]
 99 | #' }
100 | 
101 | vcovCluster   <- function(object, clusterVar){
102 |   M <- length(unique(clusterVar))   
103 |   N <- length(clusterVar)  	        
104 |   K <- getModelRank(object)
105 |   dfc <- (M/(M-1))*((N-1)/(N-K))  
106 |   uj  <- apply(estfun(object),2, function(x) tapply(x, clusterVar, sum))
107 |   dfc*sandwich(object, meat.=crossprod(uj)/N)
108 | }
109 | 
110 | #' @rdname vcovCluster
111 | #' @param clusterVar1,clusterVar2 The two cluster variables for the 2-cluster case.
112 | #' @export
113 | vcovCluster2 <- function(object, clusterVar1, clusterVar2){
114 |     # R-codes (www.r-project.org) for computing multi-way 
115 |     # clustered-standard errors. Mahmood Arai, Jan 26, 2008. 
116 |     # See: Thompson (2006), Cameron, Gelbach and Miller (2006)
117 |     # and Petersen (2006).
118 |     # reweighting the var-cov matrix for the within model
119 |   
120 |     K   <- getModelRank(object)
121 |     estF <- estfun(object)
122 |     
123 |     clusterVar12 <- paste(clusterVar1,clusterVar2, sep="")
124 |     M1  <- length(unique(clusterVar1))
125 |     M2  <- length(unique(clusterVar2))   
126 |     M12 <- length(unique(clusterVar12))
127 |     N   <- length(clusterVar1)          
128 |     
129 |     dfc1  <- (M1/(M1-1))*((N-1)/(N-K))  
130 |     dfc2  <- (M2/(M2-1))*((N-1)/(N-K))  
131 |     dfc12 <- (M12/(M12-1))*((N-1)/(N-K))  
132 |     
133 |     u1j   <- apply(estF, 2, function(x) tapply(x, clusterVar1,  sum)) 
134 |     u2j   <- apply(estF, 2, function(x) tapply(x, clusterVar2,  sum)) 
135 |     u12j  <- apply(estF, 2, function(x) tapply(x, clusterVar12, sum)) 
136 |     vc1   <-  dfc1*sandwich(object, meat.=crossprod(u1j)/N )
137 |     vc2   <-  dfc2*sandwich(object, meat.=crossprod(u2j)/N )
138 |     vc12  <- dfc12*sandwich(object, meat.=crossprod(u12j)/N)
139 |     vcovMCL <- vc1 + vc2 - vc12
140 |     vcovMCL
141 | }
142 | 
143 | getModelRank <- function(object,...)
144 |   UseMethod("getModelRank")
145 | 
146 | getModelRank.default <- function(object,...) object$rank
147 | 
148 | getModelRank.RDDreg_np <- function(object,...) getModelRank.default(object$RDDslot$model)
149 | 
150 | if(FALSE){
151 | 
152 |   library(RDDtools)
153 |   data(Lee2008)
154 | 
155 |   Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
156 | 
157 | 
158 |   reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
159 |   print(x=reg_para )
160 |   summary(reg_para )
161 | 
162 | ## cluster inference
163 |   set.seed(123)
164 |   nlet <- sort(c(outer(letters, letters, paste, sep="")))
165 |   clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE)
166 |   clusterInf(reg_para, clusterVar=clusRandom)
167 | 
168 |   clusterInf(reg_para, clusterVar=clusRandom, type="HC")
169 | 
170 | ## compare with rdd:
171 |   library(rdd)
172 |   rddest <- RDestimate(y~x, data=Lee2008, bw=30, kernel="rectangular", model=TRUE)
173 |   rddest_2 <- RDestimate2(y~x, data=Lee2008, bw=30, kernel="rectangular", model=TRUE, cluster=clusRandom)
174 |   coef(summary(reg_para))
175 |   coef(summary(rddest$model[[2]]))
176 | 
177 |   all.equal(clusterInf(reg_para, clusterVar=clusRandom, type="HC")["D", "Std. Error"],rddest_2[["se"]][2])
178 | }


--------------------------------------------------------------------------------
/RDDtools/R/RDDdata.R:
--------------------------------------------------------------------------------
  1 | #'Construct RDDdata
  2 | #' 
  3 | #' Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates. 
  4 | #' 
  5 | #' @param x Forcing variable
  6 | #' @param y Output
  7 | #' @param covar Exogeneous variables
  8 | #' @param cutpoint Cutpoint
  9 | #' @param labels Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently. 
 10 | #' @param data A data-frame for the \code{x} and \code{y} variables. If this is provided, 
 11 | #' the column names can be entered directly for argument \code{x} and \code{y}
 12 | #' @param z Assignment variable for the fuzzy case. 
 13 | #' @return Object of class \code{RDDdata}, inheriting from \code{data.frame}
 14 | #' @export
 15 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}>
 16 | #' @examples
 17 | #' data(Lee2008)
 18 | #' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0)
 19 | #' rd2 <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0)
 20 | #' 
 21 | #' # The print() function is the same as the print.data.frame:
 22 | #' rd
 23 | #'
 24 | #' # The summary() and plot() function are specific to RDDdata
 25 | #' summary(rd)
 26 | #' plot(rd)
 27 | 
 28 | 
 29 | RDDdata <- function(y, x, covar, cutpoint, z, labels, data){
 30 | 
 31 |   
 32 | ## check args
 33 |   type <- ifelse(missing(z), "Sharp", "Fuzzy")
 34 |   hasCovar <- !missing(covar)
 35 |   if(missing(cutpoint)) stop("Please provide cutpoint")
 36 |   covar_nam <- deparse(substitute(covar))
 37 | 
 38 | ## Use data in case:
 39 |   if(!missing(data)){
 40 |     pf <- parent.frame()
 41 |     x <- eval(substitute(x), data, enclos = pf) # copy from with.default
 42 |     y <- eval(substitute(y), data, enclos = pf) # copy from with.default
 43 |     if(hasCovar) covar <- eval(substitute(covar), data, enclos = pf) # idem
 44 |   }
 45 |   
 46 | ### Check y, x univariate
 47 |   k_y <- NCOL(y)
 48 |   k_x <- NCOL(x)
 49 | 
 50 |   if(any(!c(k_y, k_x)==1)) stop("y or x should be univariate")
 51 | 
 52 | ### Check y, x, z same size
 53 |   n_y <- NROW(y)
 54 |   n_x <- NROW(x)
 55 |   n_covar <- if(hasCovar) NROW(x) else NULL
 56 | 
 57 |   if(any(c(n_y, n_x) != n_covar)) stop("y or x should be univariate")
 58 | 
 59 | ### Check cutpoint
 60 |   range_x <- range(x, na.rm=TRUE)
 61 |   if(cutpoint<range_x[1] |cutpoint>range_x[2]) stop("Cutpoint outside range of x")
 62 | 
 63 | ## Check labels
 64 |   if(!missing(labels)){
 65 |     if(!is.list(labels)) stop("labels should be a list.")
 66 |     if(is.null(names(labels)) || !all(names(labels)%in%c("x", "y", "covar"))) stop("labels should be a list with components x, and/or y, and/or covar")
 67 |     if(hasCovar){
 68 |       if("covar"%in%names(labels) && length(labels$covar)!=NCOL(covar)) stop("There should be ", NCOL(covar), " values (dim of covar) for component 'covar' in labels")
 69 |     }
 70 |   } else {
 71 |     labels <- list()
 72 |   }
 73 | 
 74 | #   if(is.null(labels$x)) labels$x <- deparse(substitute(x))
 75 | #   if(is.null(labels$y)) labels$y <- deparse(substitute(y))
 76 | #   if(hasCova && is.null(labels$covar)) labels$covar <- if(NCOL(covar)==1) names(deparse(substitute(y))
 77 | 
 78 | ## Assemble data
 79 |   RDDdat <- data.frame(x=x, y=y)
 80 |   if(hasCovar) {
 81 |     RDDdat <- cbind(RDDdat,covar)
 82 |     if(NCOL(covar)==1 && is.null(colnames(covar))) colnames(RDDdat)[3] <- covar_nam
 83 |   } 
 84 | 
 85 |   if(type=="Fuzzy"){
 86 |     RDDdat <- cbind(RDDdat,z)
 87 |   }
 88 | 
 89 | ## return
 90 |   class(RDDdat) <- c("RDDdata", "data.frame")
 91 |   attr(RDDdat, "hasCovar") <- hasCovar
 92 |   attr(RDDdat, "labels") <- labels
 93 |   attr(RDDdat, "cutpoint") <- cutpoint
 94 |   attr(RDDdat, "type") <- type
 95 | 
 96 |   RDDdat
 97 | }
 98 | 
 99 | 
100 | ### Specific subsetting methods
101 | 
102 | #####  @S3method as.data.frame RDDdata
103 | # as.data.frame.RDDdata <- function(x) {
104 | # subset(x, y>
105 | # }as.data.frame.default(x)
106 | 
107 | #' @S3method "[" RDDdata
108 | '[.RDDdata' <- function(x,i,...){
109 |   attr_x <- attributes(x)
110 |   r <- NextMethod("[", object=as.data.frame(x))
111 | 
112 | ## keep attributes only if remains a data frame!
113 |   if(inherits(r, "data.frame")){
114 |     attr_x$row.names <- attr(r, "row.names")
115 |     attr_x$names <- attr(r, "names")
116 |     mostattributes(r) <- attr_x
117 |     attributes(r) <- attributes(r)[match(names(attr_x), names(attributes(r)))]
118 |   }
119 | #   newCla <- class(r)
120 | #   if(any(grepl("RDDdata", newCla))) newCla <- newCla[-grepl("RDDdata", newCla)]
121 | #   print(names(attributes(newCla)))
122 | # 
123 | #   if(!inherits(newCla, "data.frame")) attr(r, "class")[which(attr(r, "class")=="data.frame")] <- newCla
124 |   r
125 | }
126 | 
127 | #' @S3method subset RDDdata
128 | subset.RDDdata <- function (x, subset, select, drop = FALSE, ...) {
129 |   attr_x <- attributes(x)
130 | 
131 | ### subset code: start
132 |     if (missing(subset)) 
133 |         r <- TRUE
134 |     else {
135 |         e <- substitute(subset)
136 |         r <- eval(e, x, parent.frame())
137 |         if (!is.logical(r)) 
138 |             stop("'subset' must evaluate to logical")
139 |         r <- r & !is.na(r)
140 |     }
141 |     if (missing(select)) 
142 |         vars <- TRUE
143 |     else {
144 |         nl <- as.list(seq_along(x))
145 |         names(nl) <- names(x)
146 |         vars <- eval(substitute(select), nl, parent.frame())
147 |     }
148 |     res <- x[r, vars, drop = drop]
149 | ### subset code: end
150 | #   r <- subset.data.frame(x,...)
151 | #   r <- NextMethod("subset")
152 | 
153 | ## keep attributes only if remains a data frame!
154 |   if(inherits(r, "data.frame")){
155 |     attr_x$row.names <- attr(res, "row.names")
156 |     attr_x$names <- attr(res, "names")
157 |     mostattributes(res) <- attr_x
158 |     attributes(res) <- attributes(res)[match(names(attr_x), names(attributes(res)))]
159 |   }
160 |   res
161 | }
162 | 
163 | #' @S3method as.data.frame RDDdata
164 | as.data.frame.RDDdata <- function(x,...){
165 |   class(x) <- "data.frame"
166 |   attr(x, "hasCovar") <- NULL
167 |   attr(x, "labels")   <- NULL 
168 |   attr(x, "cutpoint") <- NULL
169 |   x
170 | }
171 | 
172 | 
173 | if(FALSE){
174 | 
175 | library(RDDtools)
176 | data(Lee2008)
177 | 
178 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
179 | Lee2008_rdd2 <- RDDdata(y=y, x=x,data=Lee2008, cutpoint=0)
180 | 
181 | all.equal(Lee2008_rdd, Lee2008_rdd2)
182 | 
183 | ### wrong covariate setting, legitimate warnings:
184 | Lee2008_rdd_lab1 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=c("a","bb"))
185 | Lee2008_rdd_lab2 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=list("a","bb"))
186 | Lee2008_rdd_lab3 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=list(x="a",u="bb"))
187 | 
188 | ### Covariate setting:
189 | Z <- data.frame(z_con=runif(nrow(Lee2008)), z_dic=factor(sample(letters[1:3], size=nrow(Lee2008), replace=TRUE)))
190 | 
191 | Lee2008_rdd_Z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0)
192 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=c("a","bb"))
193 | 
194 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha"))
195 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", u="aa"))
196 | 
197 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", covar="aa"))
198 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", z=c("aa", "hj")))
199 | 
200 | ### subsetting
201 | dat <- Lee2008_rdd
202 | dat_sub <- subset(Lee2008_rdd, x<1000)
203 | dat_ind <- Lee2008_rdd[1:nrow(Lee2008_rdd),]
204 | dat_ind_1 <- Lee2008_rdd[,1]
205 | dat_ind_2 <- Lee2008_rdd[1:5,]
206 | 
207 | 
208 | all.equal(dat, dat_sub)
209 | all.equal(attributes(dat), attributes(dat_sub))
210 | 
211 | all.equal(dat, dat_ind)
212 | all.equal(attributes(dat), attributes(dat_ind))
213 | 
214 | df<- as.data.frame(Lee2008_rdd)
215 | head(df)
216 | 
217 | 
218 | head(Lee2008_rdd_Z)
219 | colnames(Lee2008_rdd_Z[, -c(1,2)])
220 | attributes(Lee2008_rdd_Z[, -c(1,2)])
221 | 
222 | colnames(subset(Lee2008_rdd_Z,select= c("z1","z2")))
223 | 
224 | colnames(dat_sub)
225 | colnames(dat_ind)
226 | colnames(dat_ind_1)
227 | colnames(dat_ind_2)
228 | }


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | RDDtools: an R package for Regression Discontinuity Design
  2 | ========================================================
  3 | 
  4 | **RDDtools** is a new R package under development, designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing. 
  5 | 
  6 | 
  7 | Installing **RDDtools**
  8 | -----------------------
  9 | 
 10 | This github website hosts the source code. One of the easiest ways to install the package from github is by using the R package **devtools**:
 11 | 
 12 | ```{r eval=FALSE}
 13 | library(devtools)
 14 | install_github(repo="RDDtools", username="MatthieuStigler", subdir="RDDtools")
 15 | ```
 16 | 
 17 | Note however the latest version of RDDtools only works with R 3.0, and that you might need to install  [Rtools](http://stat.ethz.ch/CRAN/bin/windows/Rtools/) if on Windows. 
 18 | 
 19 | 
 20 | Documentation
 21 | -----------------------
 22 | The (preliminary) documentation is available in the help files directly, as well as in the *vignette*. The vignette can be accessed from R with vignette("RDDtools"), or by accessing the [pdf](https://github.com/MatthieuStigler/RDDtools/raw/master/RDDtools/inst/doc/RDDtools.pdf) stored on this github. 
 23 | 
 24 | RDDtools: main features
 25 | -----------------------
 26 | 
 27 | 
 28 | +  Simple visualisation of the data using binned-plot: **plot()**
 29 | 
 30 | + Bandwidth selection:
 31 |   + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: **RDDbw_IK()**
 32 |   + MSE global bandwidth procedure of [Ruppert et al 1995]: **RDDbw_RSW()**
 33 | + Estimation:
 34 |   +  RDD parametric estimation: **RDDreg_lm()** This includes specifying the polynomial order, including covariates with various specifications as advocated in [Imbens and Lemieux 2008].
 35 |   +  RDD local non-parametric estimation: **RDDreg_np()**. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation). 
 36 |   +  RDD generalised estimation: allows to use custom estimating functions to get the RDD coefficient. Could allow for example a probit RDD, or quantile regression.
 37 | + Post-Estimation tools:
 38 |   + Various tools, to obtain predictions at given covariate values ( **RDDpred()** ), or to convert to other classes, to lm ( **as.lm()** ), or to the package *np* ( **as.npreg()** ). 
 39 |   + Function to do inference with clustered data: **clusterInf()** either using a cluster covariance matrix ( **vcovCluster()** ) or by a degrees of freedom correction (as in [Cameron et al. 2008]).
 40 | + Regression sensitivity analysis:
 41 |   + Plot the sensitivity of the coefficient with respect to the bandwith: **plotSensi()**
 42 |   + *Placebo plot* using different cutpoints: **plotPlacebo()**
 43 | + Design sensitivity analysis:
 44 |   + McCrary test of manipulation of the forcing variable: wrapper **dens_test()** to the function **DCdensity()** from package **rdd**. 
 45 |   + Test of equal means of covariates: **covarTest_mean()**
 46 |   + Test of equal density of covariates: **covarTest_dens()**
 47 | + Datasets
 48 |   + Contains the seminal dataset of [Lee 2008]: **Lee2008**
 49 |   + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: **gen_MC_IK()**
 50 | 
 51 | Using RDDtools: a quick example
 52 | -----------------------
 53 | **RDDtools** works in an object-oriented way: the user has to define once the characteristic of the data, creating a *RDDdata* object, on which different anaylsis tools can be applied. 
 54 | 
 55 | ### Data preparation and visualisation
 56 | Load the package, and load the built-in dataset from [Lee 2008]:
 57 | 
 58 | ```{r options, echo=FALSE}
 59 | opts_chunk$set(warning= FALSE, message=FALSE, fig.align="center", fig.path='figuresREADME/')
 60 | ```
 61 | 
 62 | 
 63 | ```{r}
 64 | library(RDDtools)
 65 | data(Lee2008)
 66 | ```
 67 | 
 68 | Declare the data to be a *RDDdata* object:
 69 | 
 70 | ```{r}
 71 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
 72 | ```
 73 | 
 74 | 
 75 | You can now directly summarise and visualise this data:
 76 | 
 77 | ```{r dataPlot}
 78 | summary(Lee2008_rdd)
 79 | plot(Lee2008_rdd)
 80 | ```
 81 | 
 82 | ### Estimation
 83 | 
 84 | #### Parametric
 85 | 
 86 | Estimate parametrically, by fitting a 4th order polynomial:
 87 | ```{r reg_para}
 88 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4)
 89 | reg_para
 90 | 
 91 | plot(reg_para)
 92 | ```
 93 | 
 94 | 
 95 | #### Non-parametric
 96 | As well as run a simple local regression, using the [Imbens and Kalyanaraman 2012] bandwidth:
 97 | ```{r RegPlot}
 98 | bw_ik <- RDDbw_IK(Lee2008_rdd)
 99 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik)
100 | print(reg_nonpara)
101 | plot(x=reg_nonpara)
102 | 
103 | ```
104 | 
105 | ### Regression Sensitivity tests:
106 | 
107 | One can easily check the sensitivity of the estimate to different bandwidths:
108 | ```{r SensiPlot}
109 | plotSensi(reg_nonpara, from=0.05, to=1, by=0.1)
110 | ```
111 | 
112 | Or run the Placebo test, estimating the RDD effect based on fake cutpoints:
113 | ```{r placeboPlot}
114 | plotPlacebo(reg_nonpara)
115 | ```
116 | 
117 | ### Design Sensitivity tests:
118 | 
119 | Design sensitivity tests check whether the discontinuity found can actually be attributed ot other causes. Two types of tests are available:
120 | 
121 | + Discontinuity comes from manipulation: test whether there is possible manipulation around the cutoff, McCrary 2008 test: **dens_test()**
122 | + Discontinuity comes from other variables: should test whether discontinuity arises with covariates. Currently, only simple tests of equality of covariates around the threshold are available: 
123 | 
124 | #### Discontinuity comes from manipulation: McCrary test
125 | 
126 | use simply the function **dens_test()**, on either the raw data, or the regression output:
127 | ```{r DensPlot}
128 | dens_test(reg_nonpara)
129 | ```
130 | 
131 | #### Discontinuity comes from covariates: covariates balance tests
132 | 
133 | Two tests available:
134 | + equal means of covariates: **covarTest_mean()**
135 | + equal density of covariates: **covarTest_dens()**
136 | 
137 | 
138 | We need here to simulate some data, given that the Lee (2008) dataset contains no covariates.
139 | We here simulate three variables, with the second having a different mean on the left and the right. 
140 | 
141 | ```{r}
142 | set.seed(123)
143 | n_Lee <- nrow(Lee2008)
144 | Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 
145 |                 z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 
146 |                 z3 = sample(letters, size = n_Lee, replace = TRUE))
147 | Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
148 | ```
149 | 
150 | 
151 | Run the tests:
152 | ```{r}
153 | ## test for equality of means around cutoff:
154 | covarTest_mean(Lee2008_rdd_Z, bw=0.3)
155 | 
156 | ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test:
157 | covarTest_dis(Lee2008_rdd_Z, bw=0.3)
158 | ```
159 | 
160 | Tests correctly reject equality of the second, and correctly do not reject equality for the first and third. 
161 | 
162 |   [Imbens and Kalyanaraman 2012]: http://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959"
163 |   
164 |   [Lee 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697"
165 |   
166 |   [Imbens and Lemieux 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635"
167 |   
168 |   [Cameron et al. 2008]: http://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427"
169 |   
170 |   [Ruppert et al 1995]: http://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270."
171 | 
172 | 
173 |   


--------------------------------------------------------------------------------
/RDDtools/tests/packageDemo.R:
--------------------------------------------------------------------------------
  1 | library(RDDtools)
  2 | 
  3 | 
  4 | 
  5 | 
  6 | ############################################
  7 | ### STEP 0: Data Manipulation
  8 | ############################################
  9 | data(Lee2008)
 10 | head(Lee2008)
 11 | 
 12 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
 13 | 
 14 | head(Lee2008_rdd)
 15 | 
 16 | summary(Lee2008_rdd)
 17 | 
 18 | ## With covariates
 19 | 
 20 | n_Lee <- nrow(Lee2008)
 21 | 
 22 | set.seed(123)
 23 | Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE))
 24 | Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0)
 25 | 
 26 | head(Lee2008_rdd_z )
 27 | summary(Lee2008_rdd_z )
 28 | 
 29 | ### Fuzzy
 30 | set.seed(123)
 31 | ins <- rbinom(n_Lee, 1, prob=ifelse(Lee2008$x<0, 0.1, 0.9))
 32 | Lee2008_rdd_ins <- RDDdata(y=Lee2008$y, x=Lee2008$x, z=ins,cutpoint=0)
 33 | table(Lee2008$x<0, ins==0)
 34 | 
 35 | ############################################
 36 | ### STEP 2: Graphical inspection
 37 | ############################################
 38 | 
 39 | ### Plot 
 40 | plot(Lee2008_rdd)
 41 | plot(Lee2008_rdd, nplot=3, h=c(0.02, 0.03, 0.04))
 42 | plot(Lee2008_rdd, nplot=1, h=0.1)
 43 | 
 44 | plot(Lee2008_rdd, xlim=c(-0.5, 0.5))
 45 | 
 46 | # plot(Lee2008_rdd, xlim=c(-0.5, 0.5), type="ggplot")
 47 | 
 48 | 
 49 | ############################################
 50 | ### STEP 2: Regression
 51 | ############################################
 52 | 
 53 | ## few bandwidths:
 54 | RDDbw_RSW(Lee2008_rdd)
 55 | RDDbw_IK(Lee2008_rdd)
 56 | 
 57 | 
 58 | ###### Parametric regression ######
 59 | # Simple polynomial of order 1:
 60 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
 61 | print(reg_para)
 62 | summary(reg_para)
 63 | plot(reg_para)
 64 | 
 65 | all.equal(unlist(RDDpred(reg_para)), RDDcoef(reg_para, allInfo=TRUE)[1:2], check.attributes=FALSE)
 66 | 
 67 | ## Difference in means regression:
 68 | # Simple polynomial of order 0:
 69 | reg_para_0 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=0)
 70 | print(reg_para_0)
 71 | summary(reg_para_0)
 72 | plot(reg_para_0)
 73 | 
 74 | 
 75 | ## Simple polynomial of order 4:
 76 | reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4)
 77 | reg_para4
 78 | plot(reg_para4)
 79 | all.equal(unlist(RDDpred(reg_para4)), RDDcoef(reg_para4, allInfo=TRUE)[1:2], check.attributes=FALSE)
 80 | 
 81 | ## Restrict sample to bandwidth area:
 82 | bw_ik <- RDDbw_IK(Lee2008_rdd)
 83 | reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4)
 84 | reg_para_ik
 85 | plot(reg_para_ik)
 86 | 
 87 | all.equal(unlist(RDDpred(reg_para_ik)), RDDcoef(reg_para_ik, allInfo=TRUE)[1:2], check.attributes=FALSE)
 88 | 
 89 | ## Fuzzy reg
 90 | reg_para_fuzz <- RDDreg_lm(Lee2008_rdd_ins)
 91 | coef(reg_para_fuzz)
 92 | summary(reg_para_fuzz)
 93 | 
 94 | ## Covariates:
 95 | reg_para4_cov <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".")
 96 | reg_para4_cov
 97 | summary(reg_para4_cov)
 98 | 
 99 | reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(slope="separate"))
100 | summary(reg_para4_cov_slSep)
101 | RDDpred(reg_para4_cov_slSep)
102 | RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 0.2, 0.2), z2=c(0,20,20), z3b=c(0,1,0), z3c=c(0,0,1)))
103 | 
104 | 
105 | reg_para4_cov_startR <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual"))
106 | reg_para4_cov_startR
107 | summary(reg_para4_cov_startR)
108 | 
109 | plot(reg_para4_cov)
110 | 
111 | reg_para4_cov_startR_sl2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual", slope="separate"))
112 | summary(reg_para4_cov_startR_sl2)
113 | 
114 | reg_para4_cov_2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z3+I(z1^2)")
115 | reg_para4_cov_2
116 | summary(reg_para4_cov_2)
117 | 
118 | ###### Non-parametric regression ######
119 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd)
120 | print(reg_nonpara)
121 | summary(reg_nonpara)
122 | plot(x=reg_nonpara)
123 | 
124 | reg_nonpara_inflm <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm")
125 | print(reg_nonpara_inflm)
126 | summary(reg_nonpara_inflm)
127 | plot(x=reg_nonpara_inflm)
128 | 
129 | 
130 | reg_nonpara_sameSl <- RDDreg_np(RDDobject=Lee2008_rdd, slope="same")
131 | print(reg_nonpara_sameSl)
132 | summary(reg_nonpara_sameSl)
133 | 
134 | 
135 | ###### PLOT SENSI ######
136 | plSe_reg_para <- plotSensi(reg_para_ik, order=4:6)
137 | plSe_reg_para_fac <- plotSensi(reg_para_ik, type="facet", order=4:6)
138 | plSe_reg_para
139 | plSe_reg_para_fac
140 | 
141 | 
142 | plSe_reg_nonpara <- plotSensi(reg_nonpara)
143 | plSe_reg_nonpara
144 | 
145 | plSe_reg_nonpara_HC <- plotSensi(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x))
146 | plSe_reg_nonpara_HC
147 | 
148 | plSe_reg_para_0 <- plotSensi(reg_para_0, plot=FALSE)
149 | plSe_reg_para_0
150 | 
151 | plSe_reg_para_0_gg <- plotSensi(reg_para_0, plot=FALSE, output="ggplot")
152 | str(plSe_reg_para_0_gg)
153 | 
154 | 
155 | ###### Post-inference: ######
156 | 
157 | clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="df-adj")
158 | clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="HC")
159 | 
160 | 
161 | ############################################
162 | ### STEP 3: Validty tests
163 | ############################################
164 | 
165 | ## Placebo test:
166 | placeb_dat_reg_nonpara <- computePlacebo(reg_nonpara)
167 | 
168 | plotPlacebo(placeb_dat_reg_nonpara)
169 | plotPlacebo(placeb_dat_reg_nonpara, device="base")
170 | 
171 | 
172 | plotPlaceboDens(placeb_dat_reg_nonpara)
173 | 
174 | ## check invisible return:
175 | ptPl_reg_nonpara <- plotPlacebo(reg_nonpara, plot=FALSE)
176 | ptPl_reg_nonpara
177 | 
178 | ptPl_reg_nonpara2 <- plotPlacebo(reg_nonpara, plot=FALSE, output="ggplot")
179 | ptPl_reg_nonpara2
180 | 
181 | # with HC:
182 | ptPl_reg_nonpara_HC <- plotPlacebo(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x))
183 | ptPl_reg_nonpara_HC
184 | 
185 | ptPl_reg_para_0 <- plotPlacebo(reg_para_0)
186 | ptPl_reg_para_0
187 | 
188 | 
189 | 
190 | ## density tests
191 | dens_test(Lee2008_rdd)
192 | dens_test(reg_para_0, plot=FALSE)
193 | dens_test(reg_nonpara, plot=FALSE)$test.output[c("theta", "se", "z", "p", "binsize", "bw", "cutpoint")]
194 | 
195 | 
196 | ## Covariates tests
197 | covarTest_mean(Lee2008_rdd_z)
198 | covarTest_mean(Lee2008_rdd_z, bw=0.1)
199 | covarTest_dis(Lee2008_rdd_z)
200 | covarTest_dis(Lee2008_rdd_z, bw=0.1)
201 | 
202 | covarTest_mean(reg_para4_cov)
203 | covarTest_dis(reg_para4_cov)
204 | #### as npreg
205 |   reg_nonpara_np <- as.npreg(reg_nonpara, adjustIK_bw=FALSE)
206 |   reg_nonpara_np
207 |   RDDcoef(reg_nonpara_np)
208 |   RDDcoef(reg_nonpara_np, allCo=TRUE)
209 |   RDDcoef(reg_nonpara_np, allInfo=TRUE)
210 |   RDDcoef(reg_nonpara_np, allInfo=TRUE, allCo=TRUE)
211 | 
212 | ## Compare with result obtained with a Gaussian kernel:
213 |   bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara))
214 |   reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm)
215 |   all.equal(RDDcoef(reg_nonpara_gaus, allCo=TRUE),RDDcoef(reg_nonpara_np, allCo=TRUE), check.attributes=FALSE) 
216 | 
217 | 
218 | 
219 | #### methods
220 | 
221 | regs_all <- list(reg_para=reg_para,
222 |                  reg_para_0=reg_para_0,
223 |                  reg_para4=reg_para4,
224 |                  reg_para_ik=reg_para_ik,
225 |                  reg_para_fuzz=reg_para_fuzz,
226 |                  reg_para4_cov=reg_para4_cov,
227 |                  reg_para4_cov_slSep=reg_para4_cov_slSep,
228 |                  reg_para4_cov_startR=reg_para4_cov_startR,
229 |                  reg_para4_cov_startR_sl2=reg_para4_cov_startR_sl2,
230 |                  reg_nonpara=reg_nonpara,
231 |                  reg_nonpara_inflm=reg_nonpara_inflm,
232 |                  reg_nonpara_sameSl=reg_nonpara_sameSl)
233 | capply <- function(x){
234 |   n.obs <- sapply(x, length)
235 |   seq.max <- seq_len(max(n.obs))
236 |   t(sapply(x, "[", i = seq.max))
237 | }
238 | 
239 | capply(lapply(regs_all, coef))
240 | sapply(regs_all, RDDcoef)
241 | RDDpred_issue <- c("reg_para_0", "reg_para_fuzz", "reg_nonpara", "reg_nonpara_sameSl")
242 | sapply(regs_all[!names(regs_all)%in%RDDpred_issue], RDDpred)
243 | 
244 | sapply(regs_all, RDDtools:::getCutpoint)
245 | lapply(regs_all, plotSensi, plot=FALSE)
246 | 
247 | sapply(regs_all, function(x) dens_test(x, plot=FALSE)[c("p.value", "statistic", "estimate")])
248 | 
249 | 


--------------------------------------------------------------------------------
/RDDtools/R/bw_IK.R:
--------------------------------------------------------------------------------
  1 | #' Imbens-Kalyanaraman Optimal Bandwidth Calculation
  2 | #' 
  3 | #' Imbens-Kalyanaraman optimal bandwidth
  4 | #' for local linear regression in Regression discontinuity designs.
  5 | #' 
  6 | #' @param RDDobject of class RDDdata created by \code{\link{RDDdata}}
  7 | #' @param kernel The type of kernel used: either \code{triangular} or \code{uniform}. 
  8 | #' @return The optimal bandwidth
  9 | #' @references Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," 
 10 | #' Review of Economic Studies (2012) 79, 933-959
 11 | #' @seealso \code{\link{RDDbw_RSW}} Global bandwidth selector of Ruppert, Sheather and Wand (1995) 
 12 | #' @export
 13 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}>
 14 | #' @examples
 15 | #' data(Lee2008)
 16 | #' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0)
 17 | #' RDDbw_IK(rd)
 18 | 
 19 | 
 20 | RDDbw_IK <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal")) {
 21 | 
 22 |   kernel <- match.arg(kernel)
 23 |   checkIsRDD(RDDobject)
 24 |   cutpoint <- getCutpoint(RDDobject)
 25 | 
 26 |   res <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=FALSE, kernel=kernel)
 27 |   return(res)
 28 | 
 29 | }
 30 | 
 31 | IK_bias <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) {
 32 | 
 33 |   kernel <- match.arg(kernel)
 34 |   checkIsRDD(RDDobject)
 35 |   cutpoint <- getCutpoint(RDDobject)
 36 | 
 37 |   resB <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=TRUE, kernel=kernel)
 38 | 
 39 | ## compute C1: see IK equ 5, and Fan Jijbels (1996, 3.23)
 40 | # is done in R with locpol, computeMu(i=2, equivKernel(TrianK, nu=0, deg=1, lower=0, upper=Inf), lower=0, upper=Inf)
 41 |   C1 <- switch(kernel, "Triangular"= -0.1, "Uniform"= -0.1666667, "Normal"= -0.7519384) ## from: 
 42 | 
 43 | ## Compute bias as in IK equ:5, 
 44 | # note here 1/4 is outside C1
 45 |   if(missing(bw))  bw <- resB$h_opt
 46 |   res<-  C1 * 1/2 * bw^2 *(resB$m2_right-resB$m2_left)
 47 |   return(res)
 48 | 
 49 | }
 50 | 
 51 | IK_var <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) {
 52 | 
 53 |   kernel <- match.arg(kernel)
 54 |   checkIsRDD(RDDobject)
 55 |   cutpoint <- getCutpoint(RDDobject)
 56 | 
 57 |   resB <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=TRUE, kernel=kernel)
 58 | 
 59 | ## compute C2: see IK equ 5, and Fan Jijbels (1996, 3.23)
 60 | # is done in R with locpol, computeRK(equivKernel(TrianK, nu=0, deg=1, lower=0, upper=Inf), lower=0, upper=Inf)
 61 |   C2 <- switch(kernel, "Triangular"= 4.8, "Uniform"= 4, "Normal"=1.785961) ## from: 
 62 | 
 63 | ## Compute var as in IK equ:5, 
 64 |   if(missing(bw))  bw <- resB$h_op
 65 |   elem1 <- (resB$var_inh_left+resB$var_inh_right)/resB$f_cu
 66 |   elem2 <- C2/(nrow(RDDobject)*bw)
 67 |   res <- elem1*elem2
 68 |   res
 69 | }
 70 | 
 71 | IK_amse <- function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) {
 72 | 
 73 |   var <- IK_var(RDDobject=RDDobject, kernel=kernel, bw=bw)
 74 |   bias <- IK_bias(RDDobject=RDDobject, kernel=kernel, bw=bw)
 75 |   res <- bias^2+var
 76 |   res
 77 | }
 78 | 
 79 | 
 80 | RDDbw_IK_low <-function (X,Y,threshold=0,verbose=FALSE, type=c("RES", "RES_imp","WP"), returnBig=FALSE, kernel=c("Triangular", "Uniform", "Normal")) {
 81 |   
 82 |   type <- match.arg(type)
 83 |   kernel <- match.arg(kernel)
 84 | 
 85 | 
 86 |   N <- length(X)
 87 |   N_left  <- sum(X<threshold, na.rm=TRUE)
 88 |   N_right <- sum(X>=threshold, na.rm=TRUE)
 89 | 
 90 |   
 91 | ##########
 92 | ### STEP 1
 93 | ##########
 94 | 
 95 | ## Silverman bandwidth
 96 |   h1 <-  1.84*sd(X)*N^(-1/5)
 97 |   if(verbose)   cat("\n-h1:", h1)
 98 | 
 99 | ## f(cut)
100 |   isIn_h1_left  <- X>=(threshold-h1) & X<threshold
101 |   isIn_h1_right <- X>=threshold & X<=(threshold+h1)
102 | 
103 |   NisIn_h1_left  <- sum(isIn_h1_left, na.rm=TRUE)
104 |   NisIn_h1_right <- sum(isIn_h1_right, na.rm=TRUE)
105 |   if(verbose)   cat("\n-N left /right:", NisIn_h1_left, NisIn_h1_right)
106 | 
107 | 
108 |   f_cut <-(NisIn_h1_left+NisIn_h1_right)/(2*N*h1)
109 |   if(verbose)   cat("\n-f(threshold):", f_cut)
110 | 
111 | ## Variances : Equ (13) 
112 | 
113 |   var_inh_left  <- var(Y[isIn_h1_left], na.rm=TRUE)
114 |   var_inh_right <- var(Y[isIn_h1_right], na.rm=TRUE)
115 | 
116 | # problem with working pap0er: Equ 4.9 is different!
117 |   if(type=="WP"){
118 |     denom <- 1/(NisIn_h1_left+NisIn_h1_right) 
119 |     var_inh_global <- denom* ((NisIn_h1_left-1)* var_inh_left + (NisIn_h1_right-1)* var_inh_right)
120 |   }
121 | 
122 |   if(verbose){
123 |     cat("\n-Sigma^2 left:", var_inh_left, "\n-Sigma^2 right:", var_inh_right)
124 |   }
125 | ##########
126 | ### STEP 2
127 | ##########
128 | 
129 | 
130 | ## Global function of order 3: Equ (14) 
131 |   reg <-lm(Y~I(X>=threshold)+I(X-threshold)+I((X-threshold)^2)+I((X-threshold)^3))
132 |   m3<- 6*coef(reg)[5]
133 |   if(verbose)   cat("\n-m3:", m3)
134 | 
135 | 
136 | ## left and right bandwidths: Equ (15) 
137 |   Ck_h2 <- 3.556702 # 7200^(1/7)
138 |   h2_left  <- Ck_h2 * ( var_inh_left  /(f_cut*m3^2))^(1/7) * N_left^(-1/7)
139 |   h2_right <- Ck_h2 * ( var_inh_right /(f_cut*m3^2))^(1/7) * N_right^(-1/7)
140 | 
141 |   if(verbose)   cat("\n-h2 left:", h2_left, "\n-h2 right:", h2_right)
142 | 
143 | ## second derivatives right/left
144 |   isIn_h2_left  <- X>=(threshold-h2_left) & X<threshold
145 |   isIn_h2_right <- X>=threshold & X<=(threshold+h2_right)
146 | 
147 |   N_h2_left <- sum(isIn_h2_left, na.rm=TRUE)
148 |   N_h2_right <- sum(isIn_h2_right, na.rm=TRUE)
149 | 
150 |   reg2_left  <-lm(Y~ I(X-threshold)+I((X-threshold)^2),subset=isIn_h2_left)
151 |   reg2_right <-lm(Y~ I(X-threshold)+I((X-threshold)^2),subset=isIn_h2_right)
152 | 
153 |   m2_left  <- as.numeric(2*coef(reg2_left)[3])
154 |   m2_right <- as.numeric(2*coef(reg2_right)[3])
155 | 
156 |   if(verbose)   cat("\n-m2 left:", m2_left, "\n-m2 right:", m2_right)
157 | 
158 | ##########
159 | ### STEP 3
160 | ##########
161 | 
162 | ## Regularization: Equ (16) 
163 |   if(type=="RES"){
164 |     r_left  <- (2160*var_inh_left)  / (N_h2_left *h2_left^4)
165 |     r_right <- (2160*var_inh_right) / (N_h2_right*h2_right^4)
166 |   } else {
167 |     r_left  <- (2160*var_inh_global) / (N_h2_left *h2_left^4)
168 |     r_right <- (2160*var_inh_global) / (N_h2_right*h2_right^4)
169 |   }
170 | 
171 | 
172 |   if(verbose)   cat("\n-Reg left:", r_left, "\n-Reg right:", r_right)
173 | 
174 | ## Compute kernel dependent constant: (see file ~/Dropbox/HEI/rdd/Rcode/IK bandwidth/bandwidth_comput.R)
175 |   Ck <- switch(kernel, "Triangular"=3.4375, "Uniform"=2.70192, "Normal"=1.25864) # is not 5.4 as in paper since our kernel is on I(|x|<1), not <1/2
176 | 
177 | ## Final bandwidth: Equ (17) 
178 |   h_opt <- Ck * ( (var_inh_left+ var_inh_right) / (f_cut * ((m2_right-m2_left)^2 + r_left +r_right)))^(1/5) * N^(-1/5)
179 |   names(h_opt) <- "h_opt"
180 | 
181 |   if(verbose)   cat("\n\n")
182 | 
183 | ###
184 |   if(returnBig){
185 |     res<- list()
186 |     res$h_opt <- as.numeric(h_opt)
187 |     res$var_inh_left <- var_inh_left
188 |     res$var_inh_right <- var_inh_right
189 |     res$m2_right <- m2_right
190 |     res$m2_left <- m2_left
191 |     res$f_cut <- f_cut
192 |     res$h2_left <- h2_left
193 |     res$h2_right <- h2_right
194 |   } else {
195 |   res <- h_opt
196 |   }
197 | 
198 |  return(res)
199 | }
200 | 
201 | if(FALSE){
202 |   lee_dat4 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE)
203 |   colnames(lee_dat4) <- c("X", "Y")
204 |   IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=TRUE)
205 |   IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=TRUE, type="WP")
206 |   IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=FALSE, returnBig=TRUE)
207 | 
208 | 
209 | data(Lee2008)
210 | Lee2008_rdd <- RDDdata(x=Lee2008$x,y=Lee2008$y , cutpoint=0)
211 | 
212 | ### 
213 | bw_IK <- RDDbw_IK(Lee2008_rdd)
214 | bws <- sort(c(bw_IK, seq(0.05, 0.5, by=0.05)))
215 | bi <- Vectorize(IK_bias, vectorize.args="bw")(Lee2008_rdd, bw=bws)
216 | va <- Vectorize(IK_var, vectorize.args="bw")(Lee2008_rdd, bw=bws)
217 | ms <- Vectorize(IK_amse, vectorize.args="bw")(Lee2008_rdd, bw=bws)
218 | 
219 | df<- data.frame(bw=rep(bws,3), value=c(ms, va, bi^2), type=rep(c("ms", "va", "bias^2"), each=length(bws)))
220 | 
221 | 
222 | # qplot(x=bw, y=value, data=df, geom="line", colour=type)+geom_point(data=subset(df, value==min(subset(df, type=="ms", "value"))))
223 | 
224 | bws_03 <- sort(c(bw_IK, seq(0.25, 0.35, by=0.005)))
225 | ms_03 <- Vectorize(IK_amse, vectorize.args="bw")(Lee2008_rdd, bw=bws_03)
226 | df2 <- data.frame(bw=bws_03,mse=ms_03)
227 | 
228 | subset(df2, mse==min(mse)) ## 1.78, not 1.74 from: 
229 | qplot(x=bw, y=mse, data=df2, geom="line")
230 | }
231 | 


--------------------------------------------------------------------------------
/RDDtools/R/covarTests.R:
--------------------------------------------------------------------------------
  1 | #' Testing for balanced covariates: equality of means with t-test
  2 | #' 
  3 | #' Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold 
  4 | #' 
  5 | #' @param object object of class RDDdata
  6 | #' @param bw a bandwidth
  7 | #' @param paired Argument of the \code{\link{t.test}} function: logical indicating whether you want paired t-tests.
  8 | #' @param var.equal Argument of the \code{\link{t.test}} function:  logical variable indicating whether to treat the two variances as being equal
  9 | #' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function
 10 | #' @param \ldots currently not used
 11 | #' @return A data frame with, for each covariate, the mean on each size, the difference, t-stat and ts p-value. 
 12 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}>
 13 | #' @seealso \code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution
 14 | #' @examples
 15 | #' data(Lee2008)
 16 | #' 
 17 | #' ## Add randomly generated covariates
 18 | #' set.seed(123)
 19 | #' n_Lee <- nrow(Lee2008)
 20 | #' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 
 21 | #'                 z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 
 22 | #'                 z3 = sample(letters, size = n_Lee, replace = TRUE))
 23 | #' Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
 24 | #' 
 25 | #' ## test for equality of means around cutoff:
 26 | #' covarTest_mean(Lee2008_rdd_Z, bw=0.3)
 27 | #' 
 28 | #' ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test:
 29 | #' covarTest_dis(Lee2008_rdd_Z, bw=0.3)
 30 | #' 
 31 | #' ## covarTest_mean works also on regression outputs (bw will be taken from the model)
 32 | #' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z)
 33 | #' covarTest_mean(reg_nonpara)
 34 | 
 35 | 
 36 | 
 37 | 
 38 | 
 39 | #' @export
 40 | covarTest_mean <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) 
 41 |   UseMethod("covarTest_mean")
 42 | 
 43 | #' @rdname covarTest_mean
 44 | #' @method covarTest_mean RDDdata
 45 | #' @S3method covarTest_mean RDDdata
 46 | covarTest_mean.RDDdata <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) {
 47 | 
 48 |   cutpoint <- getCutpoint(object)
 49 |   covar <- getCovar(object)
 50 |   cutvar <- object$x
 51 | 
 52 |   covarTest_mean_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, paired = paired, var.equal = var.equal, p.adjust=p.adjust)
 53 | 
 54 | }
 55 | 
 56 | 
 57 | #' @rdname covarTest_mean
 58 | #' @method covarTest_mean RDDreg
 59 | #' @S3method covarTest_mean RDDreg
 60 | covarTest_mean.RDDreg <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) {
 61 |   
 62 |   cutpoint <- getCutpoint(object)
 63 |   dat <- object$RDDslot$RDDdata
 64 |   covar <- getCovar(dat)
 65 |   cutvar <- dat$x
 66 |   if(is.null(bw)) bw <- getBW(object)
 67 |   
 68 |   covarTest_mean_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, paired = paired, var.equal = var.equal, p.adjust=p.adjust)
 69 |   
 70 | }
 71 | 
 72 | 
 73 | covarTest_mean_low <- function(covar,cutvar, cutpoint, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) {
 74 | 
 75 |   p.adjust <- match.arg(p.adjust)
 76 | 
 77 | ## subset
 78 |   if(!is.null(bw)){
 79 |     isInH <- cutvar >= cutpoint -bw & cutvar <= cutpoint +bw
 80 |     covar <- covar[isInH,]
 81 |     cutvar <- cutvar[isInH]
 82 |   }
 83 |   regime <- cutvar < cutpoint
 84 | 
 85 | ## Split data
 86 |   covar_num <- sapply(covar, as.numeric)
 87 | 
 88 |   tests <-apply(covar_num, 2, function(x) t.test(x[regime], x[!regime], paired=paired, var.equal=var.equal))
 89 |   tests_vals <- sapply(tests, function(x) c(x[["estimate"]], diff(x[["estimate"]]),x[c("statistic", "p.value")]))
 90 | 
 91 | ## Adjust p values if required:
 92 |   if(p.adjust!="none") tests_vals["p.value",] <- p.adjust(tests_vals["p.value",], method=p.adjust)
 93 | 
 94 | ## Print results
 95 |   res <- t(tests_vals)
 96 |   colnames(res)[3] <- "Difference"
 97 |   res
 98 | 
 99 | 
100 | }
101 | 
102 | 
103 | 
104 | 
105 | #' Testing for balanced covariates: equality of distribution
106 | #' 
107 | #' Tests equality of distribution with a Kolmogorov-Smirnov for each covariates, between the two full groups or around the discontinuity threshold 
108 | #' 
109 | #' @param object object of class RDDdata
110 | #' @param bw a bandwidth
111 | #' @param exact Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed.
112 | #' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function
113 | #' @param \ldots currently not used
114 | #' @return A data frame  with, for each covariate, the K-S statistic and its p-value. 
115 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}>
116 | #' @seealso \code{\link{covarTest_mean}} for the t-test of equality of means
117 | #' @examples
118 | #' data(Lee2008)
119 | #' 
120 | #' ## Add randomly generated covariates
121 | #' set.seed(123)
122 | #' n_Lee <- nrow(Lee2008)
123 | #' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 
124 | #'                 z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 
125 | #'                 z3 = sample(letters, size = n_Lee, replace = TRUE))
126 | #' Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0)
127 | #' 
128 | #' ## Kolmogorov-Smirnov test of equality in distribution:
129 | #' covarTest_dis(Lee2008_rdd_Z, bw=0.3)
130 | #' 
131 | #' ## Can also use function covarTest_dis() for a t-test for equality of means around cutoff:
132 | #' covarTest_mean(Lee2008_rdd_Z, bw=0.3)
133 | #' ## covarTest_dis works also on regression outputs (bw will be taken from the model)
134 | #' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z)
135 | #' covarTest_dis(reg_nonpara)
136 | 
137 | #' @export
138 | covarTest_dis <- function(object, bw,  exact=NULL, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni"))
139 |   UseMethod("covarTest_dis")
140 | 
141 | #' @rdname covarTest_dis
142 | #' @method covarTest_dis RDDdata
143 | #' @S3method covarTest_dis RDDdata
144 | covarTest_dis.RDDdata <- function(object, bw=NULL, exact = FALSE,  p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) {
145 | 
146 |   cutpoint <- getCutpoint(object)
147 |   covar <- getCovar(object)
148 |   cutvar <- object$x
149 | 
150 |   covarTest_dis_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, exact= exact, p.adjust=p.adjust)
151 | 
152 | }
153 | 
154 | #' @rdname covarTest_dis
155 | #' @method covarTest_dis RDDreg
156 | #' @S3method covarTest_dis RDDreg
157 | covarTest_dis.RDDreg <- function(object, bw=NULL, exact = FALSE,  p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) {
158 |   
159 |   cutpoint <- getCutpoint(object)
160 |   dat <- object$RDDslot$RDDdata
161 |   covar <- getCovar(dat)
162 |   cutvar <- dat$x
163 |   if(is.null(bw)) bw <- getBW(object)
164 |   
165 |   covarTest_dis_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, exact= exact, p.adjust=p.adjust)
166 |   
167 | }
168 | 
169 | covarTest_dis_low <- function(covar,cutvar, cutpoint, bw=NULL, exact=NULL, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) {
170 | 
171 |   p.adjust <- match.arg(p.adjust)
172 | 
173 | ## subset
174 |   if(!is.null(bw)){
175 |     isInH <- cutvar >= cutpoint -bw & cutvar <= cutpoint +bw
176 |     covar <- covar[isInH,]
177 |     cutvar <- cutvar[isInH]
178 |   }
179 |   regime <- cutvar < cutpoint
180 | 
181 | 
182 | 
183 | ## Split data
184 |   covar_num <- sapply(covar, as.numeric)
185 | 
186 |   tests <-apply(covar_num, 2, function(x) ks.test(x[regime], x[!regime], exact=exact))
187 |   tests_vals <- sapply(tests, function(x) x[c("statistic", "p.value")])
188 | 
189 | ## Adjust p values if required:
190 |   if(p.adjust!="none") tests_vals["p.value",] <- p.adjust(tests_vals["p.value",], method=p.adjust)
191 | 
192 | ## Print results
193 |   res <- t(tests_vals)
194 |   res
195 | 
196 | 
197 | }
198 | 
199 | 
200 | ##########################################
201 | ###### TODO
202 | ##########################################
203 | ## -mean: can use t.test for factors? What else? Count test? Warn for character/factors!
204 | ## -mean: add multivariate hotelling
205 | ## -ks: ok for factors?
206 | ## -do qqplot?
207 | ## -add methods for regs? Once converted to other objects...
208 | ## -add example and bettet output documentation
209 | ##
210 | ##
211 | ##
212 | 
213 | ##########################################
214 | ###### TESTS
215 | ##########################################
216 | 
217 | if(FALSE){
218 | library(Hotelling)
219 | library(mvtnorm)
220 | 
221 | data <- rmvnorm(n=200, mean=c(1,2))
222 | spli <- sample(c(TRUE, FALSE), size=200, replace=TRUE)
223 | 
224 | a<-hotel.stat(data[spli,],data[!spli,])
225 | a
226 | 
227 | b<-hotel.test(data[spli,],data[!spli,])
228 | b
229 | b$stats
230 | 
231 | }
232 | 
233 | 
234 | 
235 | 
236 | if(FALSE){
237 | library(RDDtools)
238 | data(Lee2008)
239 | 
240 | Z <- data.frame(z_con=runif(nrow(Lee2008)), z_dic=factor(sample(letters[1:3], size=nrow(Lee2008), replace=TRUE)))
241 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0)
242 | 
243 | 
244 | covarTest_mean(object=Lee2008_rdd)
245 | covarTest_dis(object=Lee2008_rdd)
246 | 
247 | 
248 | 
249 | }
250 | 


--------------------------------------------------------------------------------
/RDDtools/tests/RDDpred.R:
--------------------------------------------------------------------------------
  1 | library(RDDtools)
  2 | library(car)
  3 | 
  4 | 
  5 | #### DATA
  6 | data(Lee2008)
  7 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
  8 | 
  9 | n_Lee <- nrow(Lee2008)
 10 | 
 11 | set.seed(123)
 12 | Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE))
 13 | Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0)
 14 | 
 15 | #### REGS
 16 | bw_IK <- RDDbw_IK(Lee2008_rdd_z)
 17 | w_IK <- RDDtools:::Kernel_tri(Lee2008_rdd_z$x, 0, bw_IK)
 18 | reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate"))
 19 | reg_para4_cov_slSep_W <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate"), weights=w_IK)
 20 | reg_np_cov <- RDDreg_np(RDDobject=Lee2008_rdd_z, covariates="z1", bw=bw_IK, inference="lm")
 21 | 
 22 | 
 23 | 
 24 | 
 25 | reg_para4_cov_slSep_2Z <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1+z2", covar.opt=list(slope="separate"))
 26 | 
 27 | reg_li <- list( reg_para4_cov_slSep=reg_para4_cov_slSep, 
 28 | 		reg_para4_cov_slSep_W=reg_para4_cov_slSep_W,
 29 | 		reg_np_cov=reg_np_cov,
 30 | 		reg_para4_cov_slSep_2Z=reg_para4_cov_slSep_2Z)
 31 | 
 32 | checkRDDmean <- function(x, n=5){
 33 |   covDF <- model.frame(x)
 34 |   zDF <- grep("z", colnames(covDF), value=FALSE)
 35 |   hasD <- zDF[-grep(":", colnames(covDF)[zDF])]
 36 | 
 37 |   DF_1 <- covDF[1:n,hasD, drop=FALSE]
 38 |   DF_2 <- data.frame(t(colMeans(DF_1)))
 39 | 
 40 |   pred_1 <- RDDpred(x, covdata=DF_1, stat="mean")
 41 |   pred_2 <- RDDpred(x, covdata=DF_2)
 42 |   all.equal(pred_1, pred_2, check.attributes=FALSE)
 43 | }
 44 | 
 45 | sapply(reg_li, checkRDDmean)
 46 | 
 47 | sapply(reg_li, function(x) all.equal(unlist(RDDpred(x)),RDDcoef(x, allInfo=TRUE)[1,1:2], check.attributes=FALSE))
 48 | 
 49 | 
 50 | # 
 51 | # reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
 52 | # print(reg_para)
 53 | # summary(reg_para)
 54 | # plot(reg_para)
 55 | # 
 56 | # formula(reg_para)
 57 | # 
 58 | # update(as.formula("y ~ D + `x^1` + `x^1_right`"), reg_para)
 59 | # reg_para_l <- as.lm(reg_para)
 60 | # # update(reg_para_l, y ~ D + `x^1` + `x^1_right`)
 61 | # 
 62 | # mf <- model.frame(reg_para)
 63 | # 
 64 | # lm("y ~ D + `x^1` + `x^1_right`", mf)
 65 | # a<-lm("y ~ -1 + D +I(1-D) + `x^1` + `x^1_right`", mf)
 66 | # diff(coef(a)[2:1])
 67 | # coef(reg_para)
 68 | # 
 69 | # # deltaMethod(a, "I(1-D) - D", parameterNames=paste("a", 1:4, sep=""))
 70 | # deltaMethod(a, "a1 - a2", parameterNames=paste("a", 1:4, sep=""))
 71 | # coef(summary(reg_para))[2,]
 72 | # 
 73 | # reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate"))
 74 | # 
 75 | mf_2 <- model.frame(reg_para4_cov_slSep)
 76 | # formula(reg_para4_cov_slSep)
 77 | # 
 78 | aa <- lm("y ~ D + `x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2)
 79 | aaa <- lm("y ~ -1+ D + I(1-D)+`x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2)
 80 | # 
 81 | # diff(coef(aaa)[2:1])
 82 | # RDDpred(reg_para4_cov_slSep)
 83 | # RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0))
 84 | # 
 85 | # RDDcoef(reg_para4_cov_slSep, allInfo=TRUE)
 86 | 
 87 | ## compare RDDpred and Delta at 1:
 88 | rdd_p_1 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=1))
 89 | delta_1 <- deltaMethod(aaa, "a1 - a2 + a12", parameterNames=paste("a", 1:12, sep=""))
 90 | rdd_p_1
 91 | delta_1
 92 | all.equal(unlist(rdd_p_1), drop(as.matrix(delta_1[1:2])), check.attributes=FALSE)
 93 | 
 94 | ## compare RDDpred and Delta at 0:
 95 | rdd_p_0 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0))
 96 | rdd_c_0 <- RDDcoef(reg_para4_cov_slSep, allInfo=TRUE)
 97 | delta_0 <- deltaMethod(aaa, "a1 - a2 ", parameterNames=paste("a", 1:12, sep=""))
 98 | rdd_p_0
 99 | rdd_c_0
100 | delta_0
101 | all.equal(unlist(rdd_p_0), drop(as.matrix(delta_0[1:2])), check.attributes=FALSE)
102 | all.equal(unlist(rdd_p_0), drop(as.matrix(rdd_c_0[1:2])), check.attributes=FALSE)
103 | 
104 | ## compare RDDpred and Delta at 2 points:
105 | rdd_p_01_AGG <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.5)))
106 | rdd_p_01_all <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)))
107 | rdd_p_01_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="sum")
108 | rdd_p_01_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="mean")
109 | 
110 | delta_01_S <- deltaMethod(aaa, "2*(a1 - a2) +1*a12", parameterNames=paste("a", 1:12, sep=""))
111 | delta_01_M <- deltaMethod(aaa, "(2*(a1 - a2) +1*a12)/2", parameterNames=paste("a", 1:12, sep=""))
112 | delta_01_S
113 | delta_01_M
114 | 
115 | all(delta_01_S/2==delta_01_M)
116 | 
117 | ## compare individuals (stat=ident)
118 | all.equal(rdd_p_01_all$fit, c(delta_0[1,1], delta_1[1,1]))
119 | all.equal(rdd_p_01_all$se.fit, c(delta_0[1,2], delta_1[1,2]))
120 | c(rdd_p_01_M$fit/2, rdd_p_01_AGG$fit)
121 | 
122 | ## compare sum (stat=sum)
123 | all.equal(unlist(rdd_p_01_S), drop(as.matrix(delta_01_S[1:2])), check.attributes=FALSE)
124 | 
125 | ## compare mean (stat=mean)
126 | all.equal(unlist(rdd_p_01_M), drop(as.matrix(delta_01_M[1:2])), check.attributes=FALSE)
127 | all.equal(rdd_p_01_M$fit, rdd_p_01_S$fit/2)
128 | all.equal(rdd_p_01_M$fit, rdd_p_01_AGG$fit, check.attributes=FALSE)
129 | all.equal(rdd_p_01_M$se.fit, rdd_p_01_AGG$se.fit, check.attributes=FALSE)
130 | 
131 | ## compare RDDpred and Delta at 5 first points:
132 | ind_z_pos <- head(which(Lee2008_rdd_z$z1>0),5)
133 | 
134 | rdd_p_01_5z_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="sum")
135 | rdd_p_01_5z_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[1:5])), stat="sum")
136 | rdd_p_01_5zPos_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[ind_z_pos]), stat="sum")
137 | rdd_p_01_5zPos_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[ind_z_pos])), stat="sum")
138 | rdd_p_01_5z_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="mean")
139 | rdd_p_01_5z_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1[1:5])), stat="mean")
140 | rdd_p_01_ALLz_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1), stat="mean")
141 | rdd_p_01_ALLz_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1)), stat="mean")
142 | 
143 | del <- function(x, mean=FALSE) {
144 |   n <- length(x)
145 |   res <- paste(c(paste(n, "*(a1-a2) "), paste(x, "*a12", sep="")), collapse=" +")
146 |   su <- sum(x)
147 |   sig <- if(sign(su)==1) "+" else  NULL
148 |   res <- paste(n, "*(a1-a2) ", sig, su, "*a12", sep="")
149 |   if(mean) res <- paste("(", res, ")/", n, sep="")
150 |   res
151 | }
152 | 
153 | del(x=Lee2008_rdd_z$z1[1:5])
154 | delta_01_5z_S <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5]), parameterNames=paste("a", 1:12, sep=""), func="RDD")
155 | delta_01_5z_M <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5], mean=TRUE), parameterNames=paste("a", 1:12, sep=""), func="RDD")
156 | 
157 | all.equal(unlist(rdd_p_01_5z_S), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE)
158 | all.equal(unlist(rdd_p_01_5z_Sb), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE)
159 | all.equal(unlist(rdd_p_01_5z_M), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE)
160 | all.equal(unlist(rdd_p_01_5z_Mb), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE)
161 | 
162 | ## All z:
163 | # all.equal(rdd_p_01_ALLz_M, rdd_p_01_ALLz_Mb, check.attributes=FALSE)
164 | 
165 | #### Weighted mean!!
166 | w_5 <- c(0.1, 0.2, 0.4, 0.2, 0.1)
167 | w <- c(0.4, 0.6)
168 | rdd_p_01_Sid <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="identity")
169 | wm <- weighted.mean(rdd_p_01_Sid$fit , w=w)
170 | 
171 | delta_2z_w <- deltaMethod(aaa, "0.4*(a1 - a2) + 0.4*0.2*a12+0.6*(a1 - a2) + 0.6*a12", parameterNames=paste("a", 1:12, sep=""))
172 | delta_2z_w2 <- deltaMethod(aaa, "1*(a1 - a2) + 0.4*0.2*a12 + 0.6*a12", parameterNames=paste("a", 1:12, sep=""))
173 | delta_2z_w3 <- deltaMethod(aaa, "1*(a1 - a2) + a12*(0.4*0.2 + 0.6)", parameterNames=paste("a", 1:12, sep=""))
174 | all(delta_2z_w==delta_2z_w2)
175 | all.equal(delta_2z_w, delta_2z_w3, check.attributes=FALSE)
176 | all.equal(delta_2z_w[1,1],wm)
177 | 
178 | rdd_p_01_W_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="sum", weights=w)
179 | rdd_p_01_W_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="mean", weights=w)
180 | all.equal(rdd_p_01_W_M$fit,wm)
181 | 
182 | all.equal(unlist(rdd_p_01_W_S), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE)
183 | all.equal(unlist(rdd_p_01_W_M), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE)
184 | 
185 | 
186 | ###### 2 Z:
187 | df_2Z_5z <- Lee2008_rdd_z[1:5, c("z1", "z2")]
188 | df_2Z_5z_M <- data.frame(t(colMeans(df_2Z_5z)))
189 | df_2Z_5z_Mw <- data.frame(t(apply(df_2Z_5z, 2, weighted.mean, w=w_5)))
190 | 
191 | rdd_p_sZ_5z_S <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="sum")
192 | rdd_p_sZ_5z_M <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean")
193 | rdd_p_sZ_5z_Mb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_M, stat="sum")
194 | 
195 | rdd_p_sZ_5z_MW <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean", weights=w_5)
196 | rdd_p_sZ_5z_MWb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_Mw, stat="sum")
197 | 
198 | all.equal(rdd_p_sZ_5z_M, rdd_p_sZ_5z_Mb, check.attributes=FALSE)
199 | all.equal(rdd_p_sZ_5z_MW, rdd_p_sZ_5z_MWb, check.attributes=FALSE)
200 | 


--------------------------------------------------------------------------------
/RDDtools/R/RDDpred.R:
--------------------------------------------------------------------------------
  1 | #' RDD coefficient prediction
  2 | #'
  3 | #' Function to predict the RDD coefficient in presence of covariate (without covariates, returns the same than \code{\link{RDDcoef}})
  4 | #' @param object A RDD regression object
  5 | #' @param covdata New data.frame specifying the values of the covariates, can have multiple rows. 
  6 | #' @param se.fit A switch indicating if standard errors are required.
  7 | #' @param vcov. Specific covariance function (see package sandwich ), by default uses the \code{\link{vcov}}
  8 | #' @param newdata Another data on which to evaluate the x/D variables. Useful in very few cases. 
  9 | #' @param stat The statistic to use if there are multiple predictions, 'identity' just returns the single values, 'mean' averages them 
 10 | #' @param weights Eventual weights for the averaging of the predicted values. 
 11 | #' @details The function \code{RDDpred} does a simple prediction of the RDD effect
 12 | #'  \deqn{RDDeffect= \mu(x, z, D=1) - \mu(x, z, D=0)}
 13 | #' When there are no covariates (and z is irrelevant in the equation above), this amounts exactly to the usual RDD coefficient, 
 14 | #' shown in the outputs, or obtained with \code{\link{RDDcoef}}. If there were covariates, and if these covariates were estimated using the 
 15 | #' \dQuote{include} \emph{strategy} and with different coefficients left and right to the cutoff (i.e.
 16 | #' had argument \emph{slope} = \dQuote{separate}), than the RDD effect is also dependent on the value of the covariate(s). 
 17 | #' \code{RDDpred} allows to set the value of the covariate(s) at which to evaluate the RDD effect, by providing a data.frame with
 18 | #' the values for the covariates. Note that the effect can be evaluated at multiple points, if you provide multiple rows of \code{covdata}. 
 19 | #'
 20 | #' In pressence of covariate-specific RDD effect, one may wish to estimate an average effect. This can be done by setting the argument \code{stat="mean"}. 
 21 | #' Weights can additionally be added, with the argument \code{weights}, to obtain a weighted-average of the predictions. Note however that in most cases, 
 22 | #' this will be equivalent to provide covariates at their (weighted) mean value, which will be much faster also!
 23 | #'
 24 | #' Standard errors, obtained setting the argument \code{se.fit=TRUE}, are computed using following formula:
 25 | #'  \deqn{x_i \Omega x_i^{'}}
 26 | #' where \eqn{\Omega} is the estimated variance-covariance matrix ( by default \eqn{\sigma^2(X^{'}X)^{-1}} using \code{\link{vcov}}) and 
 27 | #' \eqn{x_i} is the input data (a mix of covdata and input data). If one wishes individual predictions, standard errors are simply obtained 
 28 | #' as the square of that diagonal matrix, whereas for mean/sum, covariances are taken into account. 
 29 | #' @return Returns the predicted value(s), and, if se.fit=TRUE, their standard errors. 
 30 | #' @export
 31 | #' @references Froehlich (2007) Regression discontinuity design with covariates, IZA discussion paper 3024
 32 | #' @examples
 33 | #' ## Load data, add (artificial) covariates:
 34 | #'   data(Lee2008)
 35 | #'   n_Lee <- nrow(Lee2008)
 36 | #'   z1 <- runif(n_Lee)
 37 | #'   Lee2008_rdd <- RDDdata(y=y, x=x, data=Lee2008, covar=z1, cutpoint=0)
 38 | #' 
 39 | #' ## estimation without covariates: RDDpred is the same than RDDcoef:
 40 | #'   reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
 41 | #' 
 42 | #'   RDDpred(reg_para)
 43 | #'   RDDcoef(reg_para, allInfo=TRUE)
 44 | #' 
 45 | #' ## estimation with covariates: 
 46 | #'   reg_para_cov <- RDDreg_lm(RDDobject=Lee2008_rdd, covariates="z1", covar.opt=list(slope="separate"))
 47 | #'   RDDpred(reg_para_cov, covdata=data.frame(z1=0)) ## should obtain same result than with RDestimate
 48 | #'   RDDpred(reg_para_cov, covdata=data.frame(z1=0.5)) #evaluate at mean of z1 (as comes from uniform)
 49 | 
 50 | RDDpred <- function(object, covdata, se.fit=TRUE, vcov. = NULL, newdata, stat=c("identity", "sum", "mean"), weights){
 51 | 
 52 |   stat <- match.arg(stat)
 53 | 
 54 |   if(!missing(weights)) {
 55 |     if(missing(covdata)) stop("Arg 'weights' only useful with arg 'covdata'")
 56 |     if(stat=="identity") stop("Argument 'weights' not useful when arg: stat='identity'")
 57 |     if(stat=="sum") {
 58 |       warning("Providing weights for a sum makes little sense?!")
 59 |     }
 60 |     if(length(weights)!=NROW(covdata)) stop("Weights should be of the same length than covdata")
 61 |   }
 62 | 
 63 |   x_call <- getCall(object)
 64 |   hasCo <- hasCovar(object)
 65 | 
 66 |   if(is.null(x_call$covar.opt)){
 67 |     covar.slope <- "same"
 68 |     covar.strat <- "include"
 69 |   } else {
 70 |     covar.slope <- ifelse(is.null(x_call$covar.opt$slope), "same", x_call$covar.opt$slope)
 71 |     covar.strat <- ifelse(is.null(x_call$covar.opt$strategy), "include", x_call$covar.opt$strategy)
 72 |   }
 73 | 
 74 | 
 75 | ## get original data structure:
 76 |   mf <- model.frame(object)[1:2,-1]
 77 |   if(any(grepl("\\(weights\\)", colnames(mf)))) mf <- mf[,-grep("\\(weights\\)", colnames(mf))]
 78 | 
 79 | ## Fill orig struc with 0/1
 80 |   if(missing(newdata)){
 81 |     which.D <- grep("^D$", colnames(mf))
 82 |     mf[,which.D] <- c(0,1) ## set coeff of interest
 83 |     mf[,-which.D] <- 0 ## remove others (not absolutely necessary actually)
 84 |     newdata <- mf
 85 |   }
 86 | 
 87 | ## Merge covdata with newdata:
 88 | 
 89 |   if(!missing(covdata)){
 90 |     if(covar.strat=="residual") stop("Do not provide 'covdata' if covariates were use with 'residual' strategy")
 91 |     if(covar.slope=="separate"){
 92 |       Nrow_cov <- nrow(covdata)
 93 |       if(Nrow_cov>1) newdata <- newdata[c(1, rep(2,Nrow_cov)),]
 94 |       if(!is.null(rownames(covdata))) { 
 95 | 	  if("1" %in% rownames(covdata)) rownames(newdata)[1] <- "0"
 96 | 	  rownames(newdata)[-1] <- rownames(covdata)
 97 |       } else {
 98 | 	rownames(newdata) <- c(0, seq_len(Nrow_cov))
 99 |       }
100 |       colnames_cov <- colnames(covdata)
101 |       ind <- seq(from=2, by=2, length.out=Nrow_cov)
102 |       if(!all(colnames_cov%in% colnames(newdata))) stop("Arg 'covdata' contains colnames not in the data")
103 |       newdata[2:nrow(newdata), paste(colnames(covdata), "D", sep=":")] <- covdata
104 |     }
105 |   } 
106 | 
107 |   multiN <- nrow(newdata)>2
108 | 
109 | ## Merge and check no NAs
110 |   X_i <- as.matrix(cbind(1,newdata))
111 |   if(any(is.na(X_i))){
112 |     warning("data contains NA. Were removed")
113 |     X_i <- X_i[-apply(X_i, 1, function(x) any(is.na(x))),]
114 |   }
115 | 
116 | ## Set up variance matrix: X_i (X'X)^{-1} X_i'
117 |   if(is.null(vcov.)) vcov. <- vcov(object)
118 |   X_inv <- vcov.
119 |   mat <- X_i%*%X_inv%*%t(X_i)
120 | 
121 | ## preds:
122 | 
123 |   if(!multiN) {
124 |     pred_point <- drop(diff(X_i%*%RDDcoef(object, allCo=TRUE)))
125 |     if(se.fit) pred_se    <- sqrt(sum(c(diag(mat), -2*mat[1,2])))
126 |   } else {
127 |     d <- X_i%*%coef(object)
128 | 
129 |     
130 |     Mat_SUM  <- cbind( 1, diag(nrow(d)-1))
131 |     Mat_DIAG <- matrix(diag(mat), ncol=1)
132 |     if(missing(weights)) {
133 |       MAT_SmallSum <- matrix(c(-(nrow(d)-1), rep(1,nrow(d)-1  )), nrow=1) ## create vector: [- n-1, 1, 1, 1....]
134 |     } else {
135 |       MAT_SmallSum <- matrix(c(-1, weights), nrow=1)                            ## create vector: [- 1, w_1, w_2, w_n]
136 |     }
137 |     
138 |     if(stat=="identity"){
139 |       Mat_DIFF <- Mat_SUM
140 |       Mat_DIFF[,1] <- -1
141 |       pred_point <- drop(Mat_DIFF%*%d)
142 |       if(se.fit) pred_se <- drop(sqrt(Mat_SUM %*%Mat_DIAG -2* mat[1,2:ncol(mat)]))
143 |     } else {
144 |       if(stat=="mean" & missing(weights)) MAT_SmallSum <- MAT_SmallSum/Nrow_cov
145 |       pred_point <- drop(MAT_SmallSum%*%d)
146 |       if(se.fit) pred_se <- drop(sqrt(MAT_SmallSum%*%mat%*%t(MAT_SmallSum)))
147 |     }
148 |   }
149 | 
150 | 
151 | ## result:
152 |   if(se.fit){
153 |     res <- list()
154 |     res$fit <- pred_point
155 |     res$se.fit <- pred_se
156 |   } else {
157 |     res <- pred_point
158 |   }
159 | res
160 | }
161 | 
162 | if(FALSE){
163 |   library(RDDtools)
164 |   data(Lee2008)
165 |   head(Lee2008)
166 | 
167 |   Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0)
168 | 
169 |   set.seed(123)
170 |   n_Lee <- nrow(Lee2008)
171 |   Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE))
172 |   Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,utpoint=0)
173 | 
174 | ## use:
175 |   reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd)
176 | 
177 |   RDDpred(reg_para)
178 |   RDDcoef(reg_para, allInfo=TRUE)
179 |   all.equal(unlist(RDDpred(reg_para)), RDDcoef(reg_para, allInfo=TRUE)[1:2], check=FALSE)
180 | 
181 | ## pred other coefs: 
182 |   pred_Xr <- RDDpred(reg_para, newdata= data.frame(Tr=0, Xl=0, Xr=c(0,1)))
183 |   all.equal(RDDcoef(reg_para, allInfo=TRUE, allCo=TRUE)[4,1:2], unlist(pred_Xr), check=FALSE)
184 | 
185 |   pred_Xl <- RDDpred(reg_para, newdata= data.frame(Tr=0, Xl=c(0,1), Xr=0))
186 |   all.equal(RDDcoef(reg_para, allInfo=TRUE, allCo=TRUE)[3,1:2], unlist(pred_Xl), check=FALSE)
187 | 
188 |   reg_para2 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2)
189 |   RDDpred(reg_para2)
190 |   all.equal(unlist(RDDpred(reg_para2)), RDDcoef(reg_para2, allInfo=TRUE)[1:2], check=FALSE)
191 | 
192 | 
193 | ### Covariates
194 |   reg_para4_cov <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=1, covariates="z1", covar.opt=list(slope="separate"))
195 |   reg_para4_cov
196 |   summary(reg_para4_cov)
197 | 
198 |   RDDpred(reg_para4_cov)
199 |   all.equal(unlist(RDDpred(reg_para4_cov)), RDDcoef(reg_para4_cov, allInfo=TRUE)[1:2], check=FALSE)
200 | 
201 |   all.equal(RDDpred(reg_para4_cov, covdata=data.frame(z1=0)),RDDpred(reg_para4_cov))
202 | 
203 | ### Check RDDpred:
204 | vec_eval <- c(2,4,4,5,6)
205 | estim_sep <- lapply(vec_eval, function(x) RDDpred(object=reg_para4_cov, covdata=data.frame(z1=x)))
206 | estim_toget <- RDDpred(reg_para4_cov, covdata=data.frame(z1=vec_eval))
207 | 
208 | all(estim_toget$fit==sapply(estim_sep, function(x) x$fit))
209 | all(estim_toget$se.fit==sapply(estim_sep, function(x) x$se.fit))
210 | 
211 | environment(RDDpred) <- environment(RDDreg_lm)
212 | sum(RDDpred(reg_para4_cov, covdata=data.frame(z1=c(0,1,2,1)))$fit) 
213 | # RDDpred(x=reg_para4_cov, covdata=data.frame(z1=c(2,4,4,4,5,6)))
214 | # RDDpred(reg_para4_cov)
215 | 
216 | }
217 | 


--------------------------------------------------------------------------------