├── RDDtools ├── .Rbuildignore ├── data │ ├── Lee2008.rda │ └── STAR_MHE.rda ├── inst │ └── doc │ │ └── RDDtools.pdf ├── vignettes │ ├── RDDtools.pdf │ └── RDD_refs.bib ├── R │ ├── myRDD-package.R │ ├── various_code.R │ ├── RDDcoef.R │ ├── Lee2008-data.R │ ├── qplot_experim.R │ ├── plotBin.R │ ├── dens_test.R │ ├── model.matrix.RDD.R │ ├── bw_ROT.R │ ├── get_methods.R │ ├── Waldci.R │ ├── gen_MC_IK.R │ ├── RDDdata_methods.R │ ├── STAR_MHE-data.R │ ├── as.npreg.R │ ├── deprecated.R │ ├── reg_gen.R │ ├── reg_lm.R │ ├── clusterInf.R │ ├── RDDdata.R │ ├── bw_IK.R │ ├── covarTests.R │ └── RDDpred.R ├── man │ ├── RDDtools-package.Rd │ ├── ROT_bw.Rd │ ├── as.lm.Rd │ ├── waldci.Rd │ ├── dens_test.Rd │ ├── Lee2008.Rd │ ├── RDDcoef.Rd │ ├── RDDbw_RSW.Rd │ ├── plotBin.Rd │ ├── RDDbw_IK.Rd │ ├── RDDdata.Rd │ ├── gen_MC_IK.Rd │ ├── RDDreg_np.Rd │ ├── clusterInf.Rd │ ├── plot.RDDdata.Rd │ ├── vcovCluster.Rd │ ├── as.npregbw.Rd │ ├── STAR_MHE.Rd │ ├── covarTest_dis.Rd │ ├── RDDgenreg.Rd │ ├── covarTest_mean.Rd │ ├── plotSensi.Rd │ ├── plotPlacebo.Rd │ ├── RDDreg_lm.Rd │ └── RDDpred.Rd ├── misc │ ├── TODO │ ├── test_RDest_t0_RDDreg.R │ ├── locpoly.f │ └── locpoly.R ├── DESCRIPTION ├── NEWS ├── tests │ ├── RDDtools_vs_rdd.R │ ├── simple_MC.R │ ├── RDDtools_vs_rdd.Rout.save │ ├── simple_MC.Rout.save │ ├── packageDemo.R │ └── RDDpred.R └── NAMESPACE ├── figuresREADME ├── DensPlot.png ├── RegPlot.png ├── dataPlot.png ├── reg_para.png ├── SensiPlot.png └── placeboPlot.png └── README.Rmd /RDDtools/.Rbuildignore: -------------------------------------------------------------------------------- 1 | misc 2 | -------------------------------------------------------------------------------- /RDDtools/data/Lee2008.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/data/Lee2008.rda -------------------------------------------------------------------------------- /RDDtools/data/STAR_MHE.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/data/STAR_MHE.rda -------------------------------------------------------------------------------- /figuresREADME/DensPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/DensPlot.png -------------------------------------------------------------------------------- /figuresREADME/RegPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/RegPlot.png -------------------------------------------------------------------------------- /figuresREADME/dataPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/dataPlot.png -------------------------------------------------------------------------------- /figuresREADME/reg_para.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/reg_para.png -------------------------------------------------------------------------------- /figuresREADME/SensiPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/SensiPlot.png -------------------------------------------------------------------------------- /figuresREADME/placeboPlot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/figuresREADME/placeboPlot.png -------------------------------------------------------------------------------- /RDDtools/inst/doc/RDDtools.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/inst/doc/RDDtools.pdf -------------------------------------------------------------------------------- /RDDtools/vignettes/RDDtools.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MatthieuStigler/RDDtools/HEAD/RDDtools/vignettes/RDDtools.pdf -------------------------------------------------------------------------------- /RDDtools/R/myRDD-package.R: -------------------------------------------------------------------------------- 1 | #' Regression Discontinuity Design 2 | #' 3 | #' Provides function to do a comprehensive regression discontinuity analysis. 4 | #' 5 | #' @name RDDtools-package 6 | #' @aliases RDDtools 7 | #' @docType package 8 | #' @import KernSmooth 9 | #' @import np 10 | #' @import ggplot2 11 | #' @title Regression Discontinuity Design 12 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> 13 | NULL 14 | -------------------------------------------------------------------------------- /RDDtools/man/RDDtools-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \docType{package} 3 | \name{RDDtools-package} 4 | \alias{RDDtools} 5 | \alias{RDDtools-package} 6 | \title{Regression Discontinuity Design} 7 | \description{ 8 | Regression Discontinuity Design 9 | } 10 | \details{ 11 | Provides function to do a comprehensive regression discontinuity analysis. 12 | } 13 | \author{ 14 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 15 | } 16 | 17 | -------------------------------------------------------------------------------- /RDDtools/R/various_code.R: -------------------------------------------------------------------------------- 1 | ### MISC 2 | is.even <- function (a) { 3 | a%%2 == 0 4 | } 5 | 6 | 7 | Kernel_tri <- function(X, center, bw) { 8 | ifelse(abs(X - center) > bw, 0, 1 - (abs(X - center) / bw)) 9 | } 10 | 11 | Kernel_uni <- function(X, center, bw) { 12 | ifelse(abs(X - center) > bw, 0, 1) 13 | } 14 | 15 | .onLoad <- function(libname, pkgname) 16 | packageStartupMessage("\nRDDtools ", utils::packageVersion("RDDtools"), 17 | "\nPLEASE NOTE THIS is currently only a development version. \nRun vignette('RDDtools') for the documentation") 18 | -------------------------------------------------------------------------------- /RDDtools/man/ROT_bw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{ROT_bw} 3 | \alias{ROT_bw} 4 | \title{Bandwidth selector} 5 | \usage{ 6 | ROT_bw(object) 7 | } 8 | \arguments{ 9 | \item{object}{object of class RDDdata} 10 | } 11 | \description{ 12 | implements dpill 13 | } 14 | \examples{ 15 | #No discontinuity 16 | } 17 | \author{ 18 | Drew Dimmery <\email{drewd@nyu.edu}> 19 | } 20 | \references{ 21 | McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714. \url{http://dx.doi.org/10.1016/j.jeconom.2007.05.005} 22 | } 23 | 24 | -------------------------------------------------------------------------------- /RDDtools/misc/TODO: -------------------------------------------------------------------------------- 1 | 2 | 3 | -plot.RDDreg_np: use np with confidence interval instead! 4 | -plot.RDDreg_np: option one/two sided 5 | 6 | -plot.RDDreg_np: understand differences in plotSensi and plot.RDDreg_np !! see slides 29 7 | 8 | -plotSensi: legend: LATE and CI 9 | -plotSensi: also compare slope specifications? 10 | 11 | -PLACEBO Y: change y, at true cutoff 12 | 13 | -RDDreg_lm: info on bw 14 | 15 | -labels 16 | 17 | -Check McCary var estimate 18 | - 19 | 20 | ## Long term 21 | -CI? clustering?! eventually bootstrap? 22 | -covariate: in param reg! see fröhlich, extend to plotSensi/placebo! 23 | -median/quantile LATE, see fröhlich 24 | -GLM parametric 25 | -------------------------------------------------------------------------------- /RDDtools/DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: RDDtools 2 | Type: Package 3 | Title: A toolbox for regression discontinuity design (RDD) 4 | Version: 0.22 5 | Date: 21/05/2014 6 | Authors@R: person("Matthieu", "Stigler", role = c("aut","cre"), 7 | email="Matthieu.Stigler@iheid.ch") 8 | Maintainer: Matthieu Stigler 9 | Imports: 10 | KernSmooth, 11 | ggplot2, 12 | rdd, 13 | np, 14 | sandwich, 15 | lmtest, 16 | Formula, 17 | locpol, 18 | methods 19 | Depends: 20 | AER 21 | Suggests: 22 | stats4, 23 | car 24 | Description: Provides a set of functions for RDD, from data visualisation, 25 | estimation and testing. 26 | License: GPL (>= 2) 27 | -------------------------------------------------------------------------------- /RDDtools/man/as.lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{as.lm} 3 | \alias{as.lm} 4 | \title{Convert a rdd object to lm} 5 | \usage{ 6 | as.lm(x) 7 | } 8 | \arguments{ 9 | \item{x}{An object to convert to lm} 10 | } 11 | \value{ 12 | An object of class \code{lm} 13 | } 14 | \description{ 15 | Convert a rdd object to lm 16 | } 17 | \examples{ 18 | data(Lee2008) 19 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 20 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 21 | reg_para_lm <- as.lm(reg_para) 22 | reg_para_lm 23 | plot(reg_para_lm, which=4) 24 | } 25 | \seealso{ 26 | \code{\link{as.npreg}} which converts \code{RDDreg} objects into \code{npreg} from package \code{np}. 27 | } 28 | 29 | -------------------------------------------------------------------------------- /RDDtools/man/waldci.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{waldci} 3 | \alias{waldci} 4 | \title{Confint allowing vcov} 5 | \usage{ 6 | waldci(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) 7 | } 8 | \arguments{ 9 | \item{x}{Object of class lm or else} 10 | 11 | \item{parm}{specification of which parameters are to be given confidence intervals, see confint} 12 | 13 | \item{level}{the confidence level required, see confint()} 14 | 15 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of sandwich} 16 | 17 | \item{df}{Degrees of freedom} 18 | 19 | \item{\ldots}{Further argument} 20 | } 21 | \description{ 22 | Version of vcov allowing for confint 23 | } 24 | \keyword{internal} 25 | 26 | -------------------------------------------------------------------------------- /RDDtools/man/dens_test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{dens_test} 3 | \alias{dens_test} 4 | \title{Run the McCracy test for manipulation of the forcing variable} 5 | \usage{ 6 | dens_test(RDDobject, bin = NULL, bw = NULL, plot = TRUE, ...) 7 | } 8 | \arguments{ 9 | \item{RDDobject}{object of class RDDdata} 10 | 11 | \item{bin}{Argument of the \code{\link{DCdensity}} function, the binwidth} 12 | 13 | \item{bw}{Argument of the \code{\link{DCdensity}} function, the bandwidth} 14 | 15 | \item{plot}{Whether to return a plot. Logical, default ot TRUE.} 16 | 17 | \item{\ldots}{Further arguments passed to \code{\link[rdd]{DCdensity}}.} 18 | } 19 | \description{ 20 | Calls the \code{\link[rdd]{DCdensity}} test from package \code{rdd} on a \code{RDDobject}. 21 | } 22 | \examples{ 23 | library(RDDtools) 24 | data(Lee2008) 25 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 26 | dens_test(Lee2008_rdd) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /RDDtools/man/Lee2008.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \docType{data} 3 | \name{Lee2008} 4 | \alias{Lee2008} 5 | \title{Dataset used in Lee (2008)} 6 | \format{A data frame with 6558 observations and two variables: 7 | \describe{ 8 | \item{x}{Vote at election t-1} 9 | \item{y}{Vote at election t} 10 | }} 11 | \source{ 12 | Guido Imbens webpage: \url{http://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity} 13 | } 14 | \usage{ 15 | Lee2008 16 | } 17 | \description{ 18 | U.S. House elections data 19 | } 20 | \examples{ 21 | data(Lee2008) 22 | RDDlee <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) 23 | summary(RDDlee) 24 | plot(RDDlee) 25 | } 26 | \references{ 27 | Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," 28 | Review of Economic Studies (2012) 79, 933-959 29 | 30 | Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections, 31 | \emph{Journal of Econometrics}, 142, 675-697 32 | } 33 | 34 | -------------------------------------------------------------------------------- /RDDtools/man/RDDcoef.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDcoef} 3 | \alias{RDDcoef} 4 | \alias{RDDcoef.RDDreg_np} 5 | \alias{RDDcoef.default} 6 | \title{RDD coefficient} 7 | \usage{ 8 | RDDcoef(object, allInfo = FALSE, allCo = FALSE, ...) 9 | 10 | \method{RDDcoef}{default}(object, allInfo = FALSE, allCo = FALSE, ...) 11 | 12 | \method{RDDcoef}{RDDreg_np}(object, allInfo = FALSE, allCo = FALSE, ...) 13 | } 14 | \arguments{ 15 | \item{object}{A RDD regression object} 16 | 17 | \item{allInfo}{whether to return just the coefficients (allInfo=FALSE) or also the se/t stat/pval.} 18 | 19 | \item{allCo}{Whether to give only the RDD coefficient (allCo=FALSE) or all coefficients} 20 | 21 | \item{\ldots}{Further arguments passed to/from specific methods} 22 | } 23 | \value{ 24 | Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate, 25 | its standard value, t test and p-value and 26 | } 27 | \description{ 28 | Function to access the RDD coefficient in the various regressions 29 | } 30 | 31 | -------------------------------------------------------------------------------- /RDDtools/man/RDDbw_RSW.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDbw_RSW} 3 | \alias{RDDbw_RSW} 4 | \title{Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth}} 5 | \usage{ 6 | RDDbw_RSW(object, type = c("global", "sided")) 7 | } 8 | \arguments{ 9 | \item{object}{object of class RDDdata created by \code{\link{RDDdata}}} 10 | 11 | \item{type}{Whether to choose a global bandwidth for the whole function (\code{global}) 12 | or for each side (\code{sided})} 13 | } 14 | \value{ 15 | One (or two for \code{sided}) bandwidth value. 16 | } 17 | \description{ 18 | Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995) 19 | either to the whole function, or to the functions below and above the cutpoint. 20 | } 21 | \examples{ 22 | data(Lee2008) 23 | rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) 24 | RDDbw_RSW(rd) 25 | } 26 | \references{ 27 | See \code{\link[KernSmooth]{dpill}} 28 | } 29 | \seealso{ 30 | \code{\link{RDDbw_IK}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) 31 | } 32 | 33 | -------------------------------------------------------------------------------- /RDDtools/man/plotBin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{plotBin} 3 | \alias{plotBin} 4 | \title{Bin plotting} 5 | \usage{ 6 | plotBin(x, y, h = 0.05, nbins = NULL, cutpoint = 0, plot = TRUE, 7 | type = c("value", "number"), xlim = range(x, na.rm = TRUE), cex = 0.9, 8 | main = NULL, xlab, ylab, ...) 9 | } 10 | \arguments{ 11 | \item{x}{Forcing variable} 12 | 13 | \item{y}{Output} 14 | 15 | \item{h}{the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)})} 16 | 17 | \item{cutpoint}{Cutpoint} 18 | 19 | \item{plot}{Logical. Whether to plot or only returned silently} 20 | 21 | \item{type}{Whether returns the y averages, or the x frequencies} 22 | 23 | \item{xlim,cex,main,xlab,ylab}{Usual parameters passed to plot(), see \code{\link{par}}} 24 | 25 | \item{\ldots}{further arguments passed to plot.} 26 | } 27 | \value{ 28 | Returns silently values 29 | } 30 | \description{ 31 | Do a "scatterplot bin smoothing" 32 | } 33 | \author{ 34 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 35 | } 36 | \references{ 37 | McCrary, Justin. 38 | } 39 | \keyword{internal} 40 | 41 | -------------------------------------------------------------------------------- /RDDtools/man/RDDbw_IK.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDbw_IK} 3 | \alias{RDDbw_IK} 4 | \title{Imbens-Kalyanaraman Optimal Bandwidth Calculation} 5 | \usage{ 6 | RDDbw_IK(RDDobject, kernel = c("Triangular", "Uniform", "Normal")) 7 | } 8 | \arguments{ 9 | \item{RDDobject}{of class RDDdata created by \code{\link{RDDdata}}} 10 | 11 | \item{kernel}{The type of kernel used: either \code{triangular} or \code{uniform}.} 12 | } 13 | \value{ 14 | The optimal bandwidth 15 | } 16 | \description{ 17 | Imbens-Kalyanaraman optimal bandwidth 18 | for local linear regression in Regression discontinuity designs. 19 | } 20 | \examples{ 21 | data(Lee2008) 22 | rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) 23 | RDDbw_IK(rd) 24 | } 25 | \author{ 26 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 27 | } 28 | \references{ 29 | Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," 30 | Review of Economic Studies (2012) 79, 933-959 31 | } 32 | \seealso{ 33 | \code{\link{RDDbw_RSW}} Global bandwidth selector of Ruppert, Sheather and Wand (1995) 34 | } 35 | 36 | -------------------------------------------------------------------------------- /RDDtools/NEWS: -------------------------------------------------------------------------------- 1 | 2 | RDDtools 0.22 3 | =========== 4 | Updated on 21/5/14 5 | 6 | * RDDdata: change arg z to covar, add new argument z for sharp, currently unused. 7 | 8 | * dens_test: work now on RDDreg, return object htest 9 | 10 | * Multiple changes in help files 11 | 12 | * Correct import, suggests, calls to ::: 13 | 14 | RDDtools 0.21 15 | =========== 16 | Updated on 25/7/13 17 | 18 | * Add new function RDDpred 19 | 20 | * Add new model.matrix.RDDdata, preparing all output, now used by all RDDreg_np, RDDreg_lm, RDDgenre... 21 | 22 | * Add method vcov.RDDreg, as.lm.RDDreg 23 | 24 | * Add enw function vcovCluster2, complement doc, add M Arai, 25 | 26 | * Add data STAR_MHE 27 | 28 | * Many small fixes 29 | 30 | RDDtools 0.2 31 | =========== 32 | Updated on 16/7/13 33 | 34 | * Add new option to have separate or same covariates 35 | 36 | * Add as.nprg, to convert to a np regression from package np 37 | 38 | * Add RDDcoef, working on multiple models (lm, np, npreg). 39 | 40 | * Many fixes... 41 | 42 | RDDtools 0.1 43 | =========== 44 | Initial commit on 29/04/2013 45 | 46 | * Initial commit, containing RDDdata, RDDreg_lm, RDDreg_np, plotSensi, plotPlacebo, etc... 47 | -------------------------------------------------------------------------------- /RDDtools/R/RDDcoef.R: -------------------------------------------------------------------------------- 1 | #' RDD coefficient 2 | #' 3 | #' Function to access the RDD coefficient in the various regressions 4 | #' @param object A RDD regression object 5 | #' @param allInfo whether to return just the coefficients (allInfo=FALSE) or also the se/t stat/pval. 6 | #' @param allCo Whether to give only the RDD coefficient (allCo=FALSE) or all coefficients 7 | #' @param \ldots Further arguments passed to/from specific methods 8 | #' @return Either a numeric value of the RDD coefficient estimate, or a data frame with the estimate, 9 | #' its standard value, t test and p-value and 10 | #' @export 11 | RDDcoef <- function(object, allInfo=FALSE, allCo=FALSE, ...) 12 | UseMethod("RDDcoef") 13 | 14 | #' @rdname RDDcoef 15 | #' @method RDDcoef default 16 | #' @S3method RDDcoef default 17 | RDDcoef.default <- function(object, allInfo=FALSE, allCo=FALSE, ...){ 18 | res <- coef(summary(object)) 19 | if(!allCo) res <- res["D",, drop=FALSE] 20 | if(!allInfo) res <- res[,"Estimate"] 21 | res 22 | } 23 | 24 | #' @rdname RDDcoef 25 | #' @method RDDcoef RDDreg_np 26 | #' @S3method RDDcoef RDDreg_np 27 | RDDcoef.RDDreg_np <- function(object, allInfo=FALSE, allCo=FALSE, ...){ 28 | res<- object$coefMat 29 | if(!allCo) res <- res["D",, drop=FALSE] 30 | if(!allInfo) res <- res[,"Estimate"] 31 | res 32 | } 33 | 34 | -------------------------------------------------------------------------------- /RDDtools/R/Lee2008-data.R: -------------------------------------------------------------------------------- 1 | #' @name Lee2008 2 | #' @title Dataset used in Lee (2008) 3 | #' @description U.S. House elections data 4 | #' @docType data 5 | #' @usage Lee2008 6 | #' @description Dataset described used in Imbens and Kalyamaran (2012), and probably the same dataset used in Lee (2008), 7 | #' @format A data frame with 6558 observations and two variables: 8 | #' \describe{ 9 | #' \item{x}{Vote at election t-1} 10 | #' \item{y}{Vote at election t} 11 | #' } 12 | #' @source Guido Imbens webpage: \url{http://scholar.harvard.edu/imbens/scholar_software/regression-discontinuity} 13 | #' @references Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," 14 | #' Review of Economic Studies (2012) 79, 933-959 15 | #' @references Lee, D. (2008) Randomized experiments from non-random selection in U.S. House elections, 16 | #' \emph{Journal of Econometrics}, 142, 675-697 17 | #' @examples 18 | #' data(Lee2008) 19 | #' RDDlee <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) 20 | #' summary(RDDlee) 21 | #' plot(RDDlee) 22 | 23 | 24 | NULL 25 | # Lee2008 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE) 26 | # colnames(Lee2008) <- c("x", "y") 27 | # save(Lee2008, file="/home/mat/Dropbox/HEI/rdd/Rcode/RDDtools/data/Lee2008.rda") -------------------------------------------------------------------------------- /RDDtools/man/RDDdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDdata} 3 | \alias{RDDdata} 4 | \title{Construct RDDdata} 5 | \usage{ 6 | RDDdata(y, x, covar, cutpoint, z, labels, data) 7 | } 8 | \arguments{ 9 | \item{x}{Forcing variable} 10 | 11 | \item{y}{Output} 12 | 13 | \item{covar}{Exogeneous variables} 14 | 15 | \item{cutpoint}{Cutpoint} 16 | 17 | \item{labels}{Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently.} 18 | 19 | \item{data}{A data-frame for the \code{x} and \code{y} variables. If this is provided, 20 | the column names can be entered directly for argument \code{x} and \code{y}} 21 | 22 | \item{z}{Assignment variable for the fuzzy case.} 23 | } 24 | \value{ 25 | Object of class \code{RDDdata}, inheriting from \code{data.frame} 26 | } 27 | \description{ 28 | Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates. 29 | } 30 | \examples{ 31 | data(Lee2008) 32 | rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) 33 | rd2 <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) 34 | 35 | # The print() function is the same as the print.data.frame: 36 | rd 37 | 38 | # The summary() and plot() function are specific to RDDdata 39 | summary(rd) 40 | plot(rd) 41 | } 42 | \author{ 43 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 44 | } 45 | 46 | -------------------------------------------------------------------------------- /RDDtools/misc/test_RDest_t0_RDDreg.R: -------------------------------------------------------------------------------- 1 | 2 | if(FALSE){ 3 | RDDdat_choice <- RDDdata(y=choice_pg, x=xx, cutpoint=30) 4 | RDDdat_risk <- RDDdata(y=risky_option, x=xx, cutpoint=30) 5 | 6 | RDbw_choice <- RDDbw_IK(RDDdat_choice) 7 | RDbw_risk <- RDDbw_IK(RDDdat_risk) 8 | 9 | rd_choice <- RDDreg_np(RDDdat_choice) 10 | rd_choice_sam <- RDDreg_np(RDDdat_choice, slope="same") 11 | rd_choice_lm <- RDDreg_lm(RDDdat_choice) 12 | 13 | rd_risk <- RDDreg_np(RDDdat_risk) 14 | rd_risk_sam <- RDDreg_np(RDDdat_risk, slope="same") 15 | 16 | 17 | rd_choice 18 | rd_choice_sam 19 | rd_choice_lm 20 | rdd_choice <- RDestimate(choice_pg~xx, cutpoint=30, bw=RDbw_choice, model=TRUE) 21 | coef(summary(rdd_choice$model[[1]])) 22 | coef(summary(rd_choice)) 23 | plot(rd_choice) 24 | 25 | RDest_t0_RDDreg <- function(RDest){ 26 | 27 | cutpoint <- getCall(RDest)$cutpoint 28 | co_RDest <- coef(summary(RDest$model[[1]]))[,1] 29 | 30 | coefs <- vector("numeric", 4) 31 | names(coefs) <- c("(Intercept)", "x", "D", "x_right") 32 | coefs["x"] <- co_RDest["Xl"] 33 | coefs["x_right"] <- co_RDest["Xr"]-co_RDest["Xl"] 34 | coefs["(Intercept)"] <- co_RDest["(Intercept)"] - cutpoint*co_RDest["Xl"] 35 | coefs["D"] <- co_RDest["Tr"] + cutpoint*(co_RDest["Xl"]-co_RDest["Xr"]) 36 | coefs 37 | } 38 | 39 | RDest_t0_RDDreg(RDest=rdd_choice) 40 | coef(summary(rd_choice))[,1] 41 | 42 | rd_risk 43 | rd_risk_sam 44 | RDestimate(risky_option~xx, cutpoint=30, bw=RDbw_risk) 45 | 46 | 47 | head(model.frame(RDestimate(risky_option~xx, cutpoint=30, model=TRUE, bw=RDbw_risk)$model[[1]])) 48 | 49 | head(model.frame(rd_choice_sam$model)) 50 | 51 | } -------------------------------------------------------------------------------- /RDDtools/man/gen_MC_IK.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{gen_MC_IK} 3 | \alias{gen_MC_IK} 4 | \title{Generate Monte Carlo simulations of Imbens and Kalyanaraman} 5 | \usage{ 6 | gen_MC_IK(n = 200, version = 1, sd = 0.1295, output = c("data.frame", 7 | "RDDdata"), size) 8 | } 9 | \arguments{ 10 | \item{n}{The size of sampel to generate} 11 | 12 | \item{version}{The MC version of Imbens and Kalnayaraman (between 1 and 4).} 13 | 14 | \item{sd}{The standard deviation of the error term.} 15 | 16 | \item{output}{Whether to return a data-frame, or already a RDDdata} 17 | 18 | \item{size}{The size of the effect, this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1} 19 | } 20 | \value{ 21 | An data frame with x and y variables. 22 | } 23 | \description{ 24 | Generate the simulations reported in Imbens and Kalyanaraman (2012) 25 | } 26 | \examples{ 27 | MC1_dat <- gen_MC_IK() 28 | MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0) 29 | 30 | ## Use np regression: 31 | reg_nonpara <- RDDreg_np(RDDobject=MC1_rdd) 32 | reg_nonpara 33 | 34 | # Represent the curves: 35 | plotCu <- function(version=1, xlim=c(-0.1,0.1)){ 36 | res <- gen_MC_IK(sd=0.0000001, n=1000, version=version) 37 | res <- res[order(res$x),] 38 | ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), "y")) 39 | plot(res, type="l", xlim=xlim, ylim=ylim, main=paste("DGP", version)) 40 | abline(v=0) 41 | xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),] 42 | points(xCut, col=2) 43 | } 44 | layout(matrix(1:4,2, byrow=TRUE)) 45 | plotCu(version=1) 46 | plotCu(version=2) 47 | plotCu(version=3) 48 | plotCu(version=4) 49 | layout(matrix(1)) 50 | } 51 | \references{ 52 | TODO 53 | } 54 | 55 | -------------------------------------------------------------------------------- /RDDtools/man/RDDreg_np.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDreg_np} 3 | \alias{RDDreg_np} 4 | \title{Parametric polynomial estimator of the regression discontinuity} 5 | \usage{ 6 | RDDreg_np(RDDobject, covariates = NULL, bw = RDDbw_IK(RDDobject), 7 | slope = c("separate", "same"), inference = c("np", "lm"), 8 | covar.opt = list(slope = c("same", "separate"), bw = NULL)) 9 | } 10 | \arguments{ 11 | \item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}} 12 | 13 | \item{covariates}{TODO} 14 | 15 | \item{bw}{A bandwidth to specify the subset on which the parametric regression is estimated} 16 | 17 | \item{inference}{Type of inference to conduct: non-parametric one (\code{np}) or standard (\code{lm}). See details.} 18 | 19 | \item{slope}{Whether slopes should be different on left or right (separate), or the same.} 20 | 21 | \item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).} 22 | } 23 | \value{ 24 | An object of class RDDreg_np and class lm, with specific print and plot methods 25 | } 26 | \description{ 27 | Compute a parametric polynomial regression of the ATE, 28 | possibly on the range specified by bandwidth 29 | } 30 | \examples{ 31 | ## Step 0: prepare data 32 | data(Lee2008) 33 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 34 | ## Step 2: regression 35 | # Simple polynomial of order 1: 36 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) 37 | print(reg_nonpara) 38 | plot(reg_nonpara) 39 | } 40 | \references{ 41 | TODO 42 | } 43 | \seealso{ 44 | \code{\link{RDDbw_IK}} Bandwidth selection using the plug-in bandwidth of Imbens and Kalyanaraman (2012) 45 | } 46 | 47 | -------------------------------------------------------------------------------- /RDDtools/tests/RDDtools_vs_rdd.R: -------------------------------------------------------------------------------- 1 | 2 | library(rdd) 3 | library(RDDtools) 4 | 5 | set.seed(1234) 6 | x<-runif(1000,-1,1) 7 | cov<-rnorm(1000) 8 | y<-3+2*x+3*cov+10*(x>=0)+rnorm(1000) 9 | 10 | RD <- RDDdata(x=x, y=y, cutpoint=0, covar=cov) 11 | 12 | ### Simple estimation: 13 | bw <- IKbandwidth(X=x, Y=y, cutpoint=0) 14 | bw 15 | rdd_mod <- RDestimate(y~x, bw=bw, se.type="const", model=TRUE)$model[[1]] 16 | RDDtools_mod <- RDDreg_np(RD, bw=bw, inference="lm") 17 | 18 | rdd_co <- coef(summary(rdd_mod)) 19 | RDDtools_co <- RDDcoef(RDDtools_mod, allCo=TRUE, allInfo=TRUE) 20 | rdd_co 21 | RDDtools_co 22 | 23 | all.equal(rdd_co[-4,], RDDtools_co[1:3,], check.attributes=FALSE) 24 | all.equal(rdd_co[4,1], sum(RDDtools_co[3:4,1]), check.attributes=FALSE) 25 | 26 | 27 | ### Covariate estimation: 28 | rdd_mod_cov <- RDestimate(y~x|cov, kernel="rectangular", bw=5, model=TRUE, se.type="const")$model[[1]] 29 | RDDtools_mod_cov <- RDDreg_lm(RD, bw=5, covariates="cov", covar.opt=list(slope="separate")) 30 | 31 | rdd_co_cov <- coef(summary(rdd_mod_cov)) 32 | RDDtools_co_cov <- RDDcoef(RDDtools_mod_cov, allCo=TRUE, allInfo=TRUE) 33 | rdd_co_cov 34 | RDDtools_co_cov 35 | 36 | all.equal(rdd_co_cov[-4,], RDDtools_co_cov[-4,], check.attributes=FALSE) 37 | 38 | ## Fuzzy 39 | set.seed(123) 40 | selec <- rbinom(nrow(RD), 1, prob=ifelse(RD$x<0, 0.1, 0.9)) 41 | RD_rdd_ins <- RDDdata(y=RD$y, x=RD$x, z=selec,cutpoint=0) 42 | 43 | RDDto_reg_fuz <- RDDreg_lm(RD_rdd_ins, bw=0.2) 44 | rdd_reg_fuz <- RDestimate(y~x+selec, data=RD_rdd_ins, kernel="rectangular", bw=0.2, model=TRUE, se.type="const")$model[[2]][[1]] 45 | 46 | all.equal(RDDcoef(RDDto_reg_fuz),coef(summary(rdd_reg_fuz))[2,1]) 47 | all.equal(RDDcoef(RDDto_reg_fuz, allCo=TRUE)[1:3],coef(summary(rdd_reg_fuz))[1:3,1], check.attributes=FALSE) 48 | 49 | -------------------------------------------------------------------------------- /RDDtools/man/clusterInf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{clusterInf} 3 | \alias{clusterInf} 4 | \title{Post-inference for clustered data} 5 | \usage{ 6 | clusterInf(object, clusterVar, vcov. = NULL, type = c("df-adj", "HC"), ...) 7 | } 8 | \arguments{ 9 | \item{object}{Object of class lm, from which RDDreg also inherits.} 10 | 11 | \item{clusterVar}{The variable containing the cluster attributions.} 12 | 13 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of sandwich} 14 | 15 | \item{type}{The type of cluster correction to use: either the degrees of freedom, or a HC matrix.} 16 | 17 | \item{\ldots}{Further arguments passed to coeftest} 18 | } 19 | \value{ 20 | The output of the coeftest function, which is itself of class \code{coeftest} 21 | } 22 | \description{ 23 | Correct standard-errors to account for clustered data, doing either a degrees of freedom correction or using a heteroskedasticidty-cluster robust covariance matrix 24 | possibly on the range specified by bandwidth 25 | } 26 | \examples{ 27 | data(Lee2008) 28 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 29 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 30 | 31 | # here we just generate randomly a cluster variable: 32 | nlet <- sort(c(outer(letters, letters, paste, sep=""))) 33 | clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE) 34 | 35 | # now do post-inference: 36 | clusterInf(reg_para, clusterVar=clusRandom) 37 | clusterInf(reg_para, clusterVar=clusRandom, type="HC") 38 | } 39 | \references{ 40 | Wooldridge (2003) Cluster-sample methods in applied econometrics. 41 | \emph{AmericanEconomic Review}, 93, p. 133-138 42 | } 43 | \seealso{ 44 | \code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{cluserInf} 45 | } 46 | 47 | -------------------------------------------------------------------------------- /RDDtools/NAMESPACE: -------------------------------------------------------------------------------- 1 | S3method("[",RDDdata) 2 | S3method(RDDcoef,RDDreg_np) 3 | S3method(RDDcoef,RDDreg_npreg) 4 | S3method(RDDcoef,default) 5 | S3method(as.data.frame,RDDdata) 6 | S3method(as.lm,RDDreg) 7 | S3method(as.lm,RDDreg_np) 8 | S3method(bread,RDDreg_np) 9 | S3method(covarTest_dis,RDDdata) 10 | S3method(covarTest_dis,RDDreg) 11 | S3method(covarTest_mean,RDDdata) 12 | S3method(covarTest_mean,RDDreg) 13 | S3method(estfun,RDDreg_np) 14 | S3method(getCall,RDDreg) 15 | S3method(model.frame,RDDreg_np) 16 | S3method(model.matrix,RDDdata) 17 | S3method(plot,RDDdata) 18 | S3method(plot,RDDreg_lm) 19 | S3method(plot,RDDreg_np) 20 | S3method(plotPlacebo,PlaceboVals) 21 | S3method(plotPlacebo,RDDreg) 22 | S3method(plotPlaceboDens,PlaceboVals) 23 | S3method(plotPlaceboDens,RDDreg) 24 | S3method(plotSensi,RDDreg_lm) 25 | S3method(plotSensi,RDDreg_np) 26 | S3method(print,RDDreg_lm) 27 | S3method(print,RDDreg_np) 28 | S3method(print,summary.RDDreg_np) 29 | S3method(subset,RDDdata) 30 | S3method(summary,RDDdata) 31 | S3method(summary,RDDreg_np) 32 | S3method(vcov,RDDreg_np) 33 | export(RDDbw_IK) 34 | export(RDDbw_RSW) 35 | export(RDDcoef) 36 | export(RDDdata) 37 | export(RDDgenreg) 38 | export(RDDpred) 39 | export(RDDreg_lm) 40 | export(RDDreg_np) 41 | export(ROT_bw) 42 | export(as.lm) 43 | export(as.npreg) 44 | export(as.npregbw) 45 | export(clusterInf) 46 | export(computePlacebo) 47 | export(covarTest_dis) 48 | export(covarTest_mean) 49 | export(dens_test) 50 | export(gen_MC_IK) 51 | export(plotPlacebo) 52 | export(plotPlaceboDens) 53 | export(plotSensi) 54 | export(vcovCluster) 55 | export(vcovCluster2) 56 | import(Formula) 57 | import(KernSmooth) 58 | import(ggplot2) 59 | import(lmtest) 60 | import(methods) 61 | import(np) 62 | import(rdd) 63 | import(sandwich) 64 | importFrom(AER,ivreg) 65 | importFrom(locpol,gaussK) 66 | importFrom(locpol,locpol) 67 | importFrom(stats,getCall) 68 | -------------------------------------------------------------------------------- /RDDtools/tests/simple_MC.R: -------------------------------------------------------------------------------- 1 | 2 | library(RDDtools) 3 | library(rdd) 4 | 5 | ## simple MC: 6 | set.seed(123) 7 | 8 | MC_simple <- function(n=200, CATE=0.3, HATE=0.1){ 9 | x <- rnorm(n, mean=20, sd=5) 10 | D <- x>= 20 11 | y <- 0.8 + CATE*D+ 0.3*x+HATE*x*D+rnorm(n, sd=0.1) 12 | cat("effect", CATE+HATE*20, "\n") 13 | RDDdata(x=x, y=y, cutpoint=20) 14 | 15 | } 16 | 17 | input_mc <- MC_simple(n=1000, CATE=0.4) 18 | plot(input_mc) 19 | 20 | RDD_bw <- RDDbw_IK(input_mc) 21 | 22 | RDD_np_sep <- RDDreg_np(input_mc, bw=RDD_bw) 23 | RDD_np_same <- RDDreg_np(input_mc, slope="same", bw=RDD_bw) 24 | RDD_np_sep_inflm <- RDDreg_np(input_mc, bw=RDD_bw, inf="lm") 25 | RDD_np_same_inflm <- RDDreg_np(input_mc, slope="same", bw=RDD_bw, inf="lm") 26 | RDD_lm_sep <- RDDreg_lm(input_mc, bw=RDD_bw) 27 | RDD_lm_same <- RDDreg_lm(input_mc, slope="same", bw=RDD_bw) 28 | rdd_RDe <- RDestimate(y~x, data=input_mc, cutpoint=20, model=TRUE, bw=RDD_bw) 29 | 30 | 31 | printCoefmat(coef(summary(RDD_np_sep_inflm$RDDslot$model))) 32 | printCoefmat(coef(summary(RDD_np_same_inflm$RDDslot$model))) 33 | printCoefmat(coef(summary(RDD_lm_sep))) 34 | printCoefmat(coef(summary(RDD_lm_same))) 35 | printCoefmat(coef(summary(rdd_RDe $model[[1]]))) 36 | 37 | 38 | ## few checks: 39 | plse <- plotSensi(RDD_np_sep, from=5, to=20, by=0.5) 40 | plotPlacebo(RDD_np_sep) 41 | 42 | plotSensi(RDD_np_same, from=5, to=20, by=0.5) 43 | plotPlacebo(RDD_np_same) 44 | 45 | a<-plotSensi(RDD_lm_sep, from=5, to=20, by=0.5) 46 | plotPlacebo(RDD_lm_sep) 47 | 48 | plotSensi(RDD_lm_same, from=5, to=20, by=0.5) 49 | plotPlacebo(RDD_lm_same) 50 | 51 | #### Other MCs: 52 | set.seed(123) 53 | head(gen_MC_IK()) 54 | 55 | set.seed(123) 56 | head(gen_MC_IK(output="RDDdata")) 57 | 58 | set.seed(123) 59 | head(gen_MC_IK(version=2)) 60 | 61 | set.seed(123) 62 | head(gen_MC_IK(version=3)) 63 | 64 | set.seed(123) 65 | head(gen_MC_IK(version=4)) 66 | -------------------------------------------------------------------------------- /RDDtools/man/plot.RDDdata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{plot.RDDdata} 3 | \alias{plot.RDDdata} 4 | \title{Plot RDDdata} 5 | \usage{ 6 | \method{plot}{RDDdata}(x, h, nbins = NULL, xlim = range(object$x, na.rm = 7 | TRUE), cex = 0.7, nplot = 1, device = c("base", "ggplot"), ...) 8 | } 9 | \arguments{ 10 | \item{x}{Object of class RDDdata} 11 | 12 | \item{h}{The binwidth parameter (note this differs from the bandwidth parameter!)} 13 | 14 | \item{nbins}{Alternative to h, the total number of bins in the plot.} 15 | 16 | \item{xlim}{The range of the x data} 17 | 18 | \item{cex}{Size of the points, see \code{\link{par}}} 19 | 20 | \item{nplot}{Number of plot to draw} 21 | 22 | \item{device}{Type of device used. Currently not used.} 23 | 24 | \item{\ldots}{Further arguments passed to the \code{\link{plot}} function.} 25 | } 26 | \value{ 27 | A plot 28 | } 29 | \description{ 30 | Binned plot of the forcing and outcome variable 31 | } 32 | \details{ 33 | Produces a simple binned plot averaging values within each interval. The length of the intervals 34 | is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth 35 | argument, that gives half of the length of the kernel window. 36 | When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{RDDbw_RSW}}. 37 | } 38 | \examples{ 39 | data(Lee2008) 40 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 41 | plot(Lee2008_rdd) 42 | 43 | ## Specify manually the bandwidth: 44 | plot(Lee2008_rdd, h=0.2) 45 | 46 | ## Show three plots with different bandwidth: 47 | plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3) 48 | 49 | ## Specify instead of the bandwidth, the final number of bins: 50 | plot(Lee2008_rdd, nbins=22) 51 | 52 | ## If the specified number of bins is odd, the larger number is given to side with largest range 53 | plot(Lee2008_rdd, nbins=21) 54 | } 55 | \author{ 56 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 57 | } 58 | 59 | -------------------------------------------------------------------------------- /RDDtools/man/vcovCluster.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{vcovCluster} 3 | \alias{vcovCluster} 4 | \alias{vcovCluster2} 5 | \title{Cluster Heteroskedasticity-consistent estimation of the covariance matrix.} 6 | \usage{ 7 | vcovCluster(object, clusterVar) 8 | 9 | vcovCluster2(object, clusterVar1, clusterVar2) 10 | } 11 | \arguments{ 12 | \item{object}{Object of class lm, from which RDDreg also inherits.} 13 | 14 | \item{clusterVar}{The variable containing the cluster attributions.} 15 | 16 | \item{clusterVar1,clusterVar2}{The two cluster variables for the 2-cluster case.} 17 | } 18 | \value{ 19 | A matrix containing the covariance matrix estimate. 20 | } 21 | \description{ 22 | Offer a cluster variant of the usual Heteroskedasticity-consistent 23 | } 24 | \examples{ 25 | data(STAR_MHE) 26 | if(all(c(require(sandwich), require(lmtest)))){ 27 | 28 | # Run simple regression: 29 | reg_krug <- lm(pscore~cs, data=STAR_MHE) 30 | 31 | # Row 1 of Table 8.2.1, inference with standard vcovHC: 32 | coeftest(reg_krug,vcov.=vcovHC(reg_krug, "HC1"))[2,2] 33 | 34 | # Row 4 of Table 8.2.1, inference with cluster vcovHC: 35 | coeftest(reg_krug,vcov.=vcovCluster(reg_krug, clusterVar=STAR_MHE$classid))[2,2] 36 | } 37 | } 38 | \author{ 39 | Mahmood Arai, see \url{http://people.su.se/~ma/econometrics.html} 40 | } 41 | \references{ 42 | Cameron, C., Gelbach, J. and Miller, D. (2011) Robust Inference With Multiway Clustering, 43 | \emph{Journal of Business and Economic Statistics}, vol. 29(2), pages 238-249. 44 | #' 45 | 46 | Wooldridge (2003) Cluster-sample methods in applied econometrics. 47 | \emph{AmericanEconomic Review}, 93, p. 133-138 48 | 49 | Arai, M. (2011) Cluster-robust standard errors using R, Note available \url{http://people.su.se/~ma/clustering.pdf}. 50 | } 51 | \seealso{ 52 | \code{\link{clusterInf}} for a direct function, allowing also alternative cluster inference methods. 53 | See also \code{\link[rms]{robcov}} from package \code{rms} for another implementation of the cluster robust. 54 | } 55 | 56 | -------------------------------------------------------------------------------- /RDDtools/R/qplot_experim.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | gplot <- function(x, h, xlim=range(object$x, na.rm=TRUE), cex=0.7, nplot=3,type=c("base", "ggplot"),...){ 4 | object <- x 5 | cutpoint <- getCutpoint(object) 6 | 7 | ## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill) 8 | if(missing(h)) { 9 | if(!all(xlim==range(object$x, na.rm=TRUE))){ 10 | object <- subset(object, object$x> min(xlim) & object$x< max(xlim)) 11 | } 12 | h <- RDDbw_RSW(object) 13 | if(is.even(nplot)) { 14 | se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) 15 | } else { 16 | se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) 17 | } 18 | hs <- if(nplot==1) h else se *h 19 | } else { 20 | if(length(h)==1){ 21 | if(is.even(nplot)) { 22 | se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) 23 | } else { 24 | se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) 25 | } 26 | hs <- if(nplot==1) h else se *h 27 | } else { 28 | if(length(h==nplot)){ 29 | hs <- h 30 | } else { 31 | stop("Length of h should be either one or equal to nplot (", nplot, ")") 32 | } 33 | } 34 | } 35 | 36 | 37 | 38 | 39 | ## plot 40 | if(type=="base"){ 41 | par_orig <- par() 42 | par(mfrow=c(nplot,1)) 43 | for(i in 1:nplot){ 44 | plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], xlim=xlim, cex=cex) 45 | } 46 | } else { 47 | 48 | plotBin_out <- plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[1], xlim=xlim, cex=cex, plot=FALSE) 49 | plotBin_out$h <- rep(hs[1], nrow(plotBin_out)) 50 | for(i in 2:nplot){ 51 | new <- plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], xlim=xlim, cex=cex) 52 | new$h <- rep(hs[i], nrow(new)) 53 | plotBin_out <- rbind(plotBin_out, new) 54 | } 55 | 56 | plotBin_out$h <- round(plotBin_out$h,4) 57 | qplot(x=x, y=y, data=plotBin_out)+facet_grid(h~.) 58 | 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /RDDtools/man/as.npregbw.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{as.npregbw} 3 | \alias{as.npreg} 4 | \alias{as.npregbw} 5 | \title{Convert an RDDreg object to a \code{npreg} object} 6 | \usage{ 7 | as.npregbw(x, ...) 8 | 9 | as.npreg(x, ...) 10 | } 11 | \arguments{ 12 | \item{x}{Object of class \code{RDDreg} created by \code{\link{RDDreg_np}} or \code{\link{RDDreg_lm}}} 13 | 14 | \item{\ldots}{Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}}} 15 | } 16 | \value{ 17 | An object of class \code{npreg} or \code{npregbw} 18 | } 19 | \description{ 20 | Convert an RDDobject to a non-parametric regression \code{npreg} from package \code{np} 21 | } 22 | \details{ 23 | This function converts an RDDreg object into an \code{npreg} object from package \code{np} 24 | Note that the output won't be the same, since \code{npreg} does not offer a triangualr kernel, but a gaussian or Epanechinkov one. 25 | Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while RDDreg 26 | proceeds as if the kernerl was univariate. A simple solution to make the multivariate kernel similar to the univariate one 27 | is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel. 28 | } 29 | \examples{ 30 | # Estimate ususal RDDreg: 31 | data(Lee2008) 32 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 33 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) 34 | 35 | ## Convert to npreg: 36 | reg_nonpara_np <- as.npreg(reg_nonpara) 37 | reg_nonpara_np 38 | RDDcoef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE) 39 | 40 | ## Compare with result obtained with a Gaussian kernel: 41 | bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) 42 | reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) 43 | all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np)) 44 | } 45 | \seealso{ 46 | \code{\link{as.lm}} which converts \code{RDDreg} objects into \code{lm}. 47 | } 48 | 49 | -------------------------------------------------------------------------------- /RDDtools/man/STAR_MHE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \docType{data} 3 | \name{STAR_MHE} 4 | \alias{STAR_MHE} 5 | \title{Transformation of the STAR dataset as used in Angrist and Pischke (2008)} 6 | \format{A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset, 7 | all other are created by Angrist and Pischke STAT code. 8 | \describe{ 9 | \item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})} 10 | \item{pscore}{The propensity score (computed by A & P)} 11 | \item{classid}{The id of the class (computed by A & P)} 12 | \item{cs}{Class size (computed by A & P)} 13 | \item{female, nwhite}{Various covariates (computed by A & P)} 14 | }} 15 | \source{ 16 | Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website 17 | \url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}, retrieved on 26 November 2012. 18 | } 19 | \usage{ 20 | STAR_MHE 21 | } 22 | \description{ 23 | Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008) 24 | } 25 | \details{ 26 | ). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio. 27 | The full dataset is described and available in package AER, \code{\link[AER]{STAR}}. 28 | The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website 29 | (\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}), on the webstar.dta. 30 | } 31 | \examples{ 32 | data(STAR_MHE) 33 | 34 | # Compute the group means: 35 | STAR_MHE_means <- aggregate(STAR_MHE[, c("classid", "pscore", "cs")], by=list(STAR_MHE$classid), mean) 36 | 37 | # Regression of means, with weighted average: 38 | reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs) 39 | coef(summary(reg_krug_gls))[2,2] 40 | } 41 | \references{ 42 | Krueger, A. (1999) "Experimental Estimates Of Education Production Functions," 43 | \emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May. 44 | 45 | Angrist, A. ad Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion}, 46 | Princeton University press 47 | } 48 | \seealso{ 49 | \code{\link[AER]{STAR}} for the original dataset. 50 | } 51 | 52 | -------------------------------------------------------------------------------- /RDDtools/man/covarTest_dis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{covarTest_dis} 3 | \alias{covarTest_dis} 4 | \alias{covarTest_dis.RDDdata} 5 | \alias{covarTest_dis.RDDreg} 6 | \title{Testing for balanced covariates: equality of distribution} 7 | \usage{ 8 | covarTest_dis(object, bw, exact = NULL, p.adjust = c("none", "holm", "BH", 9 | "BY", "hochberg", "hommel", "bonferroni")) 10 | 11 | \method{covarTest_dis}{RDDdata}(object, bw = NULL, exact = FALSE, 12 | p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", 13 | "bonferroni")) 14 | 15 | \method{covarTest_dis}{RDDreg}(object, bw = NULL, exact = FALSE, 16 | p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", 17 | "bonferroni")) 18 | } 19 | \arguments{ 20 | \item{object}{object of class RDDdata} 21 | 22 | \item{bw}{a bandwidth} 23 | 24 | \item{exact}{Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed.} 25 | 26 | \item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function} 27 | 28 | \item{\ldots}{currently not used} 29 | } 30 | \value{ 31 | A data frame with, for each covariate, the K-S statistic and its p-value. 32 | } 33 | \description{ 34 | Tests equality of distribution with a Kolmogorov-Smirnov for each covariates, between the two full groups or around the discontinuity threshold 35 | } 36 | \examples{ 37 | data(Lee2008) 38 | 39 | ## Add randomly generated covariates 40 | set.seed(123) 41 | n_Lee <- nrow(Lee2008) 42 | Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 43 | z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 44 | z3 = sample(letters, size = n_Lee, replace = TRUE)) 45 | Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) 46 | 47 | ## Kolmogorov-Smirnov test of equality in distribution: 48 | covarTest_dis(Lee2008_rdd_Z, bw=0.3) 49 | 50 | ## Can also use function covarTest_dis() for a t-test for equality of means around cutoff: 51 | covarTest_mean(Lee2008_rdd_Z, bw=0.3) 52 | ## covarTest_dis works also on regression outputs (bw will be taken from the model) 53 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) 54 | covarTest_dis(reg_nonpara) 55 | } 56 | \author{ 57 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 58 | } 59 | \seealso{ 60 | \code{\link{covarTest_mean}} for the t-test of equality of means 61 | } 62 | 63 | -------------------------------------------------------------------------------- /RDDtools/R/plotBin.R: -------------------------------------------------------------------------------- 1 | #' Bin plotting 2 | #' 3 | #' Do a "scatterplot bin smoothing" 4 | #' 5 | #' @param x Forcing variable 6 | #' @param y Output 7 | #' @param h the bandwidth (defaults to \code{2*sd(runvar)*length(runvar)^(-.5)}) 8 | #' @param cutpoint Cutpoint 9 | #' @param plot Logical. Whether to plot or only returned silently 10 | #' @param type Whether returns the y averages, or the x frequencies 11 | #' @param xlim,cex,main,xlab,ylab Usual parameters passed to plot(), see \code{\link{par}} 12 | #' @param \ldots further arguments passed to plot. 13 | #' @return Returns silently values 14 | #' @references McCrary, Justin. 15 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> 16 | #' @keywords internal 17 | 18 | plotBin <- function(x, y, h=0.05, nbins=NULL, cutpoint=0, plot=TRUE, type=c("value", "number"),xlim=range(x, na.rm=TRUE), cex=0.9,main=NULL, xlab, ylab, ...){ 19 | 20 | type <- match.arg(type) 21 | x_name <- if(missing(xlab)) deparse(substitute(x)) else xlab 22 | y_name <- if(missing(ylab)) deparse(substitute(y)) else ylab 23 | 24 | 25 | ## Set intervals and midpoints 26 | min_x <- min(xlim) 27 | max_x <- max(xlim) 28 | 29 | if(!is.null(nbins)) h <- diff(xlim)/nbins 30 | 31 | K0 <- ceiling((cutpoint-min_x)/h) # Number of cells on left 32 | K1 <- ceiling((cutpoint+max_x)/h) # Number of cells on right 33 | K <- K0+K1 34 | if(!is.null(nbins) && K!=nbins) { 35 | ranges <- c(cutpoint-min_x, cutpoint+max_x) 36 | if(which.min(ranges)==1) { 37 | K0 <- K0-1 38 | } else { 39 | K1 <- K1-1 40 | } 41 | K <- K0+K1 42 | } 43 | 44 | b_k <- cutpoint - (K0-c(1:K)+1)*h # Lee and Lemieux (2010) p. 308 45 | mid_points_bk <- b_k+h/2 46 | n_bins <- length(mid_points_bk) 47 | brk <- c(b_k,cutpoint + (K1+2)*h) 48 | 49 | ## compute output (mean of count) 50 | intervs <- cut(x, breaks=brk, include.lowest=TRUE) 51 | table_intervs <- table(intervs) 52 | n_non0_intervs <- sum(table_intervs!=0) 53 | 54 | y2 <- switch(type, 55 | "value" =tapply(y, intervs, mean, na.rm=TRUE), 56 | "number" =table_intervs) 57 | 58 | 59 | ## plot 60 | if(plot){ 61 | plot(mid_points_bk, as.numeric(y2), pch=19, cex=cex, xlab=x_name, ylab=y_name, xlim=xlim,...) 62 | title(main=main, sub=paste("h=", round(h,4), ",\\tn bins=", n_non0_intervs, sep="")) 63 | abline(v=cutpoint, lty=2) 64 | } 65 | 66 | ## return invisible result 67 | res <- data.frame(x=mid_points_bk,y=y2) 68 | invisible(res) 69 | } 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /RDDtools/man/RDDgenreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDgenreg} 3 | \alias{RDDgenreg} 4 | \title{General polynomial estimator of the regression discontinuity} 5 | \usage{ 6 | RDDgenreg(RDDobject, fun = glm, covariates = NULL, order = 1, bw = NULL, 7 | slope = c("separate", "same"), covar.opt = list(strategy = c("include", 8 | "residual"), slope = c("same", "separate"), bw = NULL), weights, ...) 9 | } 10 | \arguments{ 11 | \item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}} 12 | 13 | \item{covariates}{Formula to include covariates} 14 | 15 | \item{order}{Order of the polynomial regression.} 16 | 17 | \item{bw}{A bandwidth to specify the subset on which the kernel weighted regression is estimated} 18 | 19 | \item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}} 20 | 21 | \item{slope}{Whether slopes should be different on left or right (separate), or the same.} 22 | 23 | \item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).} 24 | 25 | \item{fun}{The function to estimate the parameters} 26 | 27 | \item{\ldots}{Further arguments passed to fun. See the example.} 28 | } 29 | \value{ 30 | An object of class RDDreg_lm and class lm, with specific print and plot methods 31 | } 32 | \description{ 33 | Compute RDD estimate allowing a locally kernel weighted version of any estimation function 34 | possibly on the range specified by bandwidth 35 | } 36 | \details{ 37 | This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}. 38 | It is assumed that the custom funciton has following behaviour: 39 | \enumerate{ 40 | \item A formula interface, together with a \code{data} argument 41 | \item A \code{weight} argument 42 | \item A coef(summary(x)) returning a data-frame containing a column Estimate 43 | } 44 | Note that for the last requirement, this can be accomodated by writing a specific \code{\link{RDDcoef}} 45 | function for the class of the object returned by \code{fun}. 46 | } 47 | \examples{ 48 | ## Step 0: prepare data 49 | data(Lee2008) 50 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 51 | 52 | ## Estimate a local probit: 53 | Lee2008_rdd$y <- with(Lee2008_rdd, ifelse(ymax(seq_breaks)) seq_breaks <- c(seq_breaks, max_x+0.001) 34 | hist(RDDobject$x, breaks=seq_breaks) 35 | abline(v=cutpoint, col=2, lty=2) 36 | } 37 | 38 | test.htest <- list() 39 | test.htest$statistic <- c("z-val"=test$z) 40 | test.htest$p.value <- test$p 41 | test.htest$data.name <- deparse(substitute(RDDobject)) 42 | test.htest$method <- "McCrary Test for no discontinuity of density around cutpoint" 43 | test.htest$alternative <- "Density is discontinuous around cutpoint" 44 | test.htest$estimate <- c(Discontinuity=test$theta) 45 | test.htest$test.output <- test 46 | class(test.htest) <- "htest" 47 | return(test.htest) 48 | } 49 | 50 | # print.MCcraryTest <- function(x,...){ 51 | # cat("#### MC Crary Test of no discontinuity in density\n\n") 52 | # cat("Estimate of discontinuity:\t", x$theta, "\n") 53 | # cat("z-value:\t", x$z, "\t p-value:\t", x$p, "\n") 54 | # } 55 | 56 | if(FALSE){ 57 | 58 | library(RDDtools) 59 | data(Lee2008) 60 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 61 | 62 | dens_test(Lee2008_rdd) 63 | 64 | } -------------------------------------------------------------------------------- /RDDtools/man/covarTest_mean.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{covarTest_mean} 3 | \alias{covarTest_mean} 4 | \alias{covarTest_mean.RDDdata} 5 | \alias{covarTest_mean.RDDreg} 6 | \title{Testing for balanced covariates: equality of means with t-test} 7 | \usage{ 8 | covarTest_mean(object, bw = NULL, paired = FALSE, var.equal = FALSE, 9 | p.adjust = c("none", "holm", "BH", "BY", "hochberg", "hommel", 10 | "bonferroni")) 11 | 12 | \method{covarTest_mean}{RDDdata}(object, bw = NULL, paired = FALSE, 13 | var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg", 14 | "hommel", "bonferroni")) 15 | 16 | \method{covarTest_mean}{RDDreg}(object, bw = NULL, paired = FALSE, 17 | var.equal = FALSE, p.adjust = c("none", "holm", "BH", "BY", "hochberg", 18 | "hommel", "bonferroni")) 19 | } 20 | \arguments{ 21 | \item{object}{object of class RDDdata} 22 | 23 | \item{bw}{a bandwidth} 24 | 25 | \item{paired}{Argument of the \code{\link{t.test}} function: logical indicating whether you want paired t-tests.} 26 | 27 | \item{var.equal}{Argument of the \code{\link{t.test}} function: logical variable indicating whether to treat the two variances as being equal} 28 | 29 | \item{p.adjust}{Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function} 30 | 31 | \item{\ldots}{currently not used} 32 | } 33 | \value{ 34 | A data frame with, for each covariate, the mean on each size, the difference, t-stat and ts p-value. 35 | } 36 | \description{ 37 | Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold 38 | } 39 | \examples{ 40 | data(Lee2008) 41 | 42 | ## Add randomly generated covariates 43 | set.seed(123) 44 | n_Lee <- nrow(Lee2008) 45 | Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 46 | z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 47 | z3 = sample(letters, size = n_Lee, replace = TRUE)) 48 | Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) 49 | 50 | ## test for equality of means around cutoff: 51 | covarTest_mean(Lee2008_rdd_Z, bw=0.3) 52 | 53 | ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: 54 | covarTest_dis(Lee2008_rdd_Z, bw=0.3) 55 | 56 | ## covarTest_mean works also on regression outputs (bw will be taken from the model) 57 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) 58 | covarTest_mean(reg_nonpara) 59 | } 60 | \author{ 61 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 62 | } 63 | \seealso{ 64 | \code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution 65 | } 66 | 67 | -------------------------------------------------------------------------------- /RDDtools/man/plotSensi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{plotSensi} 3 | \alias{plotSensi} 4 | \alias{plotSensi.RDDreg_lm} 5 | \alias{plotSensi.RDDreg_np} 6 | \title{Plot the sensitivity to the bandwidth} 7 | \usage{ 8 | plotSensi(RDDregobject, from, to, by = 0.01, level = 0.95, 9 | output = c("data", "ggplot"), plot = TRUE, ...) 10 | 11 | \method{plotSensi}{RDDreg_np}(RDDregobject, from, to, by = 0.05, 12 | level = 0.95, output = c("data", "ggplot"), plot = TRUE, 13 | device = c("ggplot", "base"), vcov. = NULL, ...) 14 | 15 | \method{plotSensi}{RDDreg_lm}(RDDregobject, from, to, by = 0.05, 16 | level = 0.95, output = c("data", "ggplot"), plot = TRUE, order, 17 | type = c("colour", "facet"), ...) 18 | } 19 | \arguments{ 20 | \item{RDDregobject}{object of a RDD regression, from either \code{\link{RDDreg_lm}} or \code{\link{RDDreg_np}}} 21 | 22 | \item{from}{First bandwidth point. Default value is max(1e-3, bw-0.1)} 23 | 24 | \item{to}{Last bandwidth point. Default value is bw+0.1} 25 | 26 | \item{by}{Increments in the \code{from} \code{to} sequence} 27 | 28 | \item{level}{Level of the confidence interval} 29 | 30 | \item{order}{For parametric models (from \code{\link{RDDreg_lm}}), the order of the polynomial.} 31 | 32 | \item{type}{For parametric models (from \code{\link{RDDreg_lm}}) whether different orders are represented as different colour or as different facets.} 33 | 34 | \item{device}{Whether to draw a base or a ggplot graph.} 35 | 36 | \item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object} 37 | 38 | \item{plot}{Whether to actually plot the data.} 39 | 40 | \item{\ldots}{Further arguments passed to specific methods} 41 | 42 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}} 43 | } 44 | \value{ 45 | A data frame containing the bandwidths and corresponding estimates and confidence intervals. 46 | } 47 | \description{ 48 | Draw a plot showing the LATE estimates depending on multiple bandwidths 49 | } 50 | \examples{ 51 | data(Lee2008) 52 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 53 | 54 | #Non-parametric estimate 55 | bw_ik <- RDDbw_IK(Lee2008_rdd) 56 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik) 57 | plotSensi(reg_nonpara) 58 | plotSensi(reg_nonpara, device="base") 59 | 60 | #Parametric estimate: 61 | reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4, bw=bw_ik) 62 | plotSensi(reg_para_ik) 63 | plotSensi(reg_para_ik, type="facet") 64 | } 65 | \author{ 66 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 67 | } 68 | 69 | -------------------------------------------------------------------------------- /RDDtools/man/plotPlacebo.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{plotPlacebo} 3 | \alias{computePlacebo} 4 | \alias{plotPlacebo} 5 | \alias{plotPlacebo.RDDreg} 6 | \alias{plotPlaceboDens} 7 | \alias{plotPlaceboDens.RDDreg} 8 | \title{Draw a (density) plot of placebo tests} 9 | \usage{ 10 | plotPlacebo(object, device = c("ggplot", "base"), ...) 11 | 12 | \method{plotPlacebo}{RDDreg}(object, device = c("ggplot", "base"), 13 | from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE, 14 | vcov. = NULL, plot = TRUE, output = c("data", "ggplot"), ...) 15 | 16 | plotPlaceboDens(object, device = c("ggplot", "base"), ...) 17 | 18 | \method{plotPlaceboDens}{RDDreg}(object, device = c("ggplot", "base"), 19 | from = 0.25, to = 0.75, by = 0.1, level = 0.95, same_bw = FALSE, 20 | vcov. = NULL, ...) 21 | 22 | computePlacebo(object, from = 0.25, to = 0.75, by = 0.1, level = 0.95, 23 | same_bw = FALSE, vcov. = NULL) 24 | } 25 | \arguments{ 26 | \item{object}{the output of an RDD regression} 27 | 28 | \item{device}{Whether to draw a base or a ggplot graph.} 29 | 30 | \item{\ldots}{Further arguments passed to specific methods.} 31 | 32 | \item{vcov.}{Specific covariance function to pass to coeftest. See help of package \code{\link[sandwich]{sandwich}}.} 33 | 34 | \item{plot}{Whether to actually plot the data.} 35 | 36 | \item{output}{Whether to return (invisibly) the data frame containing the bandwidths and corresponding estimates, or the ggplot object} 37 | 38 | \item{from}{Starting point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint} 39 | 40 | \item{to}{Ending point of the fake cutpoints sequence. Refers ot the quantile of each side of the true cutpoint} 41 | 42 | \item{by}{Increments of the from-to sequence} 43 | 44 | \item{level}{Level of the confidence interval shown} 45 | 46 | \item{same_bw}{Whether to re-estimate the bandwidth at each point} 47 | } 48 | \value{ 49 | A data frame containing the cutpoints, their corresponding estimates and confidence intervals. 50 | } 51 | \description{ 52 | Draw a plot of placebo tests, estimating the impact on fake cutpoints 53 | } 54 | \examples{ 55 | data(Lee2008) 56 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 57 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) 58 | plotPlacebo(reg_nonpara) 59 | 60 | # Use with another vcov function; cluster case 61 | reg_nonpara_lminf <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm") 62 | # need to be a function applied to updated object! 63 | vc <- function(x) vcovCluster(x, clusterVar=model.frame(x)$x) 64 | plotPlacebo(reg_nonpara_lminf, vcov. = vc) 65 | } 66 | \author{ 67 | Matthieu Stigler <\email{Matthieu.Stigler@gmail.com}> 68 | } 69 | 70 | -------------------------------------------------------------------------------- /RDDtools/R/model.matrix.RDD.R: -------------------------------------------------------------------------------- 1 | #' @S3method model.matrix RDDdata 2 | 3 | model.matrix.RDDdata <- function(object, covariates=NULL, order=1, bw=NULL, slope=c("separate", "same"), covar.opt=list(strategy=c("include", "residual"), slope=c("same", "separate"), bw=NULL), covar.strat=c("include", "residual"), ...){ 4 | 5 | checkIsRDD(object) 6 | RDDobject <- object 7 | type <- getType(object) 8 | 9 | if(!missing(covar.strat)) warning("covar.strat is (soon) deprecated arg!") 10 | 11 | slope <- match.arg(slope) 12 | covar.strat <- match.arg(covar.opt$strategy, choices=c("include", "residual")) 13 | covar.slope <- match.arg(covar.opt$slope, choices=c("same", "separate")) 14 | 15 | cutpoint <- getCutpoint(RDDobject) 16 | if(!is.null(covariates) & !hasCovar(RDDobject)) stop("Arg 'covariates' was specified, but no covariates found in 'RDDobject'.") 17 | 18 | ## Construct data 19 | dat <- as.data.frame(RDDobject) 20 | 21 | dat_step1 <- dat[, c("y", "x")] 22 | dat_step1$x <- dat_step1$x -cutpoint 23 | 24 | L <- ifelse(dat_step1$x>= 0, 1,0) 25 | dat_step1$D <- if(type=="Sharp") L else object$z 26 | 27 | if(order>0){ 28 | polys <- poly(dat_step1$x, degree=order, raw=TRUE) 29 | colnames(polys) <- paste("x", 1:order, sep="^") 30 | dat_step1 <- cbind(dat_step1[,c("y", "D")],polys) 31 | if(slope=="separate") { 32 | polys2 <- polys*L 33 | colnames(polys2) <- paste(colnames(polys), "right", sep="_") 34 | dat_step1 <- cbind(dat_step1,polys2) 35 | } 36 | } else { 37 | dat_step1$x <- NULL 38 | } 39 | 40 | ## Covariates 41 | if(!is.null(covariates)){ 42 | covar <- getCovar(RDDobject) 43 | formu.cova <- covariates 44 | 45 | if(grepl("\\.", formu.cova)) formu.cova <- paste(colnames(covar), collapse=" + ") 46 | if(covar.slope=="separate") { 47 | formu.cova <- paste(formu.cova, "+", paste("D*(", formu.cova,")", sep=""), sep=" ") 48 | covar$D <- dat_step1$D 49 | } 50 | 51 | formula.cova <- as.formula(paste("~", formu.cova)) 52 | mf <- model.frame(formula.cova, covar, na.action=na.pass) 53 | M_covar <- model.matrix(formula.cova, data=mf) 54 | 55 | if(covar.strat=="residual"){ 56 | M_covar <- data.frame(y=dat_step1$y, M_covar) 57 | first_stage <- lm(y~., data=M_covar) ## regress y on covariates only 58 | dat_step1$y <- residuals(first_stage) ## change in original data 59 | } else { 60 | rem <- switch(covar.slope, "separate"="^D$|(Intercept)", "same" ="(Intercept)") 61 | M_covar <- M_covar[,-grep(rem, colnames(M_covar)), drop=FALSE ] 62 | dat_step1 <- cbind(dat_step1, M_covar) ## add covar as regressors 63 | } 64 | } 65 | 66 | ## Colnames cleaning 67 | colnames(dat_step1) <- gsub("x\\^1", "x", colnames(dat_step1)) 68 | 69 | ## 70 | if(type=="Fuzzy") dat_step1$ins <- L 71 | 72 | ## return results: 73 | dat_step1 74 | } -------------------------------------------------------------------------------- /RDDtools/misc/locpoly.f: -------------------------------------------------------------------------------- 1 | c Part of R package KernSmooth 2 | c Copyright (C) 1995 M. P. Wand 3 | c 4 | c Unlimited use and distribution (see LICENCE). 5 | 6 | cccccccccc FORTRAN subroutine locpol.f cccccccccc 7 | 8 | c For computing an binned approximation to a 9 | c local bandwidth local polynomial kernel regression estimator 10 | c of an arbitrary derivative of a regression function. 11 | c LINPACK is used for matrix inversion. 12 | 13 | c Last changed: 10/02/95 14 | 15 | subroutine locpol(xcnts,ycnts,idrv,delta,hdisc,Lvec,indic, 16 | + midpts,M,iQ,fkap,ipp,ippp,ss,tt,Smat,Tvec, 17 | + ipvt,cvest) 18 | integer i,j,k,ii,Lvec(*),M,iQ,mid,indic(*),midpts(*),ipvt(*), 19 | + info,idrv,ipp,ippp,indss 20 | double precision xcnts(*),ycnts(*),fkap(*),hdisc(*), 21 | + cvest(*),delta,ss(M,ippp),tt(M,ipp), 22 | + Smat(ipp,ipp),Tvec(ipp),fac 23 | 24 | c Obtain kernel weights 25 | 26 | PRINT *,"Hello" 27 | mid = Lvec(1) + 1 28 | do 10 i=1,(iQ-1) 29 | midpts(i) = mid 30 | fkap(mid) = 1.0d0 31 | do 20 j=1,Lvec(i) 32 | fkap(mid+j) = exp(-(delta*j/hdisc(i))**2/2) 33 | fkap(mid-j) = fkap(mid+j) 34 | 20 continue 35 | mid = mid + Lvec(i) + Lvec(i+1) + 1 36 | 10 continue 37 | midpts(iQ) = mid 38 | fkap(mid) = 1.0d0 39 | do 30 j=1,Lvec(iQ) 40 | fkap(mid+j) = exp(-(delta*j/hdisc(iQ))**2/2) 41 | fkap(mid-j) = fkap(mid+j) 42 | 30 continue 43 | 44 | c Combine kernel weights and grid counts 45 | 46 | do 40 k = 1,M 47 | if (xcnts(k).ne.0) then 48 | do 50 i = 1,iQ 49 | do 60 j = max(1,k-Lvec(i)),min(M,k+Lvec(i)) 50 | if (indic(j).eq.i) then 51 | fac = 1.0d0 52 | ss(j,1) = ss(j,1) + xcnts(k)*fkap(k-j+midpts(i)) 53 | tt(j,1) = tt(j,1) + ycnts(k)*fkap(k-j+midpts(i)) 54 | do 70 ii = 2,ippp 55 | fac = fac*delta*(k-j) 56 | ss(j,ii) = ss(j,ii) 57 | + + xcnts(k)*fkap(k-j+midpts(i))*fac 58 | if (ii.le.ipp) then 59 | tt(j,ii) = tt(j,ii) 60 | + + ycnts(k)*fkap(k-j+midpts(i))*fac 61 | endif 62 | 70 continue 63 | endif 64 | 60 continue 65 | 50 continue 66 | endif 67 | 40 continue 68 | 69 | do 80 k = 1,M 70 | do 90 i = 1,ipp 71 | do 100 j = 1,ipp 72 | indss = i + j - 1 73 | Smat(i,j) = ss(k,indss) 74 | 100 continue 75 | Tvec(i) = tt(k,i) 76 | 90 continue 77 | 78 | call dgefa(Smat,ipp,ipp,ipvt,info) 79 | call dgesl(Smat,ipp,ipp,ipvt,Tvec,0) 80 | 81 | cvest(k) = Tvec(idrv+1) 82 | 83 | 80 continue 84 | 85 | return 86 | end 87 | 88 | cccccccccc End of locpol.f cccccccccc 89 | -------------------------------------------------------------------------------- /RDDtools/R/bw_ROT.R: -------------------------------------------------------------------------------- 1 | #' Bandwidth selector 2 | #' 3 | #' implements dpill 4 | #' 5 | #' @param object object of class RDDdata 6 | #' @references McCrary, Justin. (2008) "Manipulation of the running variable in the regression discontinuity design: A density test," \emph{Journal of Econometrics}. 142(2): 698-714. \url{http://dx.doi.org/10.1016/j.jeconom.2007.05.005} 7 | #' @include plotBin.R 8 | #' @export 9 | #' @author Drew Dimmery <\email{drewd@@nyu.edu}> 10 | #' @examples 11 | #' #No discontinuity 12 | 13 | ### Crary bw 14 | 15 | ROT_bw <- function(object){ 16 | 17 | if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") 18 | cutpoint <- getCutpoint(object) 19 | x <- object$x 20 | y <- object$y 21 | 22 | ##### first step 23 | n <- length(y) 24 | sd_x <- sd(x, na.rm=TRUE) 25 | bw_pilot <- (2*sd_x)/sqrt(n) 26 | 27 | ## hist 28 | his <- plotBin(x=x, y=y, h=bw_pilot, cutpoint=cutpoint,plot=FALSE, type="number") 29 | # his2 <- hist(x, breaks=c(min(x), his[["x"]], max(x))) 30 | x1 <- his$x 31 | y1 <- his[,"y.Freq"] 32 | 33 | ##### second step 34 | 35 | ## regs: 36 | reg_left <- lm(y1 ~ poly(x1, degree=4, raw=TRUE), subset=x1=cutpoint) 38 | 39 | 40 | 41 | } 42 | 43 | 44 | #' Global bandwidth selector of Ruppert, Sheather and Wand (1995) from package \pkg{KernSmooth} 45 | #' 46 | #' Uses the global bandwidth selector of Ruppert, Sheather and Wand (1995) 47 | #' either to the whole function, or to the functions below and above the cutpoint. 48 | #' 49 | #' @param object object of class RDDdata created by \code{\link{RDDdata}} 50 | #' @param type Whether to choose a global bandwidth for the whole function (\code{global}) 51 | #' or for each side (\code{sided}) 52 | #' @return One (or two for \code{sided}) bandwidth value. 53 | #' @references See \code{\link[KernSmooth]{dpill}} 54 | #' @include plotBin.R 55 | #' @seealso \code{\link{RDDbw_IK}} Local RDD bandwidth selector using the plug-in method of Imbens and Kalyanaraman (2012) 56 | #' @import KernSmooth 57 | #' @export 58 | #' @examples 59 | #' data(Lee2008) 60 | #' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) 61 | #' RDDbw_RSW(rd) 62 | 63 | 64 | #### 65 | RDDbw_RSW <- function(object, type=c("global", "sided")){ 66 | 67 | type <- match.arg(type) 68 | 69 | if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") 70 | cutpoint <- getCutpoint(object) 71 | x <- object$x 72 | y <- object$y 73 | 74 | ## 75 | if(type=="global"){ 76 | bw <- dpill(x=x, y=y) 77 | } else { 78 | dat_left <- subset(object, x=cutpoint) 80 | 81 | bw_left <- dpill(x=dat_left$x, y=dat_left$y) 82 | bw_right <- dpill(x=dat_right$x, y=dat_right$y) 83 | bw <- c(bw_left, bw_right) 84 | } 85 | 86 | ## result 87 | bw 88 | } 89 | 90 | 91 | if(FALSE){ 92 | # lee_dat4 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE) 93 | # head(lee_dat4) 94 | # a<-RDDdata(y=lee_dat4[,2], x=lee_dat4[,1], cutpoint=0) 95 | # ROT_bw(object=a) 96 | # RDDbw_RSW(object=a) 97 | RDDbw_RSW(object=a, type="sided") 98 | } 99 | -------------------------------------------------------------------------------- /RDDtools/man/RDDreg_lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDreg_lm} 3 | \alias{RDDreg_lm} 4 | \title{Parametric polynomial estimator of the regression discontinuity} 5 | \usage{ 6 | RDDreg_lm(RDDobject, covariates = NULL, order = 1, bw = NULL, 7 | slope = c("separate", "same"), covar.opt = list(strategy = c("include", 8 | "residual"), slope = c("same", "separate"), bw = NULL), 9 | covar.strat = c("include", "residual"), weights) 10 | } 11 | \arguments{ 12 | \item{RDDobject}{Object of class RDDdata created by \code{\link{RDDdata}}} 13 | 14 | \item{covariates}{Formula to include covariates} 15 | 16 | \item{order}{Order of the polynomial regression.} 17 | 18 | \item{bw}{A bandwidth to specify the subset on which the parametric regression is estimated} 19 | 20 | \item{covar.strat}{DEPRECATED, use covar.opt instead.} 21 | 22 | \item{covar.opt}{Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}).} 23 | 24 | \item{weights}{Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw}} 25 | 26 | \item{slope}{Whether slopes should be different on left or right (separate), or the same.} 27 | } 28 | \value{ 29 | An object of class RDDreg_lm and class lm, with specific print and plot methods 30 | } 31 | \description{ 32 | Compute a parametric polynomial regression of the ATE, 33 | possibly on the range specified by bandwidth 34 | } 35 | \details{ 36 | This function estimates the standard \emph{discontinuity regression}: 37 | \deqn{Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon} 38 | with \eqn{\tau} the main parameter of interest. Several versions of the regression can be estimated, either restricting the slopes to be the same, 39 | i.e \eqn{\beta_{1}=\beta_{2}} (argument \code{slope}). The order of the polynomial in \eqn{X-c} can also be adjusted with argument \code{order}. 40 | Note that a value of zero can be used, which corresponds to the simple \emph{difference in means}, that one would use if the samples were random. 41 | Covariates can also be added in the regression, according to the two strategies discussed in Lee and Lemieux (2010, sec 4.5), through argument \code{covar.strat}: 42 | \describe{ 43 | \item{include}{Covariates are simply added as supplementary regressors in the RD equation} 44 | \item{residual}{The dependent variable is first regressed on the covariates only, then the RDD equation is applied on the residuals from this first step}} 45 | The regression can also be estimated in a neighborhood of the cutpoint with the argument \code{bw}. This make the parametric regression resemble 46 | the non-parametric local kernel \code{\link{RDDreg_np}}. Similarly, weights can also be provided (but not simultaneously to \code{bw}). 47 | 48 | The returned object is a classical \code{lm} object, augmented with a \code{RDDslot}, so usual methods can be applied. As is done in general in R, 49 | heteroskeadsticity-robust inference can be done later on with the usual function from package \pkg{sandwich}. For the case of clustered observations 50 | a specific function \code{\link{clusterInf}} is provided. 51 | } 52 | \examples{ 53 | ## Step 0: prepare data 54 | data(Lee2008) 55 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 56 | ## Step 2: regression 57 | # Simple polynomial of order 1: 58 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 59 | print(reg_para) 60 | plot(reg_para) 61 | 62 | # Simple polynomial of order 4: 63 | reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) 64 | reg_para4 65 | plot(reg_para4) 66 | 67 | # Restrict sample to bandwidth area: 68 | bw_ik <- RDDbw_IK(Lee2008_rdd) 69 | reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4) 70 | reg_para_ik 71 | plot(reg_para_ik) 72 | } 73 | \references{ 74 | TODO 75 | } 76 | 77 | -------------------------------------------------------------------------------- /RDDtools/man/RDDpred.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2 (4.0.1): do not edit by hand 2 | \name{RDDpred} 3 | \alias{RDDpred} 4 | \title{RDD coefficient prediction} 5 | \usage{ 6 | RDDpred(object, covdata, se.fit = TRUE, vcov. = NULL, newdata, 7 | stat = c("identity", "sum", "mean"), weights) 8 | } 9 | \arguments{ 10 | \item{object}{A RDD regression object} 11 | 12 | \item{covdata}{New data.frame specifying the values of the covariates, can have multiple rows.} 13 | 14 | \item{se.fit}{A switch indicating if standard errors are required.} 15 | 16 | \item{vcov.}{Specific covariance function (see package sandwich ), by default uses the \code{\link{vcov}}} 17 | 18 | \item{newdata}{Another data on which to evaluate the x/D variables. Useful in very few cases.} 19 | 20 | \item{stat}{The statistic to use if there are multiple predictions, 'identity' just returns the single values, 'mean' averages them} 21 | 22 | \item{weights}{Eventual weights for the averaging of the predicted values.} 23 | } 24 | \value{ 25 | Returns the predicted value(s), and, if se.fit=TRUE, their standard errors. 26 | } 27 | \description{ 28 | Function to predict the RDD coefficient in presence of covariate (without covariates, returns the same than \code{\link{RDDcoef}}) 29 | } 30 | \details{ 31 | The function \code{RDDpred} does a simple prediction of the RDD effect 32 | \deqn{RDDeffect= \mu(x, z, D=1) - \mu(x, z, D=0)} 33 | When there are no covariates (and z is irrelevant in the equation above), this amounts exactly to the usual RDD coefficient, 34 | shown in the outputs, or obtained with \code{\link{RDDcoef}}. If there were covariates, and if these covariates were estimated using the 35 | \dQuote{include} \emph{strategy} and with different coefficients left and right to the cutoff (i.e. 36 | had argument \emph{slope} = \dQuote{separate}), than the RDD effect is also dependent on the value of the covariate(s). 37 | \code{RDDpred} allows to set the value of the covariate(s) at which to evaluate the RDD effect, by providing a data.frame with 38 | the values for the covariates. Note that the effect can be evaluated at multiple points, if you provide multiple rows of \code{covdata}. 39 | 40 | In pressence of covariate-specific RDD effect, one may wish to estimate an average effect. This can be done by setting the argument \code{stat="mean"}. 41 | Weights can additionally be added, with the argument \code{weights}, to obtain a weighted-average of the predictions. Note however that in most cases, 42 | this will be equivalent to provide covariates at their (weighted) mean value, which will be much faster also! 43 | 44 | Standard errors, obtained setting the argument \code{se.fit=TRUE}, are computed using following formula: 45 | \deqn{x_i \Omega x_i^{'}} 46 | where \eqn{\Omega} is the estimated variance-covariance matrix ( by default \eqn{\sigma^2(X^{'}X)^{-1}} using \code{\link{vcov}}) and 47 | \eqn{x_i} is the input data (a mix of covdata and input data). If one wishes individual predictions, standard errors are simply obtained 48 | as the square of that diagonal matrix, whereas for mean/sum, covariances are taken into account. 49 | } 50 | \examples{ 51 | ## Load data, add (artificial) covariates: 52 | data(Lee2008) 53 | n_Lee <- nrow(Lee2008) 54 | z1 <- runif(n_Lee) 55 | Lee2008_rdd <- RDDdata(y=y, x=x, data=Lee2008, covar=z1, cutpoint=0) 56 | 57 | ## estimation without covariates: RDDpred is the same than RDDcoef: 58 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 59 | 60 | RDDpred(reg_para) 61 | RDDcoef(reg_para, allInfo=TRUE) 62 | 63 | ## estimation with covariates: 64 | reg_para_cov <- RDDreg_lm(RDDobject=Lee2008_rdd, covariates="z1", covar.opt=list(slope="separate")) 65 | RDDpred(reg_para_cov, covdata=data.frame(z1=0)) ## should obtain same result than with RDestimate 66 | RDDpred(reg_para_cov, covdata=data.frame(z1=0.5)) #evaluate at mean of z1 (as comes from uniform) 67 | } 68 | \references{ 69 | Froehlich (2007) Regression discontinuity design with covariates, IZA discussion paper 3024 70 | } 71 | 72 | -------------------------------------------------------------------------------- /RDDtools/R/get_methods.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | # checkIsRDD <- function(object) if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") 4 | # checkIsAnyRDD <- function(object) if(!inherits(object, c("RDDdata", "RDDreg_np"))) stop("Only works for RDDdata objects") 5 | 6 | # function(object) if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") 7 | checkIsAnyRDD <- checkIsRDD <- function(object) { 8 | classesOk <- c("RDDdata", "RDDreg_np", "RDDreg_lm") 9 | if(!inherits(object, classesOk)) stop("Only works for RDDdata objects") 10 | } 11 | 12 | getType <- function(object){ 13 | checkIsRDD(object) 14 | attr(object, "type") 15 | } 16 | 17 | isFuzzy <- function(object){ 18 | checkIsRDD(object) 19 | attr(object, "type")=="Fuzzy" 20 | } 21 | 22 | getCutpoint <- function(object){ 23 | 24 | checkIsRDD(object) 25 | attr(object, "cutpoint") 26 | } 27 | 28 | getOrder <- function(object){ 29 | 30 | checkIsRDD(object) 31 | attr(object, "PolyOrder") 32 | } 33 | 34 | getSlope <- function(object){ 35 | 36 | checkIsRDD(object) 37 | attr(object, "slope") 38 | } 39 | 40 | getBW <- function(object, force.na=FALSE){ 41 | 42 | checkIsAnyRDD(object) 43 | res <- attr(object, "bw") 44 | if(force.na) if(is.null(res)) res <- NA 45 | res 46 | } 47 | 48 | 49 | 50 | ## return the type of inference used by RDDreg_np 51 | infType <- function(x) { 52 | if(is.null(getCall(x)$inference)) "se" else getCall(x)$inference 53 | } 54 | 55 | 56 | hasCovar <- function(object) 57 | UseMethod("hasCovar") 58 | 59 | hasCovar.RDDdata <- function(object) attr(object, "hasCovar") 60 | 61 | hasCovar.RDDreg <- function(object) { 62 | call <- getCall(object) 63 | !is.null(call$covariates) 64 | } 65 | 66 | getCovar <- function(object){ 67 | if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") 68 | if(!hasCovar(object)) stop("object has no covariates") 69 | 70 | rem <- if(isFuzzy(object)) 1:3 else 1:2 71 | res <- object[,-rem, drop=FALSE] 72 | as.data.frame(res) 73 | } 74 | 75 | getCovarNames <- function(object){ 76 | if(!inherits(object, "RDDdata")) stop("Only works for RDDdata objects") 77 | if(!hasCovar(object)) stop("object has no covariates") 78 | 79 | rem <- if(isFuzzy(object)) 1:3 else 1:2 80 | colnames(object)[-rem] 81 | } 82 | 83 | getOriginalX <- function(object){ 84 | 85 | cutpoint <- getCutpoint(object) 86 | x <- object$model[,"x"] 87 | if(cutpoint!=0) x <- x+cutpoint 88 | x 89 | } 90 | 91 | getOriginalX <- function(object) 92 | UseMethod("getOriginalX") 93 | 94 | 95 | getOriginalX.RDDreg <- function(object){ 96 | object$RDDslot$RDDdata[, "x"] 97 | } 98 | 99 | getOriginalX.RDDdata <- function(object){ 100 | object[, "x"] 101 | } 102 | 103 | # getOriginalX.RDDreg_np <- function(object){ 104 | # 105 | # cutpoint <- getCutpoint(object) 106 | # Xnam <- getXname(object) 107 | # x <- object$model[,Xnam] 108 | # if(cutpoint!=0) x <- x+cutpoint 109 | # x 110 | # } 111 | 112 | 113 | getOriginalData <- function(object, na.rm=TRUE, classRDD=TRUE) 114 | UseMethod("getOriginalData") 115 | 116 | # getOriginalData.RDDreg_np <- function(object, na.rm=TRUE){ 117 | # 118 | # cutpoint <- getCutpoint(object) 119 | # Xnam <- getXname(object) 120 | # dat <- object$model[,c("y",Xnam)] 121 | # if(cutpoint!=0) dat[,Xnam] <- dat[,Xnam] +cutpoint 122 | # if(na.rm) dat <- dat[apply(dat, 1, function(x) all(!is.na(x))),] # remove na rows 123 | # dat 124 | # } 125 | 126 | 127 | 128 | getOriginalData.RDDreg <- function(object, na.rm=TRUE, classRDD=TRUE){ 129 | res <- object$RDDslot$RDDdata 130 | if(na.rm) res <- res[apply(res, 1, function(x) all(!is.na(x))),] # remove na rows 131 | if(!classRDD) res <- as.data.frame(res) 132 | res 133 | } 134 | 135 | 136 | 137 | #' @importFrom stats getCall 138 | #' @S3method getCall RDDreg 139 | getCall.RDDreg <- function(x,...) attr(x, "RDDcall") 140 | 141 | 142 | #format(Sys.Date(), "%A %Y-%m-%d") 143 | 144 | -------------------------------------------------------------------------------- /RDDtools/misc/locpoly.R: -------------------------------------------------------------------------------- 1 | locpoly2 <- function(x, y, drv = 0L, degree, kernel = "normal", 2 | bandwidth, gridsize = 401L, bwdisc = 25, range.x, 3 | binned = FALSE, truncate = TRUE) 4 | 5 | { 6 | ## Install safeguard against non-positive bandwidths: 7 | if (!missing(bandwidth) && bandwidth <= 0) 8 | stop("'bandwidth' must be strictly positive") 9 | 10 | drv <- as.integer(drv) 11 | if (missing(degree)) degree <- drv + 1L 12 | else degree <- as.integer(degree) 13 | 14 | if (missing(range.x) && !binned) 15 | if (missing(y)) { 16 | extra <- 0.05*(max(x) - min(x)) 17 | range.x <- c(min(x)-extra, max(x)+extra) 18 | } else range.x <- c(min(x), max(x)) 19 | 20 | ## Rename common variables 21 | M <- gridsize 22 | Q <- as.integer(bwdisc) 23 | a <- range.x[1L] 24 | b <- range.x[2L] 25 | pp <- degree + 1L 26 | ppp <- 2L*degree + 1L 27 | tau <- 4 28 | 29 | ## Decide whether a density estimate or regressionestimate is required. 30 | 31 | if (missing(y)) { # obtain density estimate 32 | y <- NULL 33 | n <- length(x) 34 | gpoints <- seq(a, b, length = M) 35 | xcounts <- linbin(x, gpoints, truncate) 36 | ycounts <- (M-1)*xcounts/(n*(b-a)) 37 | xcounts <- rep(1, M) 38 | } else { # obtain regression estimate 39 | ## Bin the data if not already binned 40 | if (!binned) { 41 | gpoints <- seq(a, b, length = M) 42 | out <- rlbin(x, y, gpoints, truncate) 43 | xcounts <- out$xcounts 44 | ycounts <- out$ycounts 45 | } else { 46 | xcounts <- x 47 | ycounts <- y 48 | M <- length(xcounts) 49 | gpoints <- seq(a, b, length = M) 50 | } 51 | } 52 | 53 | ## Set the bin width 54 | delta <- (b-a)/(M-1L) 55 | 56 | ## Discretise the bandwidths 57 | if (length(bandwidth) == M) { 58 | hlow <- sort(bandwidth)[1L] 59 | hupp <- sort(bandwidth)[M] 60 | hdisc <- exp(seq(log(hlow), log(hupp), length = Q)) 61 | 62 | ## Determine value of L for each member of "hdisc" 63 | Lvec <- floor(tau*hdisc/delta) 64 | 65 | ## Determine index of closest entry of "hdisc" 66 | ## to each member of "bandwidth" 67 | indic <- if (Q > 1L) { 68 | lhdisc <- log(hdisc) 69 | gap <- (lhdisc[Q]-lhdisc[1L])/(Q-1) 70 | if (gap == 0) rep(1, M) 71 | else round(((log(bandwidth) - log(sort(bandwidth)[1L]))/gap) + 1) 72 | } else rep(1, M) 73 | } else if (length(bandwidth) == 1L) { 74 | indic <- rep(1, M) 75 | Q <- 1L 76 | Lvec <- rep(floor(tau*bandwidth/delta), Q) 77 | hdisc <- rep(bandwidth, Q) 78 | } else 79 | stop("'bandwidth' must be a scalar or an array of length 'gridsize'") 80 | 81 | if (min(Lvec) == 0) 82 | stop("Binning grid too coarse for current (small) bandwidth: consider increasing 'gridsize'") 83 | 84 | ## Allocate space for the kernel vector and final estimate 85 | 86 | dimfkap <- 2L * sum(Lvec) + Q 87 | fkap <- rep(0, dimfkap) 88 | curvest <- rep(0, M) 89 | midpts <- rep(0, Q) 90 | ss <- matrix(0, M, ppp) 91 | tt <- matrix(0, M, pp) 92 | Smat <- matrix(0, pp, pp) 93 | Tvec <- rep(0, pp) 94 | ipvt <- rep(0, pp) 95 | 96 | 97 | print(Q) 98 | print(Lvec) 99 | print(indic) 100 | print(dimfkap) 101 | ## Call FORTRAN routine "locpol" 102 | 103 | out <- .Fortran(F_locpol, as.double(xcounts), as.double(ycounts), 104 | as.integer(drv), as.double(delta), as.double(hdisc), 105 | as.integer(Lvec), as.integer(indic), as.integer(midpts), 106 | as.integer(M), as.integer(Q), as.double(fkap), as.integer(pp), 107 | as.integer(ppp), as.double(ss), as.double(tt), 108 | as.double(Smat), as.double(Tvec), as.integer(ipvt), 109 | as.double(curvest)) 110 | 111 | # print(out) 112 | curvest <- gamma(drv+1) * out[[19L]] 113 | 114 | list(x = gpoints, y = curvest) 115 | } 116 | 117 | 118 | environment(locpoly2) <- environment(locpoly) 119 | y <- geyser$waiting 120 | plot(x, y) 121 | fit <- locpoly2(x, y, bandwidth = 0.25) 122 | # fit <- locpoly2(x, y, bandwidth = rep(0.25, 401)) 123 | lines(fit) -------------------------------------------------------------------------------- /RDDtools/R/Waldci.R: -------------------------------------------------------------------------------- 1 | #' Confint allowing vcov 2 | #' 3 | #' Version of vcov allowing for confint 4 | #' @param x Object of class lm or else 5 | #' @param parm specification of which parameters are to be given confidence intervals, see confint 6 | #' @param level the confidence level required, see confint() 7 | #' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich 8 | #' @param df Degrees of freedom 9 | #' @param \ldots Further argument 10 | #' @keywords internal 11 | 12 | waldci <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) 13 | { 14 | UseMethod("waldci") 15 | } 16 | 17 | waldci.default <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = NULL, ...) 18 | { 19 | ## use S4 methods if loaded 20 | coef0 <- if("stats4" %in% loadedNamespaces()) stats4::coef else coef 21 | vcov0 <- if("stats4" %in% loadedNamespaces()) stats4::vcov else vcov 22 | 23 | ## extract coefficients and standard errors 24 | est <- coef0(x) 25 | if(is.null(vcov.)) se <- vcov0(x) else { 26 | if(is.function(vcov.)) se <- vcov.(x) 27 | else se <- vcov. 28 | } 29 | se <- sqrt(diag(se)) 30 | 31 | ## match using names and compute t/z statistics 32 | if(!is.null(names(est)) && !is.null(names(se))) { 33 | anames <- names(est)[names(est) %in% names(se)] 34 | est <- est[anames] 35 | se <- se[anames] 36 | } 37 | 38 | ## process level 39 | a <- (1 - level)/2 40 | a <- c(a, 1 - a) 41 | 42 | ## get quantile from central limit theorem 43 | if(is.null(df)) { 44 | df <- try(df.residual(x), silent = TRUE) 45 | if(inherits(df, "try-error")) df <- NULL 46 | } 47 | if(is.null(df)) df <- 0 48 | fac <- if(is.finite(df) && df > 0) qt(a, df = df) else qnorm(a) 49 | 50 | ## set up confidence intervals 51 | ci <- cbind(est + fac[1] * se, est + fac[2] * se) 52 | colnames(ci) <- paste(format(100 * a, trim = TRUE, scientific = FALSE, digits = 3L), "%") 53 | 54 | ## process parm 55 | if(is.null(parm)) parm <- seq_along(est) 56 | # if(is.character(parm)) parm <- which(parm %in% names(est)) 57 | if(is.character(parm)) parm <- which(names(est)%in% parm ) 58 | ci <- ci[parm, , drop = FALSE] 59 | return(ci) 60 | } 61 | 62 | 63 | ## copy of stats:::format.perc 64 | format.perc <- function (probs, digits) 65 | paste(format(100 * probs, trim = TRUE, scientific = FALSE, digits = digits), 66 | "%") 67 | 68 | waldci.RDDreg_np <- function(x, level = 0.95, vcov. = NULL, df = Inf, ...){ 69 | 70 | inf_met <- infType(x) ## def in Misc.R 71 | if(inf_met=="se"){ 72 | if(!is.null(vcov.)|!is.infinite(df)) {warning("Arg 'vcov.' and 'df' only work for RDDreg with inf='lm'") 73 | } 74 | ## code recycled from stats::confint.default 75 | co <- RDDcoef(x, allInfo=TRUE) 76 | a <- (1 - level)/2 77 | a <- c(a, 1 - a) 78 | fac <- qnorm(a) 79 | pct <- format.perc(a, 3) ## import!! 80 | ci <- array(NA, dim = c(1, 2L), dimnames = list("D", pct)) 81 | ci[] <- co[,"Estimate"] + co[,"Std. Error"] %o% fac 82 | return(ci) 83 | } else { 84 | waldci.default(x$RDDslot$model, parm = "D", level = level, vcov. = vcov., df = df, ...) 85 | } 86 | } 87 | 88 | 89 | 90 | 91 | waldci.glm <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...) 92 | waldci.default(x, parm = parm, level = level, vcov. = vcov., df = df, ...) 93 | 94 | waldci.mlm <- function(x, parm=NULL, level = 0.95, vcov. = NULL, df = NULL, ...) 95 | { 96 | ## obtain vcov 97 | v <- if(is.null(vcov.)) vcov(x) else if(is.function(vcov.)) vcov.(x) else vcov. 98 | 99 | ## nasty hack: replace coefficients so that their names match the vcov() method 100 | x$coefficients <- structure(as.vector(x$coefficients), .Names = colnames(vcov(x))) 101 | 102 | ## call default method 103 | waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...) 104 | } 105 | 106 | waldci.survreg <- function(x, parm = NULL, level = 0.95, vcov. = NULL, df = Inf, ...) 107 | { 108 | if(is.null(vcov.)) v <- vcov(x) else { 109 | if(is.function(vcov.)) v <- vcov.(x) 110 | else v <- vcov. 111 | } 112 | if(length(x$coefficients) < NROW(x$var)) { 113 | x$coefficients <- c(x$coefficients, "Log(scale)" = log(x$scale)) 114 | } 115 | waldci.default(x, parm = parm, level = level, vcov. = v, df = df, ...) 116 | } 117 | 118 | 119 | if(FALSE){ 120 | 121 | library(sandwich) 122 | library(lmtest) 123 | 124 | reg <- lm(freeny) 125 | 126 | ### Regular 127 | all(confint(reg)==waldci(reg)) 128 | confint(reg) 129 | co_reg <- coeftest(reg) 130 | co_reg[,1] + qnorm(0.975)*co_reg[,2] 131 | co_reg[,1] + qt(0.975, df=reg[["df.residual"]] )*co_reg[,2] 132 | 133 | ## vcovHC 134 | waldci(reg, vcov.=vcovHC) 135 | co <- coeftest(reg, vcov.=vcovHC) 136 | co[,1] + qnorm(0.975)*co[,2] 137 | co[,1] + qt(0.975, df=reg[["df.residual"]] )*co[,2] 138 | 139 | } -------------------------------------------------------------------------------- /RDDtools/R/gen_MC_IK.R: -------------------------------------------------------------------------------- 1 | #' Generate Monte Carlo simulations of Imbens and Kalyanaraman 2 | #' 3 | #' Generate the simulations reported in Imbens and Kalyanaraman (2012) 4 | #' @param n The size of sampel to generate 5 | #' @param version The MC version of Imbens and Kalnayaraman (between 1 and 4). 6 | #' @param sd The standard deviation of the error term. 7 | #' @param output Whether to return a data-frame, or already a RDDdata 8 | #' @param size The size of the effect, this depends on the specific version, defaults are as in IK: 0.04, NULL, 0.1, 0.1 9 | #' @return An data frame with x and y variables. 10 | #' @references TODO 11 | #' @export 12 | #' @examples 13 | #' MC1_dat <- gen_MC_IK() 14 | #' MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0) 15 | #' 16 | #' ## Use np regression: 17 | #' reg_nonpara <- RDDreg_np(RDDobject=MC1_rdd) 18 | #' reg_nonpara 19 | #' 20 | #' # Represent the curves: 21 | #' plotCu <- function(version=1, xlim=c(-0.1,0.1)){ 22 | #' res <- gen_MC_IK(sd=0.0000001, n=1000, version=version) 23 | #' res <- res[order(res$x),] 24 | #' ylim <- range(subset(res, x>=min(xlim) & x<=max(xlim), "y")) 25 | #' plot(res, type="l", xlim=xlim, ylim=ylim, main=paste("DGP", version)) 26 | #' abline(v=0) 27 | #' xCut <- res[which(res$x==min(res$x[res$x>=0]))+c(0,-1),] 28 | #' points(xCut, col=2) 29 | #' } 30 | #' layout(matrix(1:4,2, byrow=TRUE)) 31 | #' plotCu(version=1) 32 | #' plotCu(version=2) 33 | #' plotCu(version=3) 34 | #' plotCu(version=4) 35 | #' layout(matrix(1)) 36 | 37 | gen_MC_IK <- function(n=200, version=1, sd=0.1295, output=c("data.frame", "RDDdata"), size){ 38 | 39 | output <- match.arg(output) 40 | if(!version%in% c(1:4) |length(version) !=1) stop("arg 'version' should be between 1 and 4") 41 | 42 | foo <- switch(version, 43 | "1"=gen_MC_IK_1, 44 | "2"=gen_MC_IK_2, 45 | "3"=gen_MC_IK_3, 46 | "4"=gen_MC_IK_4) 47 | if(missing(size)) { 48 | size <- switch(version, 49 | "1"=0.04, 50 | "2"=0, 51 | "3"=0.1, 52 | "4"=0.1) 53 | } 54 | res <- foo(n=n, sd=sd, size=size) 55 | if(output=="RDDdata"){ 56 | res <- RDDdata(x=res$x, y=res$y, cutpoint=0) 57 | } 58 | res 59 | } 60 | 61 | 62 | #################################### 63 | ######### MC 1 64 | #################################### 65 | 66 | gen_MC_IK_1 <- function(n=200, sd=0.1295, size=0.04){ 67 | 68 | ## Regressor: 69 | Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) 70 | X <- 2*Z-1 71 | error <- rnorm(n, sd=sd) 72 | 73 | ## Prepare variables: 74 | Y <- vector("numeric", length=n) 75 | ind_below <- X<0 76 | X_low <- X[ind_below] 77 | X_up <- X[!ind_below] 78 | 79 | ## Compute Y variables: 80 | Y[ind_below] <- 0.48 + 1.27*X_low + 7.18*X_low^2 + 20.21* X_low^3 +21.54*X_low^4 +7.33*X_low^5 + error[ind_below] 81 | Y[!ind_below] <- 0.48+size + 0.84*X_up - 3* X_up^2 + 7.99* X_up^3 - 9.01*X_up^4 +3.56*X_up^5 + error[!ind_below] 82 | 83 | ## Result: 84 | res <- data.frame(x=X, y=Y) 85 | return(res) 86 | } 87 | 88 | #################################### 89 | ######### MC 2 90 | #################################### 91 | 92 | gen_MC_IK_2 <- function(n=200, sd=0.1295, size=0){ 93 | 94 | # if(!missing(size) && !is.null(size)) warning("Argument 'size' ignored for gen_MC_IK_2") 95 | ## Regressor: 96 | Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) 97 | X <- 2*Z-1 98 | error <- rnorm(n, sd=sd) 99 | 100 | ## Compute Y variables: 101 | Y <- ifelse(X<0, 3*X^2, 4*X^2+size) + error 102 | 103 | ## Result: 104 | res <- data.frame(x=X, y=Y) 105 | return(res) 106 | } 107 | 108 | 109 | #################################### 110 | ######### MC 3 111 | #################################### 112 | 113 | gen_MC_IK_3 <- function(n=200, sd=0.1295, size=0.1){ 114 | 115 | ## Regressor: 116 | Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) 117 | X <- 2*Z-1 118 | error <- rnorm(n, sd=sd) 119 | 120 | ## Compute Y variables: 121 | Y <- 0.42 + ifelse(X<0, 0, size) + 0.84*X - 3*X^2 +7.99 * X^3-9.01*X^4+3.56*X^5 + error 122 | 123 | ## Result: 124 | res <- data.frame(x=X, y=Y) 125 | return(res) 126 | } 127 | 128 | #################################### 129 | ######### MC 4 130 | #################################### 131 | 132 | gen_MC_IK_4 <- function(n=200, sd=0.1295, size=0.1){ 133 | 134 | ## Regressor: 135 | Z <- rbeta(n, shape1=2, shape2=4, ncp = 0) 136 | X <- 2*Z-1 137 | error <- rnorm(n, sd=sd) 138 | 139 | ## Compute Y variables: 140 | Y <- 0.42 + ifelse(X<0, 0, size) + 0.84*X +7.99 * X^3-9.01*X^4+3.56*X^5 + error 141 | 142 | ## Result: 143 | res <- data.frame(x=X, y=Y) 144 | return(res) 145 | } 146 | 147 | 148 | #################################### 149 | ######### MC simple 150 | #################################### 151 | 152 | gen_MC_simple <- function(n=200, LATE=0.3){ 153 | 154 | ## Regressor: 155 | x <- rnorm(n) 156 | D <- x>= 0 157 | y <- 0.8 + LATE*D+ 0.3*x+0.1*x*D+rnorm(n) 158 | RDDdata(x=x, y=y, cutpoint=0) 159 | 160 | } -------------------------------------------------------------------------------- /RDDtools/vignettes/RDD_refs.bib: -------------------------------------------------------------------------------- 1 | % This file was created with JabRef 2.7b. 2 | % Encoding: UTF-8 3 | 4 | @TECHREPORT{CalonicoCattaneoEtAl2012, 5 | author = {Sebastian Calonico and Matias D. Cattaneo and Rocio Titiunik}, 6 | title = {Robust Nonparametric Bias-Corrected Inference in the Regression Discontinuity 7 | Design}, 8 | year = {2012}, 9 | owner = {mat}, 10 | timestamp = {2013.04.17} 11 | } 12 | 13 | @ARTICLE{ChengFanEtAl1997, 14 | author = {Cheng, M.-Y. and Fan, J. and Marron, J. S.}, 15 | title = {On Automatic Boundary Corrections}, 16 | journal = {Annals of Statistics}, 17 | year = {1997}, 18 | volume = {25}, 19 | pages = {1691-1708}, 20 | owner = {mat}, 21 | timestamp = {2013.04.17} 22 | } 23 | 24 | @BOOK{FanGijbels1996, 25 | title = {Local Polynomial Modeling and its Implications}, 26 | publisher = {Boca Raton: Chapman and Hall/CRC, Monographs on Statistics and Applied 27 | Probability no. 66}, 28 | year = {1996}, 29 | author = {Fan, J. and Gijbels, I.}, 30 | owner = {mat}, 31 | timestamp = {2013.04.17} 32 | } 33 | 34 | @ARTICLE{FanGijbels1992, 35 | author = {Fan, J. and Gijbels, I.}, 36 | title = {Variable Bandwidth and Local Linear Regression Smoothers}, 37 | journal = {Annals of Statistics}, 38 | year = {1992}, 39 | volume = {20}, 40 | pages = {2008-2036}, 41 | owner = {mat}, 42 | timestamp = {2013.04.17} 43 | } 44 | 45 | @ARTICLE{ImbensKalyanaraman2012, 46 | author = {Guido Imbens And Karthik Kalyanaraman}, 47 | title = {Optimal Bandwidth Choice for the Regression Discontinuity Estimator}, 48 | journal = {Review of Economic Studies}, 49 | year = {2012}, 50 | volume = {79}, 51 | pages = {933-959}, 52 | owner = {mat}, 53 | timestamp = {2013.04.17} 54 | } 55 | 56 | @ARTICLE{ImbensLemieux2008, 57 | author = {Imbens, Guido W. and Lemieux, Thomas}, 58 | title = {Regression discontinuity designs: A guide to practice}, 59 | journal = {Journal of Econometrics}, 60 | year = {2008}, 61 | volume = {142}, 62 | pages = {615-635}, 63 | number = {2}, 64 | month = {February}, 65 | abstract = {In Regression Discontinuity (RD) designs for evaluating causal effects 66 | of interventions, assignment to a treatment is determined at least 67 | partly by the value of an observed covariate lying on either side 68 | of a fixed threshold. These designs were first introduced in the 69 | evaluation literature by Thistlewaite and Campbell (1960). With the 70 | exception of a few unpublished theoretical papers, these methods 71 | did not attract much attention in the economics literature until 72 | recently. Starting in the late 1990s, there has been a large number 73 | of studies in economics applying and extending RD methods. In this 74 | paper we review some of the practical and theoretical issues involved 75 | in the implementation of RD methods.

(This abstract was borrowed 76 | from another version of this item.)}, 77 | owner = {matifou}, 78 | timestamp = {2014.05.21}, 79 | url = {http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html} 80 | } 81 | 82 | @ARTICLE{Lee2008, 83 | author = {David S. Lee}, 84 | title = {Randomized experiments from non-random selection in U.S. House elections}, 85 | journal = {Journal of Econometrics}, 86 | year = {2008}, 87 | volume = {142}, 88 | pages = {675-697}, 89 | owner = {mat}, 90 | timestamp = {2013.04.17} 91 | } 92 | 93 | @ARTICLE{LeeLemieux2010, 94 | author = {Lee, David S. and Thomas Lemieux}, 95 | title = {Regression Discontinuity Designs in Economics}, 96 | journal = {Journal of Economic Literature}, 97 | year = {2010}, 98 | volume = {48(2)}, 99 | pages = {281-355}, 100 | owner = {mat}, 101 | timestamp = {2012.11.19} 102 | } 103 | 104 | @ARTICLE{McCrary2008, 105 | author = {McCrary, Justin}, 106 | title = {Manipulation of the Running Variable in the Regression Discontinuity 107 | Design: A Density Test}, 108 | journal = {Journal of Econometrics}, 109 | year = {2008}, 110 | volume = {142}, 111 | pages = {698-714}, 112 | owner = {mat}, 113 | timestamp = {2013.04.17} 114 | } 115 | 116 | @TECHREPORT{Porter2003, 117 | author = {Porter, Jack}, 118 | title = {Estimation in the Regression Discontinuity Model}, 119 | institution = {University of Wisconsin, Madison, Department of Economics}, 120 | year = {2003}, 121 | owner = {mat}, 122 | timestamp = {2013.04.17} 123 | } 124 | 125 | @ARTICLE{RuppertSheatherEtAl1995, 126 | author = {Ruppert, D. and Sheather, S. J. and Wand, M. P.}, 127 | title = {An effective bandwidth selector for local least squares regression}, 128 | journal = {Journal of the American Statistical Association}, 129 | year = {1995}, 130 | volume = {90}, 131 | pages = {1257-1270}, 132 | owner = {mat}, 133 | timestamp = {2013.04.17} 134 | } 135 | 136 | @comment{jabref-meta: selector_publisher:} 137 | 138 | @comment{jabref-meta: selector_author:} 139 | 140 | @comment{jabref-meta: selector_journal:} 141 | 142 | @comment{jabref-meta: selector_keywords:} 143 | 144 | -------------------------------------------------------------------------------- /RDDtools/tests/RDDtools_vs_rdd.Rout.save: -------------------------------------------------------------------------------- 1 | 2 | R version 2.15.2 (2012-10-26) -- "Trick or Treat" 3 | Copyright (C) 2012 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: x86_64-pc-linux-gnu (64-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | Natural language support but running in an English locale 12 | 13 | R is a collaborative project with many contributors. 14 | Type 'contributors()' for more information and 15 | 'citation()' on how to cite R or R packages in publications. 16 | 17 | Type 'demo()' for some demos, 'help()' for on-line help, or 18 | 'help.start()' for an HTML browser interface to help. 19 | Type 'q()' to quit R. 20 | 21 | > 22 | > library(rdd) 23 | Loading required package: sandwich 24 | Loading required package: lmtest 25 | Loading required package: zoo 26 | 27 | Attaching package: 'zoo' 28 | 29 | The following objects are masked from 'package:base': 30 | 31 | as.Date, as.Date.numeric 32 | 33 | Loading required package: AER 34 | Loading required package: car 35 | Loading required package: survival 36 | Loading required package: splines 37 | Loading required package: Formula 38 | > library(RDDtools) 39 | KernSmooth 2.23 loaded 40 | Copyright M. P. Wand 1997-2009 41 | 42 | RDDtools 0.22 43 | PLEASE NOTE THIS is currently only a development version. 44 | Run vignette('RDDtools') for the documentation 45 | > 46 | > set.seed(1234) 47 | > x<-runif(1000,-1,1) 48 | > cov<-rnorm(1000) 49 | > y<-3+2*x+3*cov+10*(x>=0)+rnorm(1000) 50 | > 51 | > RD <- RDDdata(x=x, y=y, cutpoint=0, covar=cov) 52 | > 53 | > ### Simple estimation: 54 | > bw <- IKbandwidth(X=x, Y=y, cutpoint=0) 55 | > bw 56 | [1] 0.6442702 57 | > rdd_mod <- RDestimate(y~x, bw=bw, se.type="const", model=TRUE)$model[[1]] 58 | > RDDtools_mod <- RDDreg_np(RD, bw=bw, inference="lm") 59 | > 60 | > rdd_co <- coef(summary(rdd_mod)) 61 | > RDDtools_co <- RDDcoef(RDDtools_mod, allCo=TRUE, allInfo=TRUE) 62 | > rdd_co 63 | Estimate Std. Error t value Pr(>|t|) 64 | (Intercept) 2.3870103 0.3039002 7.8545857 1.670299e-14 65 | Tr 10.8995093 0.4071983 26.7670789 7.187232e-107 66 | Xl 0.3076565 1.1003584 0.2795966 7.798762e-01 67 | Xr 1.0007232 1.0724028 0.9331599 3.510850e-01 68 | > RDDtools_co 69 | Estimate Std. Error t value Pr(>|t|) 70 | (Intercept) 2.3870103 0.3039002 7.8545857 1.670299e-14 71 | D 10.8995093 0.4071983 26.7670789 7.187232e-107 72 | x 0.3076565 1.1003584 0.2795966 7.798762e-01 73 | x_right 0.6930668 1.5365013 0.4510681 6.520914e-01 74 | > 75 | > all.equal(rdd_co[-4,], RDDtools_co[1:3,], check.attributes=FALSE) 76 | [1] TRUE 77 | > all.equal(rdd_co[4,1], sum(RDDtools_co[3:4,1]), check.attributes=FALSE) 78 | [1] TRUE 79 | > 80 | > 81 | > ### Covariate estimation: 82 | > rdd_mod_cov <- RDestimate(y~x|cov, kernel="rectangular", bw=5, model=TRUE, se.type="const")$model[[1]] 83 | > RDDtools_mod_cov <- RDDreg_lm(RD, bw=5, covariates="cov", covar.opt=list(slope="separate")) 84 | > 85 | > rdd_co_cov <- coef(summary(rdd_mod_cov)) 86 | > RDDtools_co_cov <- RDDcoef(RDDtools_mod_cov, allCo=TRUE, allInfo=TRUE) 87 | > rdd_co_cov 88 | Estimate Std. Error t value Pr(>|t|) 89 | (Intercept) 2.90737195 0.09660411 30.0957385 5.649434e-142 90 | Tr 10.20606095 0.13062887 78.1302094 0.000000e+00 91 | Xl 1.81515024 0.16640546 10.9079970 3.027120e-26 92 | Xr 1.86396889 0.15068992 12.3695656 8.602692e-33 93 | cov 3.04154403 0.05189778 58.6064361 0.000000e+00 94 | Tr:cov -0.03728164 0.06948406 -0.5365496 5.916988e-01 95 | > RDDtools_co_cov 96 | Estimate Std. Error t value Pr(>|t|) 97 | (Intercept) 2.90737195 0.09660411 30.0957385 5.649434e-142 98 | D 10.20606095 0.13062887 78.1302094 0.000000e+00 99 | x 1.81515024 0.16640546 10.9079970 3.027120e-26 100 | x_right 0.04881865 0.22449550 0.2174594 8.278950e-01 101 | cov 3.04154403 0.05189778 58.6064361 0.000000e+00 102 | `cov:D` -0.03728164 0.06948406 -0.5365496 5.916988e-01 103 | > 104 | > all.equal(rdd_co_cov[-4,], RDDtools_co_cov[-4,], check.attributes=FALSE) 105 | [1] TRUE 106 | > 107 | > ## Fuzzy 108 | > set.seed(123) 109 | > selec <- rbinom(nrow(RD), 1, prob=ifelse(RD$x<0, 0.1, 0.9)) 110 | > RD_rdd_ins <- RDDdata(y=RD$y, x=RD$x, z=selec,cutpoint=0) 111 | > 112 | > RDDto_reg_fuz <- RDDreg_lm(RD_rdd_ins, bw=0.2) 113 | > rdd_reg_fuz <- RDestimate(y~x+selec, data=RD_rdd_ins, kernel="rectangular", bw=0.2, model=TRUE, se.type="const")$model[[2]][[1]] 114 | > 115 | > all.equal(RDDcoef(RDDto_reg_fuz),coef(summary(rdd_reg_fuz))[2,1]) 116 | [1] TRUE 117 | > all.equal(RDDcoef(RDDto_reg_fuz, allCo=TRUE)[1:3],coef(summary(rdd_reg_fuz))[1:3,1], check.attributes=FALSE) 118 | [1] TRUE 119 | > 120 | > 121 | > proc.time() 122 | utilisateur système écoulé 123 | 1.248 0.076 1.325 124 | -------------------------------------------------------------------------------- /RDDtools/R/RDDdata_methods.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | ### SUMMARY method 4 | #' @S3method summary RDDdata 5 | summary.RDDdata <- function(object, ...){ 6 | 7 | cutpoint <- getCutpoint(object) 8 | hasCovar_eng <- ifelse(hasCovar(object), "yes", "no") 9 | cat("### RDDdata object ###\n") 10 | cat("\nCutpoint:", cutpoint, "\n") 11 | cat("Sample size:", 12 | "\n\t-Full :", nrow(object), 13 | "\n\t-Left :", sum(object$x=cutpoint)) 15 | cat("\nCovariates:", hasCovar_eng, "\n") 16 | } 17 | 18 | #' Plot RDDdata 19 | #' 20 | #' Binned plot of the forcing and outcome variable 21 | #' 22 | #' @param x Object of class RDDdata 23 | #' @param h The binwidth parameter (note this differs from the bandwidth parameter!) 24 | #' @param nbins Alternative to h, the total number of bins in the plot. 25 | #' @param xlim The range of the x data 26 | #' @param cex Size of the points, see \code{\link{par}} 27 | #' @param nplot Number of plot to draw 28 | #' @param device Type of device used. Currently not used. 29 | #' @param \ldots Further arguments passed to the \code{\link{plot}} function. 30 | #' @return A plot 31 | #' @details Produces a simple binned plot averaging values within each interval. The length of the intervals 32 | #' is specified with the argument \code{h}, specifying the whole binwidth (contrary to the usual bandwidth 33 | #' argument, that gives half of the length of the kernel window. 34 | #' When no bandwidth is given, the bandwidth of Ruppert et al is used, see \code{\link{RDDbw_RSW}}. 35 | #' 36 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> 37 | #' @examples 38 | #' data(Lee2008) 39 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 40 | #' plot(Lee2008_rdd) 41 | #' 42 | #' ## Specify manually the bandwidth: 43 | #' plot(Lee2008_rdd, h=0.2) 44 | #' 45 | #' ## Show three plots with different bandwidth: 46 | #' plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3) 47 | #' 48 | #' ## Specify instead of the bandwidth, the final number of bins: 49 | #' plot(Lee2008_rdd, nbins=22) 50 | #' 51 | #' ## If the specified number of bins is odd, the larger number is given to side with largest range 52 | #' plot(Lee2008_rdd, nbins=21) 53 | #' @method plot RDDdata 54 | #' @S3method plot RDDdata 55 | 56 | 57 | ### PLOT method 58 | plot.RDDdata <- function(x, h, nbins=NULL, xlim=range(object$x, na.rm=TRUE), cex=0.7, nplot=1, device=c("base", "ggplot"),...){ 59 | 60 | object <- x 61 | cutpoint <- getCutpoint(object) 62 | device <- match.arg(device) 63 | 64 | ## bandwidth: use Ruppert, Sheather and Wand (KernSmooth:::dpill) 65 | if(missing(h) & is.null(nbins)) { 66 | if(!all(xlim==range(object$x, na.rm=TRUE))){ 67 | object <- subset(object, x> min(xlim) & x< max(xlim)) 68 | } 69 | h <- RDDbw_RSW(object) 70 | if(is.even(nplot)) { 71 | se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) 72 | } else { 73 | se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) 74 | } 75 | hs <- if(nplot==1) h else se *h 76 | } else if(!missing(h) & is.null(nbins)){ 77 | if(length(h)==1){ 78 | if(is.even(nplot)) { 79 | se <- seq(from=1-(sum(1:nplot<(nplot/2)))*0.2, to=1+(sum(1:nplot>(nplot/2)))*0.2, by=.2) 80 | } else { 81 | se <- seq(from=1-floor(nplot/2)*0.2, to=1+floor(nplot/2)*0.2, by=.2) 82 | } 83 | hs <- if(nplot==1) h else se *h 84 | } else { 85 | if(length(h==nplot)){ 86 | hs <- h 87 | } else { 88 | stop("Length of h should be either one or equal to nplot (", nplot, ")") 89 | } 90 | } 91 | } else if(!is.null(nbins)){ 92 | hs <- rep(0.05, nplot) 93 | if(length(nbins)!=nplot){ 94 | stop("Length of nbins should be equal to nplot (", nplot, ")") 95 | } 96 | } 97 | 98 | 99 | 100 | 101 | ## plot 102 | 103 | par_orig <- par() 104 | par(mfrow=c(nplot,1)) 105 | for(i in 1:nplot){ 106 | plotBin(x=object$x, y=object$y, cutpoint=cutpoint, h=hs[i], nbins=nbins[i], xlim=xlim, cex=cex,...) 107 | } 108 | par(mfrow=c(1,1)) 109 | 110 | 111 | 112 | ## invisible return: 113 | invisible(object) 114 | } 115 | 116 | 117 | 118 | #' Convert a rdd object to lm 119 | #' @param x An object to convert to lm 120 | #' @return An object of class \code{lm} 121 | #' @seealso \code{\link{as.npreg}} which converts \code{RDDreg} objects into \code{npreg} from package \code{np}. 122 | #' @examples 123 | #' data(Lee2008) 124 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 125 | #' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 126 | #' reg_para_lm <- as.lm(reg_para) 127 | #' reg_para_lm 128 | #' plot(reg_para_lm, which=4) 129 | #' @export 130 | as.lm <- function(x) 131 | UseMethod("as.lm") 132 | 133 | 134 | as.lm_RDD <- function(x){ 135 | 136 | at_x <- attributes(x) 137 | at_x[names(at_x)!="names"] <- NULL 138 | class(x) <- "lm" 139 | 140 | x 141 | } 142 | 143 | #' @S3method as.lm RDDreg_np 144 | as.lm.RDDreg_np <- function(x) as.lm_RDD(x) 145 | 146 | #' @S3method as.lm RDDreg 147 | as.lm.RDDreg <- function(x) as.lm_RDD(x) 148 | 149 | 150 | 151 | 152 | # subset.RDDdata <- function(x,...){ 153 | # 154 | # res <- subset.data.frame(x,...) 155 | # attributes(res) <- attributes(x) 156 | # res 157 | # } 158 | 159 | 160 | ### EXAMPLE 161 | if(FALSE){ 162 | library(RDDtools) 163 | # data(Lee2008) 164 | 165 | 166 | environment(plot.RDDdata) <- environment(RDDdata) 167 | 168 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 169 | plot(Lee2008_rdd) 170 | 171 | plot(Lee2008_rdd, h=0.2) 172 | plot(Lee2008_rdd, h=c(0.2,0.3,0.4), nplot=3) 173 | 174 | plot(Lee2008_rdd, nbins=21) 175 | 176 | } 177 | -------------------------------------------------------------------------------- /RDDtools/R/STAR_MHE-data.R: -------------------------------------------------------------------------------- 1 | #' @name STAR_MHE 2 | #' @title Transformation of the STAR dataset as used in Angrist and Pischke (2008) 3 | #' @description Transformation of the STAR dataset as used in Table 8.2.1 of Angrist and Pischke (2008) 4 | #' @docType data 5 | #' @usage STAR_MHE 6 | #' @seealso \code{\link[AER]{STAR}} for the original dataset. 7 | #' @format A data frame containing 5743 observations and 6 variables. The first variable is from the original dataset, 8 | #' all other are created by Angrist and Pischke STAT code. 9 | #' \describe{ 10 | #' \item{schidkn}{School ID in kindergarden (original variable, schoolidk in \code{\link[AER]{STAR}})} 11 | #' \item{pscore}{The propensity score (computed by A & P)} 12 | #' \item{classid}{The id of the class (computed by A & P)} 13 | #' \item{cs}{Class size (computed by A & P)} 14 | #' \item{female, nwhite}{Various covariates (computed by A & P)} 15 | #' } 16 | #' @details ). This is a transformation of the dataset from the project STAR (Student/Teacher Achievement Ratio. 17 | #' The full dataset is described and available in package AER, \code{\link[AER]{STAR}}. 18 | #' The transformed data was obtained using the STATA script krueger.do, obtained from Joshua Angrist website 19 | #' (\url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}), on the webstar.dta. 20 | #' @references Krueger, A. (1999) "Experimental Estimates Of Education Production Functions," 21 | #' \emph{The Quarterly Journal of Economics}, Vol. 114(2), pages 497-532, May. 22 | #' @references Angrist, A. ad Pischke J-S (2008) \emph{Mostly Harmless Econometrics: An Empiricist's Companion}, 23 | #' Princeton University press 24 | #' @source Data obtained using the script krueger.do on data webstar.rda, found on J. Angrist website 25 | #' \url{http://economics.mit.edu/faculty/angrist/data1/mhe/krueger}, retrieved on 26 November 2012. 26 | #' @examples 27 | #' data(STAR_MHE) 28 | #' 29 | #' # Compute the group means: 30 | #' STAR_MHE_means <- aggregate(STAR_MHE[, c("classid", "pscore", "cs")], by=list(STAR_MHE$classid), mean) 31 | #' 32 | #' # Regression of means, with weighted average: 33 | #' reg_krug_gls <- lm(pscore~cs, data=STAR_MHE_means, weights=cs) 34 | #' coef(summary(reg_krug_gls))[2,2] 35 | 36 | NULL 37 | 38 | 39 | ##### Quick R code used on the output data: 40 | # STAR_MHE <- read.csv(".../abuelita.csv") 41 | # STAR_MHE$female <- as.factor(STAR_MHE$female) 42 | # STAR_MHE$nwhite <- as.factor(STAR_MHE$nwhite) 43 | # STAR_MHE$n <- NULL 44 | # 45 | # save(STAR_MHE, file="STAR_MHE.rda") 46 | 47 | 48 | ##### STATA code krueger.do (retrieved 26 November 2012 on http://economics.mit.edu/faculty/angrist/data1/mhe/krueger) 49 | # version 9 50 | # set more 1 51 | # capture log close 52 | # log using krueger, text replace 53 | # 54 | # /* create Krueger scaled scores */ 55 | # 56 | # /* reading score */ 57 | # 58 | # clear 59 | # use webstar 60 | # 61 | # keep if cltypek > 1 /* regular classes */ 62 | # keep if treadssk ~= . 63 | # 64 | # sort treadssk 65 | # gen pread0 = 100*_n/_N 66 | # 67 | # egen pread = mean(pread0), by(treadssk) /* percentile score in reg. classes */ 68 | # 69 | # keep treadssk pread 70 | # sort tread 71 | # keep if tread ~= tread[_n-1] 72 | # save tempr, replace 73 | # 74 | # /* math score */ 75 | # 76 | # use webstar 77 | # 78 | # keep if cltypek > 1 /* regular classes */ 79 | # keep if tmathssk ~= . 80 | # 81 | # sort tmathssk 82 | # gen pmath0 = 100*_n/_N 83 | # egen pmath = mean(pmath0), by(tmathssk) 84 | # 85 | # keep tmathssk pmath 86 | # sort tmath 87 | # keep if tmath ~= tmath[_n-1] 88 | # save tempm, replace 89 | # 90 | # /* merge percentile scores back on */ 91 | # 92 | # use webstar 93 | # 94 | # keep if stark == 1 95 | # 96 | # sort treadssk 97 | # merge treadssk using tempr 98 | # ipolate pread treadssk, gen(pr) epolate 99 | # drop _merge 100 | # 101 | # sort tmathssk 102 | # merge tmathssk using tempm 103 | # ipolate pmath tmathssk, gen(pm) epolate 104 | # replace pm = 0 if pm < 0 105 | # drop _merge 106 | # 107 | # egen pscore = rowmean(pr pm) 108 | # 109 | # /* make class ids */ 110 | # 111 | # egen classid1 = group(schidkn cltypek) 112 | # egen cs1 = count(classid1), by(classid1) 113 | # 114 | # egen classid2 = group(classid1 totexpk hdegk cladk) if cltypek==1 & cs >= 20 115 | # egen classid3 = group(classid1 totexpk hdegk cladk) if cltypek>1 & cs >= 30 116 | # 117 | # gen temp = classid1*100 118 | # egen classid = rowtotal(temp classid2 classid3) 119 | # egen cs = count(classid), by(classid) 120 | # 121 | # gen female = ssex == 2 122 | # gen nwhite = srace >= 2 & srace <= 6 if srace ~= . 123 | # 124 | # keep if cs <= 27 & pscore ~= . 125 | # keep pscore cs schidkn classid female nwhite 126 | # gen n = 1 127 | # 128 | # save temp, replace 129 | # 130 | # reg pscore cs, robust 131 | # local se = _se[cs] 132 | # local t = _b[cs]/`se' 133 | # predict r, res 134 | # loneway r classid 135 | # local rho = r(rho) 136 | # 137 | # collapse cs, by(classid) 138 | # sum cs 139 | # 140 | # dis r(Var) 141 | # local m = 1 + (r(Var)/r(mean) + r(mean) - 1)*`rho' 142 | # dis `m' 143 | # dis sqrt(`m') 144 | # dis `se' 145 | # dis sqrt(`m')*`se' 146 | # dis `t'/sqrt(`m') 147 | # 148 | # 149 | # use temp, clear 150 | # 151 | # reg pscore cs, robust 152 | # moulton pscore cs, cluster(classid) moulton 153 | # moulton pscore cs, cluster(classid) 154 | # reg pscore cs, cluster(classid) 155 | # brl pscore cs, cluster(classid) 156 | # 157 | # 158 | # 159 | # set seed 123456789 160 | # bootstrap "reg pscore cs" _b, reps(1000) cluster(classid) 161 | # 162 | # areg pscore, absorb(classid) 163 | # predict hat 164 | # gen ry = pscore - hat + _b[_cons] 165 | # collapse (mean) ry cs (sum) n, by(classid) 166 | # 167 | # reg ry cs [aw=n] 168 | # 169 | # 170 | # log close 171 | # set more 0 172 | -------------------------------------------------------------------------------- /RDDtools/R/as.npreg.R: -------------------------------------------------------------------------------- 1 | #' Convert an RDDreg object to a \code{npreg} object 2 | #' 3 | #' Convert an RDDobject to a non-parametric regression \code{npreg} from package \code{np} 4 | #' @param x Object of class \code{RDDreg} created by \code{\link{RDDreg_np}} or \code{\link{RDDreg_lm}} 5 | #' @param \ldots Further arguments passed to the \code{\link{npregbw}} or \code{\link{npreg}} 6 | #' @details This function converts an RDDreg object into an \code{npreg} object from package \code{np} 7 | #' Note that the output won't be the same, since \code{npreg} does not offer a triangualr kernel, but a gaussian or Epanechinkov one. 8 | #' Another reason why estimates might differ slightly is that \code{npreg} implements a multivariate kernel, while RDDreg 9 | #' proceeds as if the kernerl was univariate. A simple solution to make the multivariate kernel similar to the univariate one 10 | #' is to set the bandwidth for x and Dx to a large number, so that they converge towards a constant, and one obtains back the univariate kernel. 11 | #' @export 12 | #' @return An object of class \code{npreg} or \code{npregbw} 13 | #' @seealso \code{\link{as.lm}} which converts \code{RDDreg} objects into \code{lm}. 14 | #' @examples 15 | #' # Estimate ususal RDDreg: 16 | #' data(Lee2008) 17 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 18 | #' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) 19 | #' 20 | #' ## Convert to npreg: 21 | #' reg_nonpara_np <- as.npreg(reg_nonpara) 22 | #' reg_nonpara_np 23 | #' RDDcoef(reg_nonpara_np, allCo=TRUE, allInfo=TRUE) 24 | #' 25 | #' ## Compare with result obtained with a Gaussian kernel: 26 | #' bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) 27 | #' reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) 28 | #' all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np)) 29 | 30 | 31 | 32 | 33 | 34 | 35 | as.npregbw <- function(x,...){ 36 | res <- as.npregbw_low(x=x, npreg=FALSE,...) 37 | res 38 | } 39 | 40 | #' @rdname as.npregbw 41 | #' @export 42 | as.npreg <- function(x,...){ 43 | res <- as.npregbw_low(x=x, npreg=TRUE,...) 44 | res 45 | } 46 | 47 | 48 | as.npregbw_low <- function(x, npreg=FALSE, adjustIK_bw=TRUE, ...){ 49 | 50 | dat <- getOriginalData(x) 51 | bw <- getBW(x) 52 | cutpoint <- getCutpoint(x) 53 | 54 | ## Specify inputs to npregbw: 55 | 56 | ## data: 57 | x <- dat$x 58 | dat_np <- data.frame(y=dat$y, x=x, D=ifelse(x>=cutpoint,1,0), Dx=ifelse(x>=cutpoint,x,0)) 59 | dataPoints <- data.frame(x=c(cutpoint,cutpoint), D=c(0,1), Dx=c(0,cutpoint)) 60 | 61 | ## bw: 62 | range.x <- range(dat$x, na.rm=TRUE, finite=TRUE) 63 | if(adjustIK_bw ){ ## & names(bw) =="h_opt" 64 | bw <- RDDbw_IK(dat, kernel="Normal") 65 | } 66 | bw_other <- 9999*diff(range.x) 67 | bws <- c(bw, rep(bw_other, 2)) 68 | 69 | 70 | ## start npregbw 71 | res <- npregbw(bws=bws, formula=y~x+D+Dx, data= dat_np, regtype = "ll", 72 | eval=dataPoints, bandwidth.compute=FALSE, gradients=TRUE,...) 73 | class(res) <- c("RDDreg_npregbw", class(res)) 74 | 75 | ## if npreg, return instead model_np <- npreg(bw_np, newdata=dataPoints, gradients=TRUE) 76 | if(npreg) { 77 | options(np.messages = TRUE) ## otherwise got warnings messages... probably because comes only if loaded! 78 | res <- npreg(res, newdata=dataPoints, gradients=TRUE,...) 79 | class(res) <- c("RDDreg_npreg", class(res)) 80 | } 81 | attr(res, "RDDdf") <- dat_np 82 | attr(res, "cutpoint") <- cutpoint 83 | res 84 | } 85 | 86 | 87 | #' @S3method RDDcoef RDDreg_npreg 88 | RDDcoef.RDDreg_npreg <- function(object, allInfo=FALSE, allCo=FALSE, ...){ 89 | 90 | co <- diff(object$mean) 91 | if(allInfo) { 92 | se <- sum(object$merr) 93 | zval <- co/se 94 | pval <- 2 * pnorm(abs(zval), lower.tail = FALSE) 95 | res <- cbind(co, se, zval, pval) 96 | colnames(res) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)") 97 | rownames(res) <- "D" 98 | } else { 99 | res <- co 100 | } 101 | 102 | if(allCo){ 103 | cos <- c(object$mean[1], object$grad) 104 | ses <- c(object$merr[1], object$gerr) 105 | 106 | ## X_right: 107 | dataPoints_Xr <- data.frame(x=0, D=0, Dx=c(0,1)) 108 | Xr <- diff(predict(object, newdata=dataPoints_Xr)) 109 | 110 | estimates <- c(cos[1], co, cos[2], Xr) 111 | 112 | if(allInfo){ 113 | zvals <- cos/ses 114 | pvals <- 2 * pnorm(abs(zvals), lower.tail = FALSE) 115 | res <- data.frame("Estimate" = estimates, 116 | "Std. Error" = c(ses[1], se, ses[2:3]), 117 | "z value" = c(zvals[1], zval, zvals[2:3]), 118 | "Pr(>|z|)" = c(pvals[1], pval, pvals[2:3]), 119 | check.names=FALSE) 120 | rownames(res) <- c("(Intercept)", "D", "x_left", "x_right") 121 | } else { 122 | res <- estimates 123 | } 124 | } 125 | 126 | res 127 | } 128 | 129 | 130 | if(FALSE){ 131 | library(RDDtools) 132 | data(Lee2008) 133 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 134 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) 135 | 136 | # environment(as.npregbw_low) <- environment(RDDdata) 137 | reg_nonpara_npbw <- as.npregbw(reg_nonpara) 138 | reg_nonpara_npbw 139 | class(reg_nonpara_npbw) 140 | RDDcoef(reg_nonpara_npbw) 141 | 142 | reg_nonpara_np <- as.npreg(reg_nonpara, adjustIK_bw=FALSE) 143 | reg_nonpara_np 144 | class(reg_nonpara_np) 145 | RDDcoef(reg_nonpara_np) 146 | RDDcoef(reg_nonpara_np, allInfo=TRUE) 147 | RDDcoef(reg_nonpara_np, allInfo=TRUE, allCo=TRUE) 148 | 149 | ## manual predict: 150 | 151 | cutpoint <- 0 152 | dataPoints <- data.frame(x=c(cutpoint,cutpoint), D=c(0,1), Dx=c(0,cutpoint)) 153 | dataPoints2 <- data.frame(x=0, D=c(0,1), Dx=0) 154 | dataPoints3 <- data.frame(x=c(0,1), D=0, Dx=0) 155 | dataPoints3 <- data.frame(x=0, D=0, Dx=c(0,1)) 156 | 157 | diff(predict(reg_nonpara_np, newdata=dataPoints)) 158 | diff(predict(reg_nonpara_np, newdata=dataPoints2)) 159 | 160 | diff(predict(reg_nonpara_np, newdata=dataPoints3)) 161 | RDDcoef(reg_nonpara_gaus, allCo=TRUE) 162 | 163 | ## compare: 164 | bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) 165 | reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) 166 | all.equal(RDDcoef(reg_nonpara_gaus),RDDcoef(reg_nonpara_np)) 167 | all.equal(RDDcoef(reg_nonpara_gaus, allCo=TRUE),RDDcoef(reg_nonpara_np, allCo=TRUE), check=FALSE) 168 | 169 | 170 | } 171 | -------------------------------------------------------------------------------- /RDDtools/tests/simple_MC.Rout.save: -------------------------------------------------------------------------------- 1 | 2 | R version 2.15.2 (2012-10-26) -- "Trick or Treat" 3 | Copyright (C) 2012 The R Foundation for Statistical Computing 4 | ISBN 3-900051-07-0 5 | Platform: x86_64-pc-linux-gnu (64-bit) 6 | 7 | R is free software and comes with ABSOLUTELY NO WARRANTY. 8 | You are welcome to redistribute it under certain conditions. 9 | Type 'license()' or 'licence()' for distribution details. 10 | 11 | Natural language support but running in an English locale 12 | 13 | R is a collaborative project with many contributors. 14 | Type 'contributors()' for more information and 15 | 'citation()' on how to cite R or R packages in publications. 16 | 17 | Type 'demo()' for some demos, 'help()' for on-line help, or 18 | 'help.start()' for an HTML browser interface to help. 19 | Type 'q()' to quit R. 20 | 21 | > 22 | > library(RDDtools) 23 | Loading required package: AER 24 | Loading required package: car 25 | Loading required package: lmtest 26 | Loading required package: zoo 27 | 28 | Attaching package: 'zoo' 29 | 30 | The following objects are masked from 'package:base': 31 | 32 | as.Date, as.Date.numeric 33 | 34 | Loading required package: sandwich 35 | Loading required package: survival 36 | Loading required package: splines 37 | KernSmooth 2.23 loaded 38 | Copyright M. P. Wand 1997-2009 39 | 40 | RDDtools 0.22 41 | PLEASE NOTE THIS is currently only a development version. 42 | Run vignette('RDDtools') for the documentation 43 | > library(rdd) 44 | Loading required package: Formula 45 | > 46 | > ## simple MC: 47 | > set.seed(123) 48 | > 49 | > MC_simple <- function(n=200, CATE=0.3, HATE=0.1){ 50 | + x <- rnorm(n, mean=20, sd=5) 51 | + D <- x>= 20 52 | + y <- 0.8 + CATE*D+ 0.3*x+HATE*x*D+rnorm(n, sd=0.1) 53 | + cat("effect", CATE+HATE*20, "\n") 54 | + RDDdata(x=x, y=y, cutpoint=20) 55 | + 56 | + } 57 | > 58 | > input_mc <- MC_simple(n=1000, CATE=0.4) 59 | effect 2.4 60 | > plot(input_mc) 61 | > 62 | > RDD_bw <- RDDbw_IK(input_mc) 63 | > 64 | > RDD_np_sep <- RDDreg_np(input_mc, bw=RDD_bw) 65 | > RDD_np_same <- RDDreg_np(input_mc, slope="same", bw=RDD_bw) 66 | > RDD_np_sep_inflm <- RDDreg_np(input_mc, bw=RDD_bw, inf="lm") 67 | > RDD_np_same_inflm <- RDDreg_np(input_mc, slope="same", bw=RDD_bw, inf="lm") 68 | > RDD_lm_sep <- RDDreg_lm(input_mc, bw=RDD_bw) 69 | > RDD_lm_same <- RDDreg_lm(input_mc, slope="same", bw=RDD_bw) 70 | > rdd_RDe <- RDestimate(y~x, data=input_mc, cutpoint=20, model=TRUE, bw=RDD_bw) 71 | > 72 | > 73 | > printCoefmat(coef(summary(RDD_np_sep_inflm$RDDslot$model))) 74 | Estimate Std. Error t value Pr(>|t|) 75 | (Intercept) 6.7943125 0.0074768 908.722 < 2.2e-16 *** 76 | D 2.4175554 0.0106230 227.578 < 2.2e-16 *** 77 | x 0.2984534 0.0022980 129.876 < 2.2e-16 *** 78 | x_right 0.1007346 0.0032831 30.683 < 2.2e-16 *** 79 | --- 80 | Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 81 | > printCoefmat(coef(summary(RDD_np_same_inflm$RDDslot$model))) 82 | Estimate Std. Error t value Pr(>|t|) 83 | (Intercept) 6.9205374 0.0088024 786.21 < 2.2e-16 *** 84 | D 2.4225702 0.0149756 161.77 < 2.2e-16 *** 85 | x 0.3478051 0.0023140 150.31 < 2.2e-16 *** 86 | --- 87 | Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 88 | > printCoefmat(coef(summary(RDD_lm_sep))) 89 | Estimate Std. Error t value Pr(>|t|) 90 | (Intercept) 6.7962504 0.0079252 857.552 < 2.2e-16 *** 91 | D 2.4109453 0.0112070 215.129 < 2.2e-16 *** 92 | x 0.2992111 0.0017938 166.802 < 2.2e-16 *** 93 | x_right 0.1018062 0.0025548 39.849 < 2.2e-16 *** 94 | --- 95 | Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 96 | > printCoefmat(coef(summary(RDD_lm_same))) 97 | Estimate Std. Error t value Pr(>|t|) 98 | (Intercept) 6.9762180 0.0106354 655.95 < 2.2e-16 *** 99 | D 2.4137377 0.0183016 131.89 < 2.2e-16 *** 100 | x 0.3494005 0.0020859 167.51 < 2.2e-16 *** 101 | --- 102 | Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 103 | > printCoefmat(coef(summary(rdd_RDe $model[[1]]))) 104 | Estimate Std. Error t value Pr(>|t|) 105 | (Intercept) 6.7943125 0.0074768 908.72 < 2.2e-16 *** 106 | Tr 2.4175554 0.0106230 227.58 < 2.2e-16 *** 107 | Xl 0.2984534 0.0022980 129.88 < 2.2e-16 *** 108 | Xr 0.3991880 0.0023448 170.24 < 2.2e-16 *** 109 | --- 110 | Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 111 | > 112 | > 113 | > ## few checks: 114 | > plse <- plotSensi(RDD_np_sep, from=5, to=20, by=0.5) 115 | > plotPlacebo(RDD_np_sep) 116 | > 117 | > plotSensi(RDD_np_same, from=5, to=20, by=0.5) 118 | > plotPlacebo(RDD_np_same) 119 | > 120 | > a<-plotSensi(RDD_lm_sep, from=5, to=20, by=0.5) 121 | > plotPlacebo(RDD_lm_sep) 122 | > 123 | > plotSensi(RDD_lm_same, from=5, to=20, by=0.5) 124 | > plotPlacebo(RDD_lm_same) 125 | > 126 | > #### Other MCs: 127 | > set.seed(123) 128 | > head(gen_MC_IK()) 129 | x y 130 | 1 -0.5604223 0.0192401 131 | 2 -0.4325322 0.2071696 132 | 3 0.4824464 0.8091620 133 | 4 -0.3013330 0.4993961 134 | 5 -0.2740911 0.4570206 135 | 6 -0.1112708 0.3558237 136 | > 137 | > set.seed(123) 138 | > head(gen_MC_IK(output="RDDdata")) 139 | x y 140 | 1 -0.5604223 0.0192401 141 | 2 -0.4325322 0.2071696 142 | 3 0.4824464 0.8091620 143 | 4 -0.3013330 0.4993961 144 | 5 -0.2740911 0.4570206 145 | 6 -0.1112708 0.3558237 146 | > 147 | > set.seed(123) 148 | > head(gen_MC_IK(version=2)) 149 | x y 150 | 1 -0.5604223 0.775848845 151 | 2 -0.4325322 0.486922823 152 | 3 0.4824464 1.011047103 153 | 4 -0.3013330 0.416130145 154 | 5 -0.2740911 0.317010484 155 | 6 -0.1112708 -0.009950054 156 | > 157 | > set.seed(123) 158 | > head(gen_MC_IK(version=3)) 159 | x y 160 | 1 -0.5604223 -3.6512588 161 | 2 -0.4325322 -1.5947076 162 | 3 0.4824464 0.8091620 163 | 4 -0.3013330 -0.2635494 164 | 5 -0.2740911 -0.1648652 165 | 6 -0.1112708 0.2298459 166 | > 167 | > set.seed(123) 168 | > head(gen_MC_IK(version=4)) 169 | x y 170 | 1 -0.5604223 -2.709039228 171 | 2 -0.4325322 -1.033455253 172 | 3 0.4824464 1.507425459 173 | 4 -0.3013330 0.008855458 174 | 5 -0.2740911 0.060512581 175 | 6 -0.1112708 0.266989475 176 | > 177 | > proc.time() 178 | utilisateur système écoulé 179 | 1.23 0.07 1.46 180 | -------------------------------------------------------------------------------- /RDDtools/R/deprecated.R: -------------------------------------------------------------------------------- 1 | 2 | plotPlacebo_OLD<- function(RDDregobject, from, to, by=0.1, level=0.95, same_bw=FALSE){ 3 | 4 | object <- RDDregobject 5 | bw <- getBW(object) 6 | cutpoint <- getCutpoint(object) 7 | forc_var <- object$model[,"x^1"] 8 | 9 | ## set grid: 10 | if(missing(from)) from <- median(forc_var[forc_var=cutpoint]) 12 | 13 | seqi <- sort(c(cutpoint,seq(from=from, to=to, by=by))) 14 | n_seqi <- length(seqi) 15 | 16 | ## set matrix for results: 17 | seq_vals <- matrix(NA, nrow=n_seqi, ncol=4, dimnames=list(seqi, c("LATE", "se", "CI_low", "CI_high"))) 18 | 19 | ## get call: 20 | object_call <- attr(object, "RDDcall") 21 | 22 | ## original dataset: 23 | dat_orig <- eval(object_call$RDDobject) 24 | 25 | ## run each time: 26 | for(i in seq_along(seqi)){ 27 | attr(dat_orig, "cutpoint") <- seqi[i] 28 | bw_reg <- if(same_bw) bw else RDDbw_IK(dat_orig) 29 | object_new <- RDDreg_np(dat_orig, bw=bw_reg) 30 | if(!inherits(object_new, "try-error")){ 31 | co <- coef(summary(object_new))[2,, drop=FALSE] 32 | seq_vals[i,"LATE"] <- co[,1] 33 | seq_vals[i,"se"] <- co[,2] 34 | } 35 | } 36 | 37 | ## compute intervals: 38 | probs <- (1 - level)/2 39 | probs <- c(probs, 1 - probs) 40 | quants <- qnorm(probs) 41 | seq_vals[,"CI_low"] <- seq_vals[,"LATE"] +quants[1]*seq_vals[,"se"] 42 | seq_vals[,"CI_high"] <- seq_vals[,"LATE"] +quants[2]*seq_vals[,"se"] 43 | 44 | 45 | ## plot results: 46 | ra <- range(seq_vals[,c("CI_low", "CI_high")], na.rm=TRUE) 47 | plot(seqi, seq_vals[,"LATE"], type="l", ylab="LATE", xlab="Cutpoints", ylim=ra) 48 | title("Placebo test") 49 | 50 | lines(seqi, seq_vals[,"CI_low"], lty=2) 51 | lines(seqi, seq_vals[,"CI_high"], lty=2) # 52 | abline(h=0) 53 | 54 | ## add optim in case: 55 | est <- RDDcoef(object) 56 | points(cutpoint, RDDcoef(RDDregobject), col=2) 57 | segments(cutpoint,ra[1]-1, cutpoint, est, col="red", lty=2) 58 | segments(min(seqi,na.rm=TRUE)-1, est, cutpoint, est, col="red", lty=2) 59 | 60 | ## export (silently) results: 61 | invisible(seq_vals) 62 | } 63 | 64 | 65 | plotPlacebo_OTHER_OLD <- function(RDDregobject, from=0.25, to=0.75, by=0.1, level=0.95, same_bw=FALSE, device=c("ggplot", "base")){ 66 | 67 | device <- match.arg(device) 68 | object <- RDDregobject 69 | bw <- getBW(object) 70 | cutpoint <- getCutpoint(object) 71 | forc_var <- getOriginalX(RDDregobject) 72 | 73 | ## set grid: 74 | quants_left <- quantile(forc_var[forc_var=cutpoint], probs=c(from, to)) 76 | 77 | seqi_left <- seq(from=quants_left[1], to=quants_left[2], by=by) 78 | seqi_right <- seq(from=quants_right[1], to=quants_right[2], by=by) 79 | seqi <- c(seqi_left, seqi_right) 80 | 81 | n_seqi_left <- length(seqi_left) 82 | n_seqi_right <- length(seqi_right) 83 | n_seqi <- length(seqi) 84 | 85 | ## set matrix for results: 86 | seq_vals <- matrix(NA, nrow=n_seqi, ncol=6) 87 | colnames(seq_vals) <- c("cutpoint", "position", "LATE", "se", "CI_low", "CI_high") 88 | seq_vals[, "cutpoint"] <- seqi 89 | 90 | ## get call: 91 | object_call <- attr(object, "RDDcall") 92 | 93 | ## original dataset: 94 | dat_orig <- eval(object_call$RDDobject) 95 | 96 | ## run each time: 97 | for(i in seq_along(seqi)){ 98 | 99 | ## select sample: 100 | if(seqi[i]cutpoint) ## exclude x>cutpoint 104 | } 105 | 106 | ## change the cutpoint 107 | attr(dat_sides, "cutpoint") <- seqi[i] 108 | 109 | ## Re-estimate model and eventually bw 110 | bw_reg <- if(same_bw) bw else RDDbw_IK(dat_sides) 111 | object_new <- RDDreg_np(dat_sides, bw=bw_reg) 112 | 113 | ## assign results (LATE and se) 114 | if(!inherits(object_new, "try-error")){ 115 | co <- coef(summary(object_new))[2,, drop=FALSE] 116 | seq_vals[i,"LATE"] <- co[,1] 117 | seq_vals[i,"se"] <- co[,2] 118 | } 119 | } 120 | 121 | ## compute intervals: 122 | probs <- (1 - level)/2 123 | probs <- c(probs, 1 - probs) 124 | quants <- qnorm(probs) 125 | seq_vals[,"CI_low"] <- seq_vals[,"LATE"] +quants[1]*seq_vals[,"se"] 126 | seq_vals[,"CI_high"] <- seq_vals[,"LATE"] +quants[2]*seq_vals[,"se"] 127 | 128 | 129 | ## plot results: 130 | # prepare df: 131 | seq_vals <- as.data.frame(seq_vals) 132 | seq_vals$position <- ifelse(seq_vals$cutpoint < cutpoint, "left", "right") 133 | 134 | # get estimates at true cutpoint : 135 | est <- RDDcoef(object) 136 | est_conf <- confint(RDDregobject, level=level)["D",] 137 | 138 | if(device=="base"){ 139 | ra <- range(seq_vals[,c("CI_low", "CI_high")], est_conf, na.rm=TRUE) 140 | xlims <- c(quants_left[1], quants_right[2]) 141 | # ylims <- range(seq_vals[, c("LATE", "CI_low", "CI_high")], est_conf) 142 | plot(seqi_left, seq_vals[1:n_seqi_left,"LATE"], type="l", ylab="LATE", xlab="Cutpoints", ylim=ra, xlim=xlims) 143 | title("Placebo test") 144 | abline(h=0) 145 | 146 | # left CI 147 | lines(seqi_left, seq_vals[1:n_seqi_left,"CI_low"], lty=2) 148 | lines(seqi_left, seq_vals[1:n_seqi_left,"CI_high"], lty=2) 149 | 150 | # right values: 151 | lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"LATE"], lty=1) 152 | lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"CI_low"], lty=2) 153 | lines(seqi_right, seq_vals[(n_seqi_left+1):n_seqi,"CI_high"], lty=2) 154 | 155 | # add estimate at true cutoff 156 | points(cutpoint, est, col=2) 157 | segments(cutpoint,ra[1]-1, cutpoint, est, col="red", lty=2) 158 | segments(min(seqi,na.rm=TRUE)-1, est, cutpoint, est, col="red", lty=2) 159 | } else { 160 | 161 | est_df <- data.frame(cutpoint=cutpoint, LATE=est, position="middle", CI_low=est_conf[1], CI_high=est_conf[2]) 162 | 163 | # hack for decent width of error bar: 164 | last_left <- nrow(subset(seq_vals, position=="left")) 165 | W <- diff(seq_vals[c(last_left, last_left+1), "cutpoint"])/5 166 | 167 | pl <- qplot(x=cutpoint, y=LATE, data=seq_vals, geom="line", colour=position)+ 168 | geom_smooth(aes(ymin=CI_low, ymax=CI_high), data=seq_vals, stat="identity")+ 169 | theme(legend.position="none")+geom_hline(yintercept=0)+ 170 | geom_point(aes(x=cutpoint, y=LATE), data=est_df)+ 171 | geom_errorbar(aes(ymin=CI_low, ymax=CI_high), data=est_df, width=W) 172 | print(pl) 173 | } 174 | 175 | ## export (silently) results: 176 | invisible(seq_vals) 177 | } 178 | -------------------------------------------------------------------------------- /RDDtools/R/reg_gen.R: -------------------------------------------------------------------------------- 1 | #' General polynomial estimator of the regression discontinuity 2 | #' 3 | #' Compute RDD estimate allowing a locally kernel weighted version of any estimation function 4 | #' possibly on the range specified by bandwidth 5 | #' @param RDDobject Object of class RDDdata created by \code{\link{RDDdata}} 6 | #' @param covariates Formula to include covariates 7 | #' @param order Order of the polynomial regression. 8 | #' @param bw A bandwidth to specify the subset on which the kernel weighted regression is estimated 9 | #' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw} 10 | #' @param slope Whether slopes should be different on left or right (separate), or the same. 11 | #' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). 12 | #' @param fun The function to estimate the parameters 13 | #' @param \ldots Further arguments passed to fun. See the example. 14 | #' @details This function allows the user to use a custom estimating function, instead of the traditional \code{lm()}. 15 | #' It is assumed that the custom funciton has following behaviour: 16 | #' \enumerate{ 17 | #' \item A formula interface, together with a \code{data} argument 18 | #' \item A \code{weight} argument 19 | #' \item A coef(summary(x)) returning a data-frame containing a column Estimate 20 | #' } 21 | #' Note that for the last requirement, this can be accomodated by writing a specific \code{\link{RDDcoef}} 22 | #' function for the class of the object returned by \code{fun}. 23 | #' @return An object of class RDDreg_lm and class lm, with specific print and plot methods 24 | #' @references TODO 25 | #' @include plotBin.R 26 | #' @export RDDgenreg 27 | #' @examples 28 | #' ## Step 0: prepare data 29 | #' data(Lee2008) 30 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 31 | #' 32 | #' ## Estimate a local probit: 33 | #' Lee2008_rdd$y <- with(Lee2008_rdd, ifelse(y= cutpoint -bw & dat$x <= cutpoint +bw, 1, 0) 54 | } else if(!missing(weights)){ 55 | weights <- weights 56 | } else { 57 | weights <- NULL 58 | } 59 | 60 | ## Construct data 61 | if(missing(weights)) weights <- NULL 62 | dat_step1 <- model.matrix(RDDobject, covariates=covariates, order=order, bw=bw, 63 | slope=slope, covar.opt=covar.opt) 64 | 65 | ## Regression 66 | reg <- fun(y~., data=dat_step1, weights=weights,...) 67 | 68 | ##Return 69 | RDDslot <- list() 70 | RDDslot$RDDdata <- RDDobject 71 | reg$RDDslot <- RDDslot 72 | class(reg) <- c("RDDreg_lm", "RDDreg", class(reg)) 73 | attr(reg, "PolyOrder") <- order 74 | attr(reg, "cutpoint") <- cutpoint 75 | attr(reg, "slope") <- slope 76 | attr(reg, "RDDcall") <- match.call() 77 | attr(reg, "bw") <- bw 78 | reg 79 | } 80 | 81 | RDDgenreg_old <- function(RDDobject, covariates=".", bw=RDDbw_IK(RDDobject), slope=c("separate", "same"), fun=glm, ...){ 82 | 83 | slope <- match.arg(slope) 84 | checkIsRDD(RDDobject) 85 | if(!is.function(fun)) stop("Arg 'fun' should be a function") 86 | cutpoint <- getCutpoint(RDDobject) 87 | 88 | ## Construct data 89 | dat <- as.data.frame(RDDobject) 90 | 91 | dat_step1 <- dat[, c("y", "x")] 92 | dat_step1$x <- dat_step1$x -cutpoint 93 | dat_step1$D <- ifelse(dat_step1$x >= 0, 1,0) 94 | if(slope=="separate") { 95 | dat_step1$x_right <- dat_step1$x*dat_step1$D 96 | } 97 | 98 | ### Weights 99 | kernel_w <- Kernel_tri(dat_step1[,"x"], center=0, bw=bw) 100 | 101 | ## Regression 102 | reg <- fun(y~., data=dat_step1, weights=kernel_w,...) 103 | 104 | ##Return 105 | class(reg) <- c("RDDreg_gen", "RDDreg", class(reg)) 106 | attr(reg, "RDDcall") <- match.call() 107 | attr(reg, "cutpoint") <- cutpoint 108 | attr(reg, "bw") <- bw 109 | reg 110 | } 111 | 112 | 113 | if(FALSE){ 114 | 115 | library(RDDtools) 116 | data(Lee2008) 117 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 118 | 119 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) 120 | environment(RDDgenreg) <- environment(RDDdata) 121 | reg_glm_norm <- RDDgenreg(RDDobject=Lee2008_rdd) 122 | 123 | reg_nonpara 124 | reg_glm_norm 125 | plot(reg_glm_norm) 126 | 127 | 128 | ### Binary example: 129 | 130 | ## gen from latent model: 131 | gen_MC_binom <- function(n=200, LATE=0.3){ 132 | x <- rnorm(n) 133 | D <- x>= 0 134 | y <- 0.8 + LATE*D+ 0.3*x+0.1*x*D+rnorm(n) 135 | y <- as.integer(ifelse(y> -0.5, 1, 0)) 136 | if(mean(y==1)<0.04) y[sample(c(0,1), prob=c(0.1, 0.9), replace=TRUE, size=n)] <- 1 137 | RDDdata(x=x, y=y, cutpoint=0) 138 | } 139 | 140 | mc <- gen_MC_binom() 141 | environment(RDDgenreg) <- environment(RDDdata) 142 | reg_bin_glm <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="probit")) 143 | 144 | ## quantile: 145 | library(quantreg) 146 | MC1_dat <- gen_MC_IK() 147 | MC1_rdd <- RDDdata(y=MC1_dat$y, x=MC1_dat$x, cutpoint=0) 148 | 149 | RDDcoef.rq <- function(object, allInfo=FALSE, ...){ 150 | res <- coef(summary(object))["D",, drop=FALSE] 151 | if(!allInfo) res <- res[,"coefficients"] 152 | res 153 | } 154 | 155 | reg_bin_rq1 <- RDDgenreg(RDDobject=MC1_rdd, fun=rq, tau=0.5, bw=0.5) 156 | reg_bin_rq1 157 | coef(reg_bin_rq1) 158 | RDDcoef(reg_bin_rq1) 159 | RDDcoef(reg_bin_rq1, allInfo=TRUE) 160 | summary(reg_bin_rq1) 161 | 162 | pl_rq <- plotSensi(reg_bin_rq1, order=1, from=0.1, to=1) 163 | pl_rq 164 | 165 | 166 | 167 | 168 | 169 | ## Monte Carlo 170 | 171 | doEs<- function(n){ 172 | mc <- gen_MC_binom() 173 | reg_bin_np <- RDDreg_np(RDDobject=mc) 174 | environment(RDDgenreg) <- environment(RDDdata) 175 | reg_bin_glm <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="probit")) 176 | reg_bin_glm_log <- RDDgenreg(RDDobject=mc, fun= glm, family=binomial(link="logit")) 177 | 178 | a<- RDDtools:::RDDcoef(reg_bin_glm)/2.5 179 | b<- RDDtools:::RDDcoef(reg_bin_glm_log)/4 180 | d<- RDDtools:::RDDcoef(reg_bin_np) 181 | 182 | res <- c(a, b, d) 183 | names(res) <- c("Probit", "Logit", "LPM") 184 | res 185 | } 186 | 187 | MC_logs <- replicate(500, doEs()) 188 | 189 | MC_logs2 <- t(MC_logs) 190 | colMeans(MC_logs2) 191 | 192 | colMeans(MC_logs2-0.2) 193 | apply(MC_logs2, 2, sd) 194 | 195 | colMeans(MC_logs2-0.2)^2+apply(MC_logs2, 2, var) 196 | colMeans(MC_logs2-0.2)^2+apply(MC_logs2, 2, sd) 197 | 198 | head(MC_logs) 199 | 200 | reg_bin_glm 201 | reg_bin_np 202 | 203 | fav <- mean(dnorm(predict(reg_bin_glm, type = "link"))) 204 | fav * coef(swiss_probit) 205 | 206 | 207 | } 208 | -------------------------------------------------------------------------------- /RDDtools/R/reg_lm.R: -------------------------------------------------------------------------------- 1 | #' Parametric polynomial estimator of the regression discontinuity 2 | #' 3 | #' Compute a parametric polynomial regression of the ATE, 4 | #' possibly on the range specified by bandwidth 5 | #' @param RDDobject Object of class RDDdata created by \code{\link{RDDdata}} 6 | #' @param covariates Formula to include covariates 7 | #' @param order Order of the polynomial regression. 8 | #' @param bw A bandwidth to specify the subset on which the parametric regression is estimated 9 | #' @param covar.strat DEPRECATED, use covar.opt instead. 10 | #' @param covar.opt Options for the inclusion of covariates. Way to include covariates, either in the main regression (\code{include}) or as regressors of y in a first step (\code{residual}). 11 | #' @param weights Optional weights to pass to the lm function. Note this cannot be entered together with \code{bw} 12 | #' @param slope Whether slopes should be different on left or right (separate), or the same. 13 | #' @return An object of class RDDreg_lm and class lm, with specific print and plot methods 14 | #' @details This function estimates the standard \emph{discontinuity regression}: 15 | #' \deqn{Y=\alpha+\tau D+\beta_{1}(X-c)+\beta_{2}D(X-c)+\epsilon} 16 | #' with \eqn{\tau} the main parameter of interest. Several versions of the regression can be estimated, either restricting the slopes to be the same, 17 | #' i.e \eqn{\beta_{1}=\beta_{2}} (argument \code{slope}). The order of the polynomial in \eqn{X-c} can also be adjusted with argument \code{order}. 18 | #' Note that a value of zero can be used, which corresponds to the simple \emph{difference in means}, that one would use if the samples were random. 19 | #' Covariates can also be added in the regression, according to the two strategies discussed in Lee and Lemieux (2010, sec 4.5), through argument \code{covar.strat}: 20 | #' \describe{ 21 | #' \item{include}{Covariates are simply added as supplementary regressors in the RD equation} 22 | #' \item{residual}{The dependent variable is first regressed on the covariates only, then the RDD equation is applied on the residuals from this first step}} 23 | #' The regression can also be estimated in a neighborhood of the cutpoint with the argument \code{bw}. This make the parametric regression resemble 24 | #' the non-parametric local kernel \code{\link{RDDreg_np}}. Similarly, weights can also be provided (but not simultaneously to \code{bw}). 25 | #' 26 | #' The returned object is a classical \code{lm} object, augmented with a \code{RDDslot}, so usual methods can be applied. As is done in general in R, 27 | #' heteroskeadsticity-robust inference can be done later on with the usual function from package \pkg{sandwich}. For the case of clustered observations 28 | #' a specific function \code{\link{clusterInf}} is provided. 29 | #' @references TODO 30 | #' @include plotBin.R 31 | #' @import Formula 32 | #' @importFrom AER ivreg 33 | #' @export 34 | #' @examples 35 | #' ## Step 0: prepare data 36 | #' data(Lee2008) 37 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 38 | #' ## Step 2: regression 39 | #' # Simple polynomial of order 1: 40 | #' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 41 | #' print(reg_para) 42 | #' plot(reg_para) 43 | #' 44 | #' # Simple polynomial of order 4: 45 | #' reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) 46 | #' reg_para4 47 | #' plot(reg_para4) 48 | #' 49 | #' # Restrict sample to bandwidth area: 50 | #' bw_ik <- RDDbw_IK(Lee2008_rdd) 51 | #' reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4) 52 | #' reg_para_ik 53 | #' plot(reg_para_ik) 54 | 55 | 56 | RDDreg_lm <- function(RDDobject, covariates=NULL, order=1, bw=NULL, slope=c("separate", "same"), covar.opt=list(strategy=c("include", "residual"), slope=c("same", "separate"), bw=NULL), covar.strat=c("include", "residual"), weights){ 57 | 58 | checkIsRDD(RDDobject) 59 | cutpoint <- getCutpoint(RDDobject) 60 | type <- getType(RDDobject) 61 | 62 | slope <- match.arg(slope) 63 | 64 | if(!missing(covar.strat)) warning("covar.strat is (soon) deprecated arg!") 65 | if(!missing(weights)&!is.null(bw)) stop("Cannot give both 'bw' and 'weights'") 66 | 67 | ## Subsetting 68 | dat <- as.data.frame(RDDobject) 69 | 70 | if(!is.null(bw)){ 71 | weights <- ifelse(dat$x >= cutpoint -bw & dat$x <= cutpoint +bw, 1, 0) 72 | } else if(!missing(weights)){ 73 | weights <- weights 74 | } else { 75 | weights <- NULL 76 | } 77 | 78 | ## Construct data 79 | if(missing(weights)) weights <- NULL 80 | dat_step1 <- model.matrix(RDDobject, covariates=covariates, order=order, bw=bw, 81 | slope=slope, covar.opt=covar.opt) 82 | 83 | ## Regression 84 | if(type=="Sharp"){ 85 | reg <- lm(y~., data=dat_step1, weights=weights) 86 | class_reg <- "lm" 87 | } else { 88 | if(!is.null(covariates)) stop("Covariates currently not implemented for Fuzzy case") 89 | reg <- ivreg(y~.-ins|.-D, data=dat_step1, weights=weights) 90 | class_reg <- "ivreg" 91 | } 92 | 93 | 94 | ##Return 95 | RDDslot <- list() 96 | RDDslot$RDDdata <- RDDobject 97 | reg$RDDslot <- RDDslot 98 | class(reg) <- c("RDDreg_lm", "RDDreg", class_reg) 99 | attr(reg, "PolyOrder") <- order 100 | attr(reg, "cutpoint") <- cutpoint 101 | attr(reg, "slope") <- slope 102 | attr(reg, "RDDcall") <- match.call() 103 | attr(reg, "bw") <- bw 104 | reg 105 | } 106 | 107 | 108 | #' @S3method print RDDreg_lm 109 | print.RDDreg_lm <- function(x,...) { 110 | 111 | order <- getOrder(x) 112 | cutpoint <- getCutpoint(x) 113 | slope <- getSlope(x) 114 | bw <- getBW(x) 115 | hasBw <- !is.null(bw) 116 | bw2 <- if(hasBw) bw else Inf 117 | 118 | x_var <- getOriginalX(x) 119 | n_left <- sum(x_var >= cutpoint -bw2 & x_var < cutpoint) 120 | n_right <- sum(x_var >= cutpoint & x_var <= cutpoint+bw2) 121 | 122 | cat("### RDD regression: parametric ###\n") 123 | cat("\tPolynomial order: ", order, "\n") 124 | cat("\tSlopes: ", slope, "\n") 125 | if(hasBw) cat("\tBandwidth: ", bw, "\n") 126 | cat("\tNumber of obs: ", sum(n_left+n_right), " (left: ", n_left, ", right: ", n_right, ")\n", sep="") 127 | 128 | cat("\n\tCoefficient:\n") 129 | 130 | printCoefmat(coef(summary(x))[2,, drop=FALSE]) 131 | 132 | } 133 | 134 | #' @S3method plot RDDreg_lm 135 | plot.RDDreg_lm <- function(x,...) { 136 | 137 | ## data 138 | dat <- getOriginalData(x) 139 | subw <- if(!is.null(x$weights)) x$weights>0 else rep(TRUE, nrow(dat)) 140 | pred <- data.frame(x=dat$x,y=fitted(x))[subw,] 141 | 142 | ##plot 143 | plotBin(dat$x, dat$y, ...) 144 | lines(pred[order(pred$x),]) 145 | } 146 | 147 | 148 | 149 | if(FALSE){ 150 | 151 | library(RDDtools) 152 | data(Lee2008) 153 | 154 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 155 | 156 | 157 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 158 | print(x=reg_para ) 159 | summary(reg_para ) 160 | 161 | reg_para_same <- RDDreg_lm(RDDobject=Lee2008_rdd, slope="same") 162 | print(x=reg_para_same ) 163 | summary(reg_para_same ) 164 | 165 | reg_para2 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2) 166 | reg_para2 167 | summary(reg_para2) 168 | plot(reg_para2) 169 | 170 | reg_para2_same <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2, slope="same") 171 | reg_para2_same 172 | summary(reg_para2_same) 173 | plot(reg_para2) 174 | 175 | bw_ik <- RDDbw_IK(Lee2008_rdd) 176 | reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik) 177 | print(x=reg_para_ik) 178 | plot(x=reg_para_ik) 179 | 180 | } -------------------------------------------------------------------------------- /RDDtools/R/clusterInf.R: -------------------------------------------------------------------------------- 1 | #' Post-inference for clustered data 2 | #' 3 | #' Correct standard-errors to account for clustered data, doing either a degrees of freedom correction or using a heteroskedasticidty-cluster robust covariance matrix 4 | #' possibly on the range specified by bandwidth 5 | #' @param object Object of class lm, from which RDDreg also inherits. 6 | #' @param clusterVar The variable containing the cluster attributions. 7 | #' @param vcov. Specific covariance function to pass to coeftest. See help of sandwich 8 | #' @param type The type of cluster correction to use: either the degrees of freedom, or a HC matrix. 9 | #' @param \ldots Further arguments passed to coeftest 10 | #' @return The output of the coeftest function, which is itself of class \code{coeftest} 11 | #' @seealso \code{\link{vcovCluster}}, which implements the cluster-robust covariance matrix estimator used by \code{cluserInf} 12 | #' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. 13 | #' \emph{AmericanEconomic Review}, 93, p. 133-138 14 | #' @export 15 | #' @import sandwich 16 | #' @import lmtest 17 | #' @examples 18 | #' data(Lee2008) 19 | #' Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 20 | #' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 21 | #' 22 | #' # here we just generate randomly a cluster variable: 23 | #' nlet <- sort(c(outer(letters, letters, paste, sep=""))) 24 | #' clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE) 25 | #' 26 | #' # now do post-inference: 27 | #' clusterInf(reg_para, clusterVar=clusRandom) 28 | #' clusterInf(reg_para, clusterVar=clusRandom, type="HC") 29 | 30 | 31 | clusterInf <- function(object, clusterVar, vcov. = NULL, type=c("df-adj", "HC"), ...){ 32 | 33 | if(is.null(clusterVar)) stop("clusterVar seems to be NULL?") 34 | type <- match.arg(type) 35 | 36 | if(type=="df-adj"){ 37 | nClus <- if(is.factor(clusterVar)) nlevels(clusterVar) else length(unique(clusterVar)) 38 | res <- coeftest(object, vcov. = vcov., df = nClus, ...) 39 | } else { 40 | if(!is.null(vcov.)) warning("arg 'vcov.' not used when 'type=HC' (default vcovCluster used)") 41 | res <- coeftest(object, vcov. = function(x) vcovCluster(x, clusterVar=clusterVar), ...) 42 | } 43 | 44 | return(res) 45 | } 46 | 47 | #' @S3method estfun RDDreg_np 48 | estfun.RDDreg_np <- function(x,...){ 49 | inf_met <- infType(x) ## def in Misc.R 50 | if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") 51 | estfun(x$RDDslot$model) 52 | } 53 | 54 | #' @S3method bread RDDreg_np 55 | bread.RDDreg_np <- function(x,...){ 56 | inf_met <- infType(x) ## def in Misc.R 57 | if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") 58 | bread(x$RDDslot$model) 59 | } 60 | 61 | 62 | # sandwich.RDDreg_np <- function (x, bread. = bread, meat. = meat, ...){ 63 | # inf_met <- infType(x) ## def in Misc.R 64 | # if(inf_met=="se") stop("No 'vcovHC', 'vcovCluster', 'estfun' etc can be applied to RDDrg_np with non-parametric inference estimators") 65 | # sandwich(x$RDDslot$model, bread.=bread., meat.=meat., ...) 66 | # } 67 | 68 | #' @S3method model.frame RDDreg_np 69 | model.frame.RDDreg_np <- function (formula, ...) 70 | model.frame(formula$RDDslot$model) 71 | 72 | #' Cluster Heteroskedasticity-consistent estimation of the covariance matrix. 73 | #' 74 | #' Offer a cluster variant of the usual Heteroskedasticity-consistent 75 | #' @param object Object of class lm, from which RDDreg also inherits. 76 | #' @param clusterVar The variable containing the cluster attributions. 77 | #' @return A matrix containing the covariance matrix estimate. 78 | #' @author Mahmood Arai, see \url{http://people.su.se/~ma/econometrics.html} 79 | #' @references Cameron, C., Gelbach, J. and Miller, D. (2011) Robust Inference With Multiway Clustering, 80 | #' \emph{Journal of Business and Economic Statistics}, vol. 29(2), pages 238-249. 81 | #' #' @references Wooldridge (2003) Cluster-sample methods in applied econometrics. 82 | #' \emph{AmericanEconomic Review}, 93, p. 133-138 83 | #' @references Arai, M. (2011) Cluster-robust standard errors using R, Note available \url{http://people.su.se/~ma/clustering.pdf}. 84 | #' @export 85 | #' @seealso \code{\link{clusterInf}} for a direct function, allowing also alternative cluster inference methods. 86 | #' See also \code{\link[rms]{robcov}} from package \code{rms} for another implementation of the cluster robust. 87 | #' @examples 88 | #' data(STAR_MHE) 89 | #' if(all(c(require(sandwich), require(lmtest)))){ 90 | #' 91 | #' # Run simple regression: 92 | #' reg_krug <- lm(pscore~cs, data=STAR_MHE) 93 | #' 94 | #' # Row 1 of Table 8.2.1, inference with standard vcovHC: 95 | #' coeftest(reg_krug,vcov.=vcovHC(reg_krug, "HC1"))[2,2] 96 | #' 97 | #' # Row 4 of Table 8.2.1, inference with cluster vcovHC: 98 | #' coeftest(reg_krug,vcov.=vcovCluster(reg_krug, clusterVar=STAR_MHE$classid))[2,2] 99 | #' } 100 | 101 | vcovCluster <- function(object, clusterVar){ 102 | M <- length(unique(clusterVar)) 103 | N <- length(clusterVar) 104 | K <- getModelRank(object) 105 | dfc <- (M/(M-1))*((N-1)/(N-K)) 106 | uj <- apply(estfun(object),2, function(x) tapply(x, clusterVar, sum)) 107 | dfc*sandwich(object, meat.=crossprod(uj)/N) 108 | } 109 | 110 | #' @rdname vcovCluster 111 | #' @param clusterVar1,clusterVar2 The two cluster variables for the 2-cluster case. 112 | #' @export 113 | vcovCluster2 <- function(object, clusterVar1, clusterVar2){ 114 | # R-codes (www.r-project.org) for computing multi-way 115 | # clustered-standard errors. Mahmood Arai, Jan 26, 2008. 116 | # See: Thompson (2006), Cameron, Gelbach and Miller (2006) 117 | # and Petersen (2006). 118 | # reweighting the var-cov matrix for the within model 119 | 120 | K <- getModelRank(object) 121 | estF <- estfun(object) 122 | 123 | clusterVar12 <- paste(clusterVar1,clusterVar2, sep="") 124 | M1 <- length(unique(clusterVar1)) 125 | M2 <- length(unique(clusterVar2)) 126 | M12 <- length(unique(clusterVar12)) 127 | N <- length(clusterVar1) 128 | 129 | dfc1 <- (M1/(M1-1))*((N-1)/(N-K)) 130 | dfc2 <- (M2/(M2-1))*((N-1)/(N-K)) 131 | dfc12 <- (M12/(M12-1))*((N-1)/(N-K)) 132 | 133 | u1j <- apply(estF, 2, function(x) tapply(x, clusterVar1, sum)) 134 | u2j <- apply(estF, 2, function(x) tapply(x, clusterVar2, sum)) 135 | u12j <- apply(estF, 2, function(x) tapply(x, clusterVar12, sum)) 136 | vc1 <- dfc1*sandwich(object, meat.=crossprod(u1j)/N ) 137 | vc2 <- dfc2*sandwich(object, meat.=crossprod(u2j)/N ) 138 | vc12 <- dfc12*sandwich(object, meat.=crossprod(u12j)/N) 139 | vcovMCL <- vc1 + vc2 - vc12 140 | vcovMCL 141 | } 142 | 143 | getModelRank <- function(object,...) 144 | UseMethod("getModelRank") 145 | 146 | getModelRank.default <- function(object,...) object$rank 147 | 148 | getModelRank.RDDreg_np <- function(object,...) getModelRank.default(object$RDDslot$model) 149 | 150 | if(FALSE){ 151 | 152 | library(RDDtools) 153 | data(Lee2008) 154 | 155 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 156 | 157 | 158 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 159 | print(x=reg_para ) 160 | summary(reg_para ) 161 | 162 | ## cluster inference 163 | set.seed(123) 164 | nlet <- sort(c(outer(letters, letters, paste, sep=""))) 165 | clusRandom <- sample(nlet[1:60], size=nrow(Lee2008_rdd), replace=TRUE) 166 | clusterInf(reg_para, clusterVar=clusRandom) 167 | 168 | clusterInf(reg_para, clusterVar=clusRandom, type="HC") 169 | 170 | ## compare with rdd: 171 | library(rdd) 172 | rddest <- RDestimate(y~x, data=Lee2008, bw=30, kernel="rectangular", model=TRUE) 173 | rddest_2 <- RDestimate2(y~x, data=Lee2008, bw=30, kernel="rectangular", model=TRUE, cluster=clusRandom) 174 | coef(summary(reg_para)) 175 | coef(summary(rddest$model[[2]])) 176 | 177 | all.equal(clusterInf(reg_para, clusterVar=clusRandom, type="HC")["D", "Std. Error"],rddest_2[["se"]][2]) 178 | } -------------------------------------------------------------------------------- /RDDtools/R/RDDdata.R: -------------------------------------------------------------------------------- 1 | #'Construct RDDdata 2 | #' 3 | #' Construct the base RDD object, containing x, y and the cutpoint, eventuallay covariates. 4 | #' 5 | #' @param x Forcing variable 6 | #' @param y Output 7 | #' @param covar Exogeneous variables 8 | #' @param cutpoint Cutpoint 9 | #' @param labels Additional labels to provide as list (with entries \code{x}, \code{y}, and eventually vector \code{covar}). Unused currently. 10 | #' @param data A data-frame for the \code{x} and \code{y} variables. If this is provided, 11 | #' the column names can be entered directly for argument \code{x} and \code{y} 12 | #' @param z Assignment variable for the fuzzy case. 13 | #' @return Object of class \code{RDDdata}, inheriting from \code{data.frame} 14 | #' @export 15 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> 16 | #' @examples 17 | #' data(Lee2008) 18 | #' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) 19 | #' rd2 <- RDDdata(x=x, y=y, data=Lee2008, cutpoint=0) 20 | #' 21 | #' # The print() function is the same as the print.data.frame: 22 | #' rd 23 | #' 24 | #' # The summary() and plot() function are specific to RDDdata 25 | #' summary(rd) 26 | #' plot(rd) 27 | 28 | 29 | RDDdata <- function(y, x, covar, cutpoint, z, labels, data){ 30 | 31 | 32 | ## check args 33 | type <- ifelse(missing(z), "Sharp", "Fuzzy") 34 | hasCovar <- !missing(covar) 35 | if(missing(cutpoint)) stop("Please provide cutpoint") 36 | covar_nam <- deparse(substitute(covar)) 37 | 38 | ## Use data in case: 39 | if(!missing(data)){ 40 | pf <- parent.frame() 41 | x <- eval(substitute(x), data, enclos = pf) # copy from with.default 42 | y <- eval(substitute(y), data, enclos = pf) # copy from with.default 43 | if(hasCovar) covar <- eval(substitute(covar), data, enclos = pf) # idem 44 | } 45 | 46 | ### Check y, x univariate 47 | k_y <- NCOL(y) 48 | k_x <- NCOL(x) 49 | 50 | if(any(!c(k_y, k_x)==1)) stop("y or x should be univariate") 51 | 52 | ### Check y, x, z same size 53 | n_y <- NROW(y) 54 | n_x <- NROW(x) 55 | n_covar <- if(hasCovar) NROW(x) else NULL 56 | 57 | if(any(c(n_y, n_x) != n_covar)) stop("y or x should be univariate") 58 | 59 | ### Check cutpoint 60 | range_x <- range(x, na.rm=TRUE) 61 | if(cutpointrange_x[2]) stop("Cutpoint outside range of x") 62 | 63 | ## Check labels 64 | if(!missing(labels)){ 65 | if(!is.list(labels)) stop("labels should be a list.") 66 | if(is.null(names(labels)) || !all(names(labels)%in%c("x", "y", "covar"))) stop("labels should be a list with components x, and/or y, and/or covar") 67 | if(hasCovar){ 68 | if("covar"%in%names(labels) && length(labels$covar)!=NCOL(covar)) stop("There should be ", NCOL(covar), " values (dim of covar) for component 'covar' in labels") 69 | } 70 | } else { 71 | labels <- list() 72 | } 73 | 74 | # if(is.null(labels$x)) labels$x <- deparse(substitute(x)) 75 | # if(is.null(labels$y)) labels$y <- deparse(substitute(y)) 76 | # if(hasCova && is.null(labels$covar)) labels$covar <- if(NCOL(covar)==1) names(deparse(substitute(y)) 77 | 78 | ## Assemble data 79 | RDDdat <- data.frame(x=x, y=y) 80 | if(hasCovar) { 81 | RDDdat <- cbind(RDDdat,covar) 82 | if(NCOL(covar)==1 && is.null(colnames(covar))) colnames(RDDdat)[3] <- covar_nam 83 | } 84 | 85 | if(type=="Fuzzy"){ 86 | RDDdat <- cbind(RDDdat,z) 87 | } 88 | 89 | ## return 90 | class(RDDdat) <- c("RDDdata", "data.frame") 91 | attr(RDDdat, "hasCovar") <- hasCovar 92 | attr(RDDdat, "labels") <- labels 93 | attr(RDDdat, "cutpoint") <- cutpoint 94 | attr(RDDdat, "type") <- type 95 | 96 | RDDdat 97 | } 98 | 99 | 100 | ### Specific subsetting methods 101 | 102 | ##### @S3method as.data.frame RDDdata 103 | # as.data.frame.RDDdata <- function(x) { 104 | # subset(x, y> 105 | # }as.data.frame.default(x) 106 | 107 | #' @S3method "[" RDDdata 108 | '[.RDDdata' <- function(x,i,...){ 109 | attr_x <- attributes(x) 110 | r <- NextMethod("[", object=as.data.frame(x)) 111 | 112 | ## keep attributes only if remains a data frame! 113 | if(inherits(r, "data.frame")){ 114 | attr_x$row.names <- attr(r, "row.names") 115 | attr_x$names <- attr(r, "names") 116 | mostattributes(r) <- attr_x 117 | attributes(r) <- attributes(r)[match(names(attr_x), names(attributes(r)))] 118 | } 119 | # newCla <- class(r) 120 | # if(any(grepl("RDDdata", newCla))) newCla <- newCla[-grepl("RDDdata", newCla)] 121 | # print(names(attributes(newCla))) 122 | # 123 | # if(!inherits(newCla, "data.frame")) attr(r, "class")[which(attr(r, "class")=="data.frame")] <- newCla 124 | r 125 | } 126 | 127 | #' @S3method subset RDDdata 128 | subset.RDDdata <- function (x, subset, select, drop = FALSE, ...) { 129 | attr_x <- attributes(x) 130 | 131 | ### subset code: start 132 | if (missing(subset)) 133 | r <- TRUE 134 | else { 135 | e <- substitute(subset) 136 | r <- eval(e, x, parent.frame()) 137 | if (!is.logical(r)) 138 | stop("'subset' must evaluate to logical") 139 | r <- r & !is.na(r) 140 | } 141 | if (missing(select)) 142 | vars <- TRUE 143 | else { 144 | nl <- as.list(seq_along(x)) 145 | names(nl) <- names(x) 146 | vars <- eval(substitute(select), nl, parent.frame()) 147 | } 148 | res <- x[r, vars, drop = drop] 149 | ### subset code: end 150 | # r <- subset.data.frame(x,...) 151 | # r <- NextMethod("subset") 152 | 153 | ## keep attributes only if remains a data frame! 154 | if(inherits(r, "data.frame")){ 155 | attr_x$row.names <- attr(res, "row.names") 156 | attr_x$names <- attr(res, "names") 157 | mostattributes(res) <- attr_x 158 | attributes(res) <- attributes(res)[match(names(attr_x), names(attributes(res)))] 159 | } 160 | res 161 | } 162 | 163 | #' @S3method as.data.frame RDDdata 164 | as.data.frame.RDDdata <- function(x,...){ 165 | class(x) <- "data.frame" 166 | attr(x, "hasCovar") <- NULL 167 | attr(x, "labels") <- NULL 168 | attr(x, "cutpoint") <- NULL 169 | x 170 | } 171 | 172 | 173 | if(FALSE){ 174 | 175 | library(RDDtools) 176 | data(Lee2008) 177 | 178 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 179 | Lee2008_rdd2 <- RDDdata(y=y, x=x,data=Lee2008, cutpoint=0) 180 | 181 | all.equal(Lee2008_rdd, Lee2008_rdd2) 182 | 183 | ### wrong covariate setting, legitimate warnings: 184 | Lee2008_rdd_lab1 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=c("a","bb")) 185 | Lee2008_rdd_lab2 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=list("a","bb")) 186 | Lee2008_rdd_lab3 <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0, labels=list(x="a",u="bb")) 187 | 188 | ### Covariate setting: 189 | Z <- data.frame(z_con=runif(nrow(Lee2008)), z_dic=factor(sample(letters[1:3], size=nrow(Lee2008), replace=TRUE))) 190 | 191 | Lee2008_rdd_Z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0) 192 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=c("a","bb")) 193 | 194 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha")) 195 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", u="aa")) 196 | 197 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", covar="aa")) 198 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0, labels=list(x="aha", z=c("aa", "hj"))) 199 | 200 | ### subsetting 201 | dat <- Lee2008_rdd 202 | dat_sub <- subset(Lee2008_rdd, x<1000) 203 | dat_ind <- Lee2008_rdd[1:nrow(Lee2008_rdd),] 204 | dat_ind_1 <- Lee2008_rdd[,1] 205 | dat_ind_2 <- Lee2008_rdd[1:5,] 206 | 207 | 208 | all.equal(dat, dat_sub) 209 | all.equal(attributes(dat), attributes(dat_sub)) 210 | 211 | all.equal(dat, dat_ind) 212 | all.equal(attributes(dat), attributes(dat_ind)) 213 | 214 | df<- as.data.frame(Lee2008_rdd) 215 | head(df) 216 | 217 | 218 | head(Lee2008_rdd_Z) 219 | colnames(Lee2008_rdd_Z[, -c(1,2)]) 220 | attributes(Lee2008_rdd_Z[, -c(1,2)]) 221 | 222 | colnames(subset(Lee2008_rdd_Z,select= c("z1","z2"))) 223 | 224 | colnames(dat_sub) 225 | colnames(dat_ind) 226 | colnames(dat_ind_1) 227 | colnames(dat_ind_2) 228 | } -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | RDDtools: an R package for Regression Discontinuity Design 2 | ======================================================== 3 | 4 | **RDDtools** is a new R package under development, designed to offer a set of tools to run all the steps required for a Regression Discontinuity Design (RDD) Analysis, from primary data visualisation to discontinuity estimation, sensitivity and placebo testing. 5 | 6 | 7 | Installing **RDDtools** 8 | ----------------------- 9 | 10 | This github website hosts the source code. One of the easiest ways to install the package from github is by using the R package **devtools**: 11 | 12 | ```{r eval=FALSE} 13 | library(devtools) 14 | install_github(repo="RDDtools", username="MatthieuStigler", subdir="RDDtools") 15 | ``` 16 | 17 | Note however the latest version of RDDtools only works with R 3.0, and that you might need to install [Rtools](http://stat.ethz.ch/CRAN/bin/windows/Rtools/) if on Windows. 18 | 19 | 20 | Documentation 21 | ----------------------- 22 | The (preliminary) documentation is available in the help files directly, as well as in the *vignette*. The vignette can be accessed from R with vignette("RDDtools"), or by accessing the [pdf](https://github.com/MatthieuStigler/RDDtools/raw/master/RDDtools/inst/doc/RDDtools.pdf) stored on this github. 23 | 24 | RDDtools: main features 25 | ----------------------- 26 | 27 | 28 | + Simple visualisation of the data using binned-plot: **plot()** 29 | 30 | + Bandwidth selection: 31 | + MSE-RDD bandwidth procedure of [Imbens and Kalyanaraman 2012]: **RDDbw_IK()** 32 | + MSE global bandwidth procedure of [Ruppert et al 1995]: **RDDbw_RSW()** 33 | + Estimation: 34 | + RDD parametric estimation: **RDDreg_lm()** This includes specifying the polynomial order, including covariates with various specifications as advocated in [Imbens and Lemieux 2008]. 35 | + RDD local non-parametric estimation: **RDDreg_np()**. Can also include covariates, and allows different types of inference (fully non-parametric, or parametric approximation). 36 | + RDD generalised estimation: allows to use custom estimating functions to get the RDD coefficient. Could allow for example a probit RDD, or quantile regression. 37 | + Post-Estimation tools: 38 | + Various tools, to obtain predictions at given covariate values ( **RDDpred()** ), or to convert to other classes, to lm ( **as.lm()** ), or to the package *np* ( **as.npreg()** ). 39 | + Function to do inference with clustered data: **clusterInf()** either using a cluster covariance matrix ( **vcovCluster()** ) or by a degrees of freedom correction (as in [Cameron et al. 2008]). 40 | + Regression sensitivity analysis: 41 | + Plot the sensitivity of the coefficient with respect to the bandwith: **plotSensi()** 42 | + *Placebo plot* using different cutpoints: **plotPlacebo()** 43 | + Design sensitivity analysis: 44 | + McCrary test of manipulation of the forcing variable: wrapper **dens_test()** to the function **DCdensity()** from package **rdd**. 45 | + Test of equal means of covariates: **covarTest_mean()** 46 | + Test of equal density of covariates: **covarTest_dens()** 47 | + Datasets 48 | + Contains the seminal dataset of [Lee 2008]: **Lee2008** 49 | + Contains functions to replicate the Monte-Carlo simulations of [Imbens and Kalyanaraman 2012]: **gen_MC_IK()** 50 | 51 | Using RDDtools: a quick example 52 | ----------------------- 53 | **RDDtools** works in an object-oriented way: the user has to define once the characteristic of the data, creating a *RDDdata* object, on which different anaylsis tools can be applied. 54 | 55 | ### Data preparation and visualisation 56 | Load the package, and load the built-in dataset from [Lee 2008]: 57 | 58 | ```{r options, echo=FALSE} 59 | opts_chunk$set(warning= FALSE, message=FALSE, fig.align="center", fig.path='figuresREADME/') 60 | ``` 61 | 62 | 63 | ```{r} 64 | library(RDDtools) 65 | data(Lee2008) 66 | ``` 67 | 68 | Declare the data to be a *RDDdata* object: 69 | 70 | ```{r} 71 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 72 | ``` 73 | 74 | 75 | You can now directly summarise and visualise this data: 76 | 77 | ```{r dataPlot} 78 | summary(Lee2008_rdd) 79 | plot(Lee2008_rdd) 80 | ``` 81 | 82 | ### Estimation 83 | 84 | #### Parametric 85 | 86 | Estimate parametrically, by fitting a 4th order polynomial: 87 | ```{r reg_para} 88 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) 89 | reg_para 90 | 91 | plot(reg_para) 92 | ``` 93 | 94 | 95 | #### Non-parametric 96 | As well as run a simple local regression, using the [Imbens and Kalyanaraman 2012] bandwidth: 97 | ```{r RegPlot} 98 | bw_ik <- RDDbw_IK(Lee2008_rdd) 99 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd, bw=bw_ik) 100 | print(reg_nonpara) 101 | plot(x=reg_nonpara) 102 | 103 | ``` 104 | 105 | ### Regression Sensitivity tests: 106 | 107 | One can easily check the sensitivity of the estimate to different bandwidths: 108 | ```{r SensiPlot} 109 | plotSensi(reg_nonpara, from=0.05, to=1, by=0.1) 110 | ``` 111 | 112 | Or run the Placebo test, estimating the RDD effect based on fake cutpoints: 113 | ```{r placeboPlot} 114 | plotPlacebo(reg_nonpara) 115 | ``` 116 | 117 | ### Design Sensitivity tests: 118 | 119 | Design sensitivity tests check whether the discontinuity found can actually be attributed ot other causes. Two types of tests are available: 120 | 121 | + Discontinuity comes from manipulation: test whether there is possible manipulation around the cutoff, McCrary 2008 test: **dens_test()** 122 | + Discontinuity comes from other variables: should test whether discontinuity arises with covariates. Currently, only simple tests of equality of covariates around the threshold are available: 123 | 124 | #### Discontinuity comes from manipulation: McCrary test 125 | 126 | use simply the function **dens_test()**, on either the raw data, or the regression output: 127 | ```{r DensPlot} 128 | dens_test(reg_nonpara) 129 | ``` 130 | 131 | #### Discontinuity comes from covariates: covariates balance tests 132 | 133 | Two tests available: 134 | + equal means of covariates: **covarTest_mean()** 135 | + equal density of covariates: **covarTest_dens()** 136 | 137 | 138 | We need here to simulate some data, given that the Lee (2008) dataset contains no covariates. 139 | We here simulate three variables, with the second having a different mean on the left and the right. 140 | 141 | ```{r} 142 | set.seed(123) 143 | n_Lee <- nrow(Lee2008) 144 | Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 145 | z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 146 | z3 = sample(letters, size = n_Lee, replace = TRUE)) 147 | Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) 148 | ``` 149 | 150 | 151 | Run the tests: 152 | ```{r} 153 | ## test for equality of means around cutoff: 154 | covarTest_mean(Lee2008_rdd_Z, bw=0.3) 155 | 156 | ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: 157 | covarTest_dis(Lee2008_rdd_Z, bw=0.3) 158 | ``` 159 | 160 | Tests correctly reject equality of the second, and correctly do not reject equality for the first and third. 161 | 162 | [Imbens and Kalyanaraman 2012]: http://ideas.repec.org/a/oup/restud/v79y2012i3p933-959.html "Imbens, G. & Kalyanaraman, K. (2012) Optimal Bandwidth Choice for the Regression Discontinuity Estimator, Review of Economic Studies, 79, 933-959" 163 | 164 | [Lee 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p675-697.html "Lee, D. S. (2008) Randomized experiments from non-random selection in U.S. House elections, Journal of Econometrics, 142, 675-697" 165 | 166 | [Imbens and Lemieux 2008]: http://ideas.repec.org/a/eee/econom/v142y2008i2p615-635.html "Imbens, G. & Lemieux, T. (2008) Regression discontinuity designs: A guide to practice, Journal of Econometrics, Vol. 142(2), pages 615-635" 167 | 168 | [Cameron et al. 2008]: http://ideas.repec.org/a/tpr/restat/v90y2008i3p414-427.html "Cameron, Gelbach and Miller (2008) Bootstrap-Based Improvements for Inference with Clustered Errors, The Review of Economics and Statistics, Vol. 90(3), pages 414-427" 169 | 170 | [Ruppert et al 1995]: http://www.jstor.org/stable/2291516 "Ruppert, D., Sheather, S. J. and Wand, M. P. (1995). An effective bandwidth selector for local least squares regression. Journal of the American Statistical Association, 90, 1257–1270." 171 | 172 | 173 | -------------------------------------------------------------------------------- /RDDtools/tests/packageDemo.R: -------------------------------------------------------------------------------- 1 | library(RDDtools) 2 | 3 | 4 | 5 | 6 | ############################################ 7 | ### STEP 0: Data Manipulation 8 | ############################################ 9 | data(Lee2008) 10 | head(Lee2008) 11 | 12 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 13 | 14 | head(Lee2008_rdd) 15 | 16 | summary(Lee2008_rdd) 17 | 18 | ## With covariates 19 | 20 | n_Lee <- nrow(Lee2008) 21 | 22 | set.seed(123) 23 | Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) 24 | Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0) 25 | 26 | head(Lee2008_rdd_z ) 27 | summary(Lee2008_rdd_z ) 28 | 29 | ### Fuzzy 30 | set.seed(123) 31 | ins <- rbinom(n_Lee, 1, prob=ifelse(Lee2008$x<0, 0.1, 0.9)) 32 | Lee2008_rdd_ins <- RDDdata(y=Lee2008$y, x=Lee2008$x, z=ins,cutpoint=0) 33 | table(Lee2008$x<0, ins==0) 34 | 35 | ############################################ 36 | ### STEP 2: Graphical inspection 37 | ############################################ 38 | 39 | ### Plot 40 | plot(Lee2008_rdd) 41 | plot(Lee2008_rdd, nplot=3, h=c(0.02, 0.03, 0.04)) 42 | plot(Lee2008_rdd, nplot=1, h=0.1) 43 | 44 | plot(Lee2008_rdd, xlim=c(-0.5, 0.5)) 45 | 46 | # plot(Lee2008_rdd, xlim=c(-0.5, 0.5), type="ggplot") 47 | 48 | 49 | ############################################ 50 | ### STEP 2: Regression 51 | ############################################ 52 | 53 | ## few bandwidths: 54 | RDDbw_RSW(Lee2008_rdd) 55 | RDDbw_IK(Lee2008_rdd) 56 | 57 | 58 | ###### Parametric regression ###### 59 | # Simple polynomial of order 1: 60 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 61 | print(reg_para) 62 | summary(reg_para) 63 | plot(reg_para) 64 | 65 | all.equal(unlist(RDDpred(reg_para)), RDDcoef(reg_para, allInfo=TRUE)[1:2], check.attributes=FALSE) 66 | 67 | ## Difference in means regression: 68 | # Simple polynomial of order 0: 69 | reg_para_0 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=0) 70 | print(reg_para_0) 71 | summary(reg_para_0) 72 | plot(reg_para_0) 73 | 74 | 75 | ## Simple polynomial of order 4: 76 | reg_para4 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=4) 77 | reg_para4 78 | plot(reg_para4) 79 | all.equal(unlist(RDDpred(reg_para4)), RDDcoef(reg_para4, allInfo=TRUE)[1:2], check.attributes=FALSE) 80 | 81 | ## Restrict sample to bandwidth area: 82 | bw_ik <- RDDbw_IK(Lee2008_rdd) 83 | reg_para_ik <- RDDreg_lm(RDDobject=Lee2008_rdd, bw=bw_ik, order=4) 84 | reg_para_ik 85 | plot(reg_para_ik) 86 | 87 | all.equal(unlist(RDDpred(reg_para_ik)), RDDcoef(reg_para_ik, allInfo=TRUE)[1:2], check.attributes=FALSE) 88 | 89 | ## Fuzzy reg 90 | reg_para_fuzz <- RDDreg_lm(Lee2008_rdd_ins) 91 | coef(reg_para_fuzz) 92 | summary(reg_para_fuzz) 93 | 94 | ## Covariates: 95 | reg_para4_cov <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".") 96 | reg_para4_cov 97 | summary(reg_para4_cov) 98 | 99 | reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(slope="separate")) 100 | summary(reg_para4_cov_slSep) 101 | RDDpred(reg_para4_cov_slSep) 102 | RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 0.2, 0.2), z2=c(0,20,20), z3b=c(0,1,0), z3c=c(0,0,1))) 103 | 104 | 105 | reg_para4_cov_startR <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual")) 106 | reg_para4_cov_startR 107 | summary(reg_para4_cov_startR) 108 | 109 | plot(reg_para4_cov) 110 | 111 | reg_para4_cov_startR_sl2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates=".", covar.opt=list(strategy="residual", slope="separate")) 112 | summary(reg_para4_cov_startR_sl2) 113 | 114 | reg_para4_cov_2 <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z3+I(z1^2)") 115 | reg_para4_cov_2 116 | summary(reg_para4_cov_2) 117 | 118 | ###### Non-parametric regression ###### 119 | reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd) 120 | print(reg_nonpara) 121 | summary(reg_nonpara) 122 | plot(x=reg_nonpara) 123 | 124 | reg_nonpara_inflm <- RDDreg_np(RDDobject=Lee2008_rdd, inference="lm") 125 | print(reg_nonpara_inflm) 126 | summary(reg_nonpara_inflm) 127 | plot(x=reg_nonpara_inflm) 128 | 129 | 130 | reg_nonpara_sameSl <- RDDreg_np(RDDobject=Lee2008_rdd, slope="same") 131 | print(reg_nonpara_sameSl) 132 | summary(reg_nonpara_sameSl) 133 | 134 | 135 | ###### PLOT SENSI ###### 136 | plSe_reg_para <- plotSensi(reg_para_ik, order=4:6) 137 | plSe_reg_para_fac <- plotSensi(reg_para_ik, type="facet", order=4:6) 138 | plSe_reg_para 139 | plSe_reg_para_fac 140 | 141 | 142 | plSe_reg_nonpara <- plotSensi(reg_nonpara) 143 | plSe_reg_nonpara 144 | 145 | plSe_reg_nonpara_HC <- plotSensi(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x)) 146 | plSe_reg_nonpara_HC 147 | 148 | plSe_reg_para_0 <- plotSensi(reg_para_0, plot=FALSE) 149 | plSe_reg_para_0 150 | 151 | plSe_reg_para_0_gg <- plotSensi(reg_para_0, plot=FALSE, output="ggplot") 152 | str(plSe_reg_para_0_gg) 153 | 154 | 155 | ###### Post-inference: ###### 156 | 157 | clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="df-adj") 158 | clusterInf(reg_para, clusterVar=model.frame(reg_para)$x, type="HC") 159 | 160 | 161 | ############################################ 162 | ### STEP 3: Validty tests 163 | ############################################ 164 | 165 | ## Placebo test: 166 | placeb_dat_reg_nonpara <- computePlacebo(reg_nonpara) 167 | 168 | plotPlacebo(placeb_dat_reg_nonpara) 169 | plotPlacebo(placeb_dat_reg_nonpara, device="base") 170 | 171 | 172 | plotPlaceboDens(placeb_dat_reg_nonpara) 173 | 174 | ## check invisible return: 175 | ptPl_reg_nonpara <- plotPlacebo(reg_nonpara, plot=FALSE) 176 | ptPl_reg_nonpara 177 | 178 | ptPl_reg_nonpara2 <- plotPlacebo(reg_nonpara, plot=FALSE, output="ggplot") 179 | ptPl_reg_nonpara2 180 | 181 | # with HC: 182 | ptPl_reg_nonpara_HC <- plotPlacebo(reg_nonpara_inflm, vcov. =function(x) vcovCluster(x, clusterVar=model.frame(x)$x)) 183 | ptPl_reg_nonpara_HC 184 | 185 | ptPl_reg_para_0 <- plotPlacebo(reg_para_0) 186 | ptPl_reg_para_0 187 | 188 | 189 | 190 | ## density tests 191 | dens_test(Lee2008_rdd) 192 | dens_test(reg_para_0, plot=FALSE) 193 | dens_test(reg_nonpara, plot=FALSE)$test.output[c("theta", "se", "z", "p", "binsize", "bw", "cutpoint")] 194 | 195 | 196 | ## Covariates tests 197 | covarTest_mean(Lee2008_rdd_z) 198 | covarTest_mean(Lee2008_rdd_z, bw=0.1) 199 | covarTest_dis(Lee2008_rdd_z) 200 | covarTest_dis(Lee2008_rdd_z, bw=0.1) 201 | 202 | covarTest_mean(reg_para4_cov) 203 | covarTest_dis(reg_para4_cov) 204 | #### as npreg 205 | reg_nonpara_np <- as.npreg(reg_nonpara, adjustIK_bw=FALSE) 206 | reg_nonpara_np 207 | RDDcoef(reg_nonpara_np) 208 | RDDcoef(reg_nonpara_np, allCo=TRUE) 209 | RDDcoef(reg_nonpara_np, allInfo=TRUE) 210 | RDDcoef(reg_nonpara_np, allInfo=TRUE, allCo=TRUE) 211 | 212 | ## Compare with result obtained with a Gaussian kernel: 213 | bw_lm <- dnorm(Lee2008_rdd$x, sd=RDDtools:::getBW(reg_nonpara)) 214 | reg_nonpara_gaus <- RDDreg_lm(RDDobject=Lee2008_rdd, w=bw_lm) 215 | all.equal(RDDcoef(reg_nonpara_gaus, allCo=TRUE),RDDcoef(reg_nonpara_np, allCo=TRUE), check.attributes=FALSE) 216 | 217 | 218 | 219 | #### methods 220 | 221 | regs_all <- list(reg_para=reg_para, 222 | reg_para_0=reg_para_0, 223 | reg_para4=reg_para4, 224 | reg_para_ik=reg_para_ik, 225 | reg_para_fuzz=reg_para_fuzz, 226 | reg_para4_cov=reg_para4_cov, 227 | reg_para4_cov_slSep=reg_para4_cov_slSep, 228 | reg_para4_cov_startR=reg_para4_cov_startR, 229 | reg_para4_cov_startR_sl2=reg_para4_cov_startR_sl2, 230 | reg_nonpara=reg_nonpara, 231 | reg_nonpara_inflm=reg_nonpara_inflm, 232 | reg_nonpara_sameSl=reg_nonpara_sameSl) 233 | capply <- function(x){ 234 | n.obs <- sapply(x, length) 235 | seq.max <- seq_len(max(n.obs)) 236 | t(sapply(x, "[", i = seq.max)) 237 | } 238 | 239 | capply(lapply(regs_all, coef)) 240 | sapply(regs_all, RDDcoef) 241 | RDDpred_issue <- c("reg_para_0", "reg_para_fuzz", "reg_nonpara", "reg_nonpara_sameSl") 242 | sapply(regs_all[!names(regs_all)%in%RDDpred_issue], RDDpred) 243 | 244 | sapply(regs_all, RDDtools:::getCutpoint) 245 | lapply(regs_all, plotSensi, plot=FALSE) 246 | 247 | sapply(regs_all, function(x) dens_test(x, plot=FALSE)[c("p.value", "statistic", "estimate")]) 248 | 249 | -------------------------------------------------------------------------------- /RDDtools/R/bw_IK.R: -------------------------------------------------------------------------------- 1 | #' Imbens-Kalyanaraman Optimal Bandwidth Calculation 2 | #' 3 | #' Imbens-Kalyanaraman optimal bandwidth 4 | #' for local linear regression in Regression discontinuity designs. 5 | #' 6 | #' @param RDDobject of class RDDdata created by \code{\link{RDDdata}} 7 | #' @param kernel The type of kernel used: either \code{triangular} or \code{uniform}. 8 | #' @return The optimal bandwidth 9 | #' @references Imbens, Guido and Karthik Kalyanaraman. (2012) "Optimal Bandwidth Choice for the regression discontinuity estimator," 10 | #' Review of Economic Studies (2012) 79, 933-959 11 | #' @seealso \code{\link{RDDbw_RSW}} Global bandwidth selector of Ruppert, Sheather and Wand (1995) 12 | #' @export 13 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> 14 | #' @examples 15 | #' data(Lee2008) 16 | #' rd<- RDDdata(x=Lee2008$x, y=Lee2008$y, cutpoint=0) 17 | #' RDDbw_IK(rd) 18 | 19 | 20 | RDDbw_IK <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal")) { 21 | 22 | kernel <- match.arg(kernel) 23 | checkIsRDD(RDDobject) 24 | cutpoint <- getCutpoint(RDDobject) 25 | 26 | res <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=FALSE, kernel=kernel) 27 | return(res) 28 | 29 | } 30 | 31 | IK_bias <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) { 32 | 33 | kernel <- match.arg(kernel) 34 | checkIsRDD(RDDobject) 35 | cutpoint <- getCutpoint(RDDobject) 36 | 37 | resB <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=TRUE, kernel=kernel) 38 | 39 | ## compute C1: see IK equ 5, and Fan Jijbels (1996, 3.23) 40 | # is done in R with locpol, computeMu(i=2, equivKernel(TrianK, nu=0, deg=1, lower=0, upper=Inf), lower=0, upper=Inf) 41 | C1 <- switch(kernel, "Triangular"= -0.1, "Uniform"= -0.1666667, "Normal"= -0.7519384) ## from: 42 | 43 | ## Compute bias as in IK equ:5, 44 | # note here 1/4 is outside C1 45 | if(missing(bw)) bw <- resB$h_opt 46 | res<- C1 * 1/2 * bw^2 *(resB$m2_right-resB$m2_left) 47 | return(res) 48 | 49 | } 50 | 51 | IK_var <-function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) { 52 | 53 | kernel <- match.arg(kernel) 54 | checkIsRDD(RDDobject) 55 | cutpoint <- getCutpoint(RDDobject) 56 | 57 | resB <- RDDbw_IK_low(X=RDDobject$x,Y=RDDobject$y,threshold=cutpoint,verbose=FALSE, type="RES", returnBig=TRUE, kernel=kernel) 58 | 59 | ## compute C2: see IK equ 5, and Fan Jijbels (1996, 3.23) 60 | # is done in R with locpol, computeRK(equivKernel(TrianK, nu=0, deg=1, lower=0, upper=Inf), lower=0, upper=Inf) 61 | C2 <- switch(kernel, "Triangular"= 4.8, "Uniform"= 4, "Normal"=1.785961) ## from: 62 | 63 | ## Compute var as in IK equ:5, 64 | if(missing(bw)) bw <- resB$h_op 65 | elem1 <- (resB$var_inh_left+resB$var_inh_right)/resB$f_cu 66 | elem2 <- C2/(nrow(RDDobject)*bw) 67 | res <- elem1*elem2 68 | res 69 | } 70 | 71 | IK_amse <- function(RDDobject, kernel=c("Triangular", "Uniform", "Normal"), bw) { 72 | 73 | var <- IK_var(RDDobject=RDDobject, kernel=kernel, bw=bw) 74 | bias <- IK_bias(RDDobject=RDDobject, kernel=kernel, bw=bw) 75 | res <- bias^2+var 76 | res 77 | } 78 | 79 | 80 | RDDbw_IK_low <-function (X,Y,threshold=0,verbose=FALSE, type=c("RES", "RES_imp","WP"), returnBig=FALSE, kernel=c("Triangular", "Uniform", "Normal")) { 81 | 82 | type <- match.arg(type) 83 | kernel <- match.arg(kernel) 84 | 85 | 86 | N <- length(X) 87 | N_left <- sum(X=threshold, na.rm=TRUE) 89 | 90 | 91 | ########## 92 | ### STEP 1 93 | ########## 94 | 95 | ## Silverman bandwidth 96 | h1 <- 1.84*sd(X)*N^(-1/5) 97 | if(verbose) cat("\n-h1:", h1) 98 | 99 | ## f(cut) 100 | isIn_h1_left <- X>=(threshold-h1) & X=threshold & X<=(threshold+h1) 102 | 103 | NisIn_h1_left <- sum(isIn_h1_left, na.rm=TRUE) 104 | NisIn_h1_right <- sum(isIn_h1_right, na.rm=TRUE) 105 | if(verbose) cat("\n-N left /right:", NisIn_h1_left, NisIn_h1_right) 106 | 107 | 108 | f_cut <-(NisIn_h1_left+NisIn_h1_right)/(2*N*h1) 109 | if(verbose) cat("\n-f(threshold):", f_cut) 110 | 111 | ## Variances : Equ (13) 112 | 113 | var_inh_left <- var(Y[isIn_h1_left], na.rm=TRUE) 114 | var_inh_right <- var(Y[isIn_h1_right], na.rm=TRUE) 115 | 116 | # problem with working pap0er: Equ 4.9 is different! 117 | if(type=="WP"){ 118 | denom <- 1/(NisIn_h1_left+NisIn_h1_right) 119 | var_inh_global <- denom* ((NisIn_h1_left-1)* var_inh_left + (NisIn_h1_right-1)* var_inh_right) 120 | } 121 | 122 | if(verbose){ 123 | cat("\n-Sigma^2 left:", var_inh_left, "\n-Sigma^2 right:", var_inh_right) 124 | } 125 | ########## 126 | ### STEP 2 127 | ########## 128 | 129 | 130 | ## Global function of order 3: Equ (14) 131 | reg <-lm(Y~I(X>=threshold)+I(X-threshold)+I((X-threshold)^2)+I((X-threshold)^3)) 132 | m3<- 6*coef(reg)[5] 133 | if(verbose) cat("\n-m3:", m3) 134 | 135 | 136 | ## left and right bandwidths: Equ (15) 137 | Ck_h2 <- 3.556702 # 7200^(1/7) 138 | h2_left <- Ck_h2 * ( var_inh_left /(f_cut*m3^2))^(1/7) * N_left^(-1/7) 139 | h2_right <- Ck_h2 * ( var_inh_right /(f_cut*m3^2))^(1/7) * N_right^(-1/7) 140 | 141 | if(verbose) cat("\n-h2 left:", h2_left, "\n-h2 right:", h2_right) 142 | 143 | ## second derivatives right/left 144 | isIn_h2_left <- X>=(threshold-h2_left) & X=threshold & X<=(threshold+h2_right) 146 | 147 | N_h2_left <- sum(isIn_h2_left, na.rm=TRUE) 148 | N_h2_right <- sum(isIn_h2_right, na.rm=TRUE) 149 | 150 | reg2_left <-lm(Y~ I(X-threshold)+I((X-threshold)^2),subset=isIn_h2_left) 151 | reg2_right <-lm(Y~ I(X-threshold)+I((X-threshold)^2),subset=isIn_h2_right) 152 | 153 | m2_left <- as.numeric(2*coef(reg2_left)[3]) 154 | m2_right <- as.numeric(2*coef(reg2_right)[3]) 155 | 156 | if(verbose) cat("\n-m2 left:", m2_left, "\n-m2 right:", m2_right) 157 | 158 | ########## 159 | ### STEP 3 160 | ########## 161 | 162 | ## Regularization: Equ (16) 163 | if(type=="RES"){ 164 | r_left <- (2160*var_inh_left) / (N_h2_left *h2_left^4) 165 | r_right <- (2160*var_inh_right) / (N_h2_right*h2_right^4) 166 | } else { 167 | r_left <- (2160*var_inh_global) / (N_h2_left *h2_left^4) 168 | r_right <- (2160*var_inh_global) / (N_h2_right*h2_right^4) 169 | } 170 | 171 | 172 | if(verbose) cat("\n-Reg left:", r_left, "\n-Reg right:", r_right) 173 | 174 | ## Compute kernel dependent constant: (see file ~/Dropbox/HEI/rdd/Rcode/IK bandwidth/bandwidth_comput.R) 175 | Ck <- switch(kernel, "Triangular"=3.4375, "Uniform"=2.70192, "Normal"=1.25864) # is not 5.4 as in paper since our kernel is on I(|x|<1), not <1/2 176 | 177 | ## Final bandwidth: Equ (17) 178 | h_opt <- Ck * ( (var_inh_left+ var_inh_right) / (f_cut * ((m2_right-m2_left)^2 + r_left +r_right)))^(1/5) * N^(-1/5) 179 | names(h_opt) <- "h_opt" 180 | 181 | if(verbose) cat("\n\n") 182 | 183 | ### 184 | if(returnBig){ 185 | res<- list() 186 | res$h_opt <- as.numeric(h_opt) 187 | res$var_inh_left <- var_inh_left 188 | res$var_inh_right <- var_inh_right 189 | res$m2_right <- m2_right 190 | res$m2_left <- m2_left 191 | res$f_cut <- f_cut 192 | res$h2_left <- h2_left 193 | res$h2_right <- h2_right 194 | } else { 195 | res <- h_opt 196 | } 197 | 198 | return(res) 199 | } 200 | 201 | if(FALSE){ 202 | lee_dat4 <- read.csv("/home/mat/Dropbox/HEI/rdd/Rcode/IK bandwidth/datasets/imbens_from_MATLAB.csv", header=FALSE) 203 | colnames(lee_dat4) <- c("X", "Y") 204 | IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=TRUE) 205 | IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=TRUE, type="WP") 206 | IKbandwidth3(X=lee_dat4$X, Y=lee_dat4$Y, verbose=FALSE, returnBig=TRUE) 207 | 208 | 209 | data(Lee2008) 210 | Lee2008_rdd <- RDDdata(x=Lee2008$x,y=Lee2008$y , cutpoint=0) 211 | 212 | ### 213 | bw_IK <- RDDbw_IK(Lee2008_rdd) 214 | bws <- sort(c(bw_IK, seq(0.05, 0.5, by=0.05))) 215 | bi <- Vectorize(IK_bias, vectorize.args="bw")(Lee2008_rdd, bw=bws) 216 | va <- Vectorize(IK_var, vectorize.args="bw")(Lee2008_rdd, bw=bws) 217 | ms <- Vectorize(IK_amse, vectorize.args="bw")(Lee2008_rdd, bw=bws) 218 | 219 | df<- data.frame(bw=rep(bws,3), value=c(ms, va, bi^2), type=rep(c("ms", "va", "bias^2"), each=length(bws))) 220 | 221 | 222 | # qplot(x=bw, y=value, data=df, geom="line", colour=type)+geom_point(data=subset(df, value==min(subset(df, type=="ms", "value")))) 223 | 224 | bws_03 <- sort(c(bw_IK, seq(0.25, 0.35, by=0.005))) 225 | ms_03 <- Vectorize(IK_amse, vectorize.args="bw")(Lee2008_rdd, bw=bws_03) 226 | df2 <- data.frame(bw=bws_03,mse=ms_03) 227 | 228 | subset(df2, mse==min(mse)) ## 1.78, not 1.74 from: 229 | qplot(x=bw, y=mse, data=df2, geom="line") 230 | } 231 | -------------------------------------------------------------------------------- /RDDtools/R/covarTests.R: -------------------------------------------------------------------------------- 1 | #' Testing for balanced covariates: equality of means with t-test 2 | #' 3 | #' Tests equality of means by a t-test for each covariate, between the two full groups or around the discontinuity threshold 4 | #' 5 | #' @param object object of class RDDdata 6 | #' @param bw a bandwidth 7 | #' @param paired Argument of the \code{\link{t.test}} function: logical indicating whether you want paired t-tests. 8 | #' @param var.equal Argument of the \code{\link{t.test}} function: logical variable indicating whether to treat the two variances as being equal 9 | #' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function 10 | #' @param \ldots currently not used 11 | #' @return A data frame with, for each covariate, the mean on each size, the difference, t-stat and ts p-value. 12 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> 13 | #' @seealso \code{\link{covarTest_dis}} for the Kolmogorov-Smirnov test of equality of distribution 14 | #' @examples 15 | #' data(Lee2008) 16 | #' 17 | #' ## Add randomly generated covariates 18 | #' set.seed(123) 19 | #' n_Lee <- nrow(Lee2008) 20 | #' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 21 | #' z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 22 | #' z3 = sample(letters, size = n_Lee, replace = TRUE)) 23 | #' Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) 24 | #' 25 | #' ## test for equality of means around cutoff: 26 | #' covarTest_mean(Lee2008_rdd_Z, bw=0.3) 27 | #' 28 | #' ## Can also use function covarTest_dis() for Kolmogorov-Smirnov test: 29 | #' covarTest_dis(Lee2008_rdd_Z, bw=0.3) 30 | #' 31 | #' ## covarTest_mean works also on regression outputs (bw will be taken from the model) 32 | #' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) 33 | #' covarTest_mean(reg_nonpara) 34 | 35 | 36 | 37 | 38 | 39 | #' @export 40 | covarTest_mean <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) 41 | UseMethod("covarTest_mean") 42 | 43 | #' @rdname covarTest_mean 44 | #' @method covarTest_mean RDDdata 45 | #' @S3method covarTest_mean RDDdata 46 | covarTest_mean.RDDdata <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { 47 | 48 | cutpoint <- getCutpoint(object) 49 | covar <- getCovar(object) 50 | cutvar <- object$x 51 | 52 | covarTest_mean_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, paired = paired, var.equal = var.equal, p.adjust=p.adjust) 53 | 54 | } 55 | 56 | 57 | #' @rdname covarTest_mean 58 | #' @method covarTest_mean RDDreg 59 | #' @S3method covarTest_mean RDDreg 60 | covarTest_mean.RDDreg <- function(object, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { 61 | 62 | cutpoint <- getCutpoint(object) 63 | dat <- object$RDDslot$RDDdata 64 | covar <- getCovar(dat) 65 | cutvar <- dat$x 66 | if(is.null(bw)) bw <- getBW(object) 67 | 68 | covarTest_mean_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, paired = paired, var.equal = var.equal, p.adjust=p.adjust) 69 | 70 | } 71 | 72 | 73 | covarTest_mean_low <- function(covar,cutvar, cutpoint, bw=NULL, paired = FALSE, var.equal = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { 74 | 75 | p.adjust <- match.arg(p.adjust) 76 | 77 | ## subset 78 | if(!is.null(bw)){ 79 | isInH <- cutvar >= cutpoint -bw & cutvar <= cutpoint +bw 80 | covar <- covar[isInH,] 81 | cutvar <- cutvar[isInH] 82 | } 83 | regime <- cutvar < cutpoint 84 | 85 | ## Split data 86 | covar_num <- sapply(covar, as.numeric) 87 | 88 | tests <-apply(covar_num, 2, function(x) t.test(x[regime], x[!regime], paired=paired, var.equal=var.equal)) 89 | tests_vals <- sapply(tests, function(x) c(x[["estimate"]], diff(x[["estimate"]]),x[c("statistic", "p.value")])) 90 | 91 | ## Adjust p values if required: 92 | if(p.adjust!="none") tests_vals["p.value",] <- p.adjust(tests_vals["p.value",], method=p.adjust) 93 | 94 | ## Print results 95 | res <- t(tests_vals) 96 | colnames(res)[3] <- "Difference" 97 | res 98 | 99 | 100 | } 101 | 102 | 103 | 104 | 105 | #' Testing for balanced covariates: equality of distribution 106 | #' 107 | #' Tests equality of distribution with a Kolmogorov-Smirnov for each covariates, between the two full groups or around the discontinuity threshold 108 | #' 109 | #' @param object object of class RDDdata 110 | #' @param bw a bandwidth 111 | #' @param exact Argument of the \code{\link{ks.test}} function: NULL or a logical indicating whether an exact p-value should be computed. 112 | #' @param p.adjust Whether to adjust the p-values for multiple testing. Uses the \code{\link{p.adjust}} function 113 | #' @param \ldots currently not used 114 | #' @return A data frame with, for each covariate, the K-S statistic and its p-value. 115 | #' @author Matthieu Stigler <\email{Matthieu.Stigler@@gmail.com}> 116 | #' @seealso \code{\link{covarTest_mean}} for the t-test of equality of means 117 | #' @examples 118 | #' data(Lee2008) 119 | #' 120 | #' ## Add randomly generated covariates 121 | #' set.seed(123) 122 | #' n_Lee <- nrow(Lee2008) 123 | #' Z <- data.frame(z1 = rnorm(n_Lee, sd=2), 124 | #' z2 = rnorm(n_Lee, mean = ifelse(Lee2008<0, 5, 8)), 125 | #' z3 = sample(letters, size = n_Lee, replace = TRUE)) 126 | #' Lee2008_rdd_Z <- RDDdata(y = Lee2008$y, x = Lee2008$x, covar = Z, cutpoint = 0) 127 | #' 128 | #' ## Kolmogorov-Smirnov test of equality in distribution: 129 | #' covarTest_dis(Lee2008_rdd_Z, bw=0.3) 130 | #' 131 | #' ## Can also use function covarTest_dis() for a t-test for equality of means around cutoff: 132 | #' covarTest_mean(Lee2008_rdd_Z, bw=0.3) 133 | #' ## covarTest_dis works also on regression outputs (bw will be taken from the model) 134 | #' reg_nonpara <- RDDreg_np(RDDobject=Lee2008_rdd_Z) 135 | #' covarTest_dis(reg_nonpara) 136 | 137 | #' @export 138 | covarTest_dis <- function(object, bw, exact=NULL, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) 139 | UseMethod("covarTest_dis") 140 | 141 | #' @rdname covarTest_dis 142 | #' @method covarTest_dis RDDdata 143 | #' @S3method covarTest_dis RDDdata 144 | covarTest_dis.RDDdata <- function(object, bw=NULL, exact = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { 145 | 146 | cutpoint <- getCutpoint(object) 147 | covar <- getCovar(object) 148 | cutvar <- object$x 149 | 150 | covarTest_dis_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, exact= exact, p.adjust=p.adjust) 151 | 152 | } 153 | 154 | #' @rdname covarTest_dis 155 | #' @method covarTest_dis RDDreg 156 | #' @S3method covarTest_dis RDDreg 157 | covarTest_dis.RDDreg <- function(object, bw=NULL, exact = FALSE, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { 158 | 159 | cutpoint <- getCutpoint(object) 160 | dat <- object$RDDslot$RDDdata 161 | covar <- getCovar(dat) 162 | cutvar <- dat$x 163 | if(is.null(bw)) bw <- getBW(object) 164 | 165 | covarTest_dis_low(covar=covar,cutvar=cutvar,cutpoint=cutpoint, bw=bw, exact= exact, p.adjust=p.adjust) 166 | 167 | } 168 | 169 | covarTest_dis_low <- function(covar,cutvar, cutpoint, bw=NULL, exact=NULL, p.adjust=c("none", "holm", "BH", "BY","hochberg", "hommel", "bonferroni")) { 170 | 171 | p.adjust <- match.arg(p.adjust) 172 | 173 | ## subset 174 | if(!is.null(bw)){ 175 | isInH <- cutvar >= cutpoint -bw & cutvar <= cutpoint +bw 176 | covar <- covar[isInH,] 177 | cutvar <- cutvar[isInH] 178 | } 179 | regime <- cutvar < cutpoint 180 | 181 | 182 | 183 | ## Split data 184 | covar_num <- sapply(covar, as.numeric) 185 | 186 | tests <-apply(covar_num, 2, function(x) ks.test(x[regime], x[!regime], exact=exact)) 187 | tests_vals <- sapply(tests, function(x) x[c("statistic", "p.value")]) 188 | 189 | ## Adjust p values if required: 190 | if(p.adjust!="none") tests_vals["p.value",] <- p.adjust(tests_vals["p.value",], method=p.adjust) 191 | 192 | ## Print results 193 | res <- t(tests_vals) 194 | res 195 | 196 | 197 | } 198 | 199 | 200 | ########################################## 201 | ###### TODO 202 | ########################################## 203 | ## -mean: can use t.test for factors? What else? Count test? Warn for character/factors! 204 | ## -mean: add multivariate hotelling 205 | ## -ks: ok for factors? 206 | ## -do qqplot? 207 | ## -add methods for regs? Once converted to other objects... 208 | ## -add example and bettet output documentation 209 | ## 210 | ## 211 | ## 212 | 213 | ########################################## 214 | ###### TESTS 215 | ########################################## 216 | 217 | if(FALSE){ 218 | library(Hotelling) 219 | library(mvtnorm) 220 | 221 | data <- rmvnorm(n=200, mean=c(1,2)) 222 | spli <- sample(c(TRUE, FALSE), size=200, replace=TRUE) 223 | 224 | a<-hotel.stat(data[spli,],data[!spli,]) 225 | a 226 | 227 | b<-hotel.test(data[spli,],data[!spli,]) 228 | b 229 | b$stats 230 | 231 | } 232 | 233 | 234 | 235 | 236 | if(FALSE){ 237 | library(RDDtools) 238 | data(Lee2008) 239 | 240 | Z <- data.frame(z_con=runif(nrow(Lee2008)), z_dic=factor(sample(letters[1:3], size=nrow(Lee2008), replace=TRUE))) 241 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z, cutpoint=0) 242 | 243 | 244 | covarTest_mean(object=Lee2008_rdd) 245 | covarTest_dis(object=Lee2008_rdd) 246 | 247 | 248 | 249 | } 250 | -------------------------------------------------------------------------------- /RDDtools/tests/RDDpred.R: -------------------------------------------------------------------------------- 1 | library(RDDtools) 2 | library(car) 3 | 4 | 5 | #### DATA 6 | data(Lee2008) 7 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 8 | 9 | n_Lee <- nrow(Lee2008) 10 | 11 | set.seed(123) 12 | Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) 13 | Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,cutpoint=0) 14 | 15 | #### REGS 16 | bw_IK <- RDDbw_IK(Lee2008_rdd_z) 17 | w_IK <- RDDtools:::Kernel_tri(Lee2008_rdd_z$x, 0, bw_IK) 18 | reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate")) 19 | reg_para4_cov_slSep_W <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate"), weights=w_IK) 20 | reg_np_cov <- RDDreg_np(RDDobject=Lee2008_rdd_z, covariates="z1", bw=bw_IK, inference="lm") 21 | 22 | 23 | 24 | 25 | reg_para4_cov_slSep_2Z <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1+z2", covar.opt=list(slope="separate")) 26 | 27 | reg_li <- list( reg_para4_cov_slSep=reg_para4_cov_slSep, 28 | reg_para4_cov_slSep_W=reg_para4_cov_slSep_W, 29 | reg_np_cov=reg_np_cov, 30 | reg_para4_cov_slSep_2Z=reg_para4_cov_slSep_2Z) 31 | 32 | checkRDDmean <- function(x, n=5){ 33 | covDF <- model.frame(x) 34 | zDF <- grep("z", colnames(covDF), value=FALSE) 35 | hasD <- zDF[-grep(":", colnames(covDF)[zDF])] 36 | 37 | DF_1 <- covDF[1:n,hasD, drop=FALSE] 38 | DF_2 <- data.frame(t(colMeans(DF_1))) 39 | 40 | pred_1 <- RDDpred(x, covdata=DF_1, stat="mean") 41 | pred_2 <- RDDpred(x, covdata=DF_2) 42 | all.equal(pred_1, pred_2, check.attributes=FALSE) 43 | } 44 | 45 | sapply(reg_li, checkRDDmean) 46 | 47 | sapply(reg_li, function(x) all.equal(unlist(RDDpred(x)),RDDcoef(x, allInfo=TRUE)[1,1:2], check.attributes=FALSE)) 48 | 49 | 50 | # 51 | # reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 52 | # print(reg_para) 53 | # summary(reg_para) 54 | # plot(reg_para) 55 | # 56 | # formula(reg_para) 57 | # 58 | # update(as.formula("y ~ D + `x^1` + `x^1_right`"), reg_para) 59 | # reg_para_l <- as.lm(reg_para) 60 | # # update(reg_para_l, y ~ D + `x^1` + `x^1_right`) 61 | # 62 | # mf <- model.frame(reg_para) 63 | # 64 | # lm("y ~ D + `x^1` + `x^1_right`", mf) 65 | # a<-lm("y ~ -1 + D +I(1-D) + `x^1` + `x^1_right`", mf) 66 | # diff(coef(a)[2:1]) 67 | # coef(reg_para) 68 | # 69 | # # deltaMethod(a, "I(1-D) - D", parameterNames=paste("a", 1:4, sep="")) 70 | # deltaMethod(a, "a1 - a2", parameterNames=paste("a", 1:4, sep="")) 71 | # coef(summary(reg_para))[2,] 72 | # 73 | # reg_para4_cov_slSep <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=4, covariates="z1", covar.opt=list(slope="separate")) 74 | # 75 | mf_2 <- model.frame(reg_para4_cov_slSep) 76 | # formula(reg_para4_cov_slSep) 77 | # 78 | aa <- lm("y ~ D + `x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2) 79 | aaa <- lm("y ~ -1+ D + I(1-D)+`x` + `x^2` + `x^3` + `x^4` + `x_right` + `x^2_right` + `x^3_right` + `x^4_right` + z1 + `z1:D`", data=mf_2) 80 | # 81 | # diff(coef(aaa)[2:1]) 82 | # RDDpred(reg_para4_cov_slSep) 83 | # RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0)) 84 | # 85 | # RDDcoef(reg_para4_cov_slSep, allInfo=TRUE) 86 | 87 | ## compare RDDpred and Delta at 1: 88 | rdd_p_1 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=1)) 89 | delta_1 <- deltaMethod(aaa, "a1 - a2 + a12", parameterNames=paste("a", 1:12, sep="")) 90 | rdd_p_1 91 | delta_1 92 | all.equal(unlist(rdd_p_1), drop(as.matrix(delta_1[1:2])), check.attributes=FALSE) 93 | 94 | ## compare RDDpred and Delta at 0: 95 | rdd_p_0 <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=0)) 96 | rdd_c_0 <- RDDcoef(reg_para4_cov_slSep, allInfo=TRUE) 97 | delta_0 <- deltaMethod(aaa, "a1 - a2 ", parameterNames=paste("a", 1:12, sep="")) 98 | rdd_p_0 99 | rdd_c_0 100 | delta_0 101 | all.equal(unlist(rdd_p_0), drop(as.matrix(delta_0[1:2])), check.attributes=FALSE) 102 | all.equal(unlist(rdd_p_0), drop(as.matrix(rdd_c_0[1:2])), check.attributes=FALSE) 103 | 104 | ## compare RDDpred and Delta at 2 points: 105 | rdd_p_01_AGG <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.5))) 106 | rdd_p_01_all <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1))) 107 | rdd_p_01_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="sum") 108 | rdd_p_01_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0, 1)), stat="mean") 109 | 110 | delta_01_S <- deltaMethod(aaa, "2*(a1 - a2) +1*a12", parameterNames=paste("a", 1:12, sep="")) 111 | delta_01_M <- deltaMethod(aaa, "(2*(a1 - a2) +1*a12)/2", parameterNames=paste("a", 1:12, sep="")) 112 | delta_01_S 113 | delta_01_M 114 | 115 | all(delta_01_S/2==delta_01_M) 116 | 117 | ## compare individuals (stat=ident) 118 | all.equal(rdd_p_01_all$fit, c(delta_0[1,1], delta_1[1,1])) 119 | all.equal(rdd_p_01_all$se.fit, c(delta_0[1,2], delta_1[1,2])) 120 | c(rdd_p_01_M$fit/2, rdd_p_01_AGG$fit) 121 | 122 | ## compare sum (stat=sum) 123 | all.equal(unlist(rdd_p_01_S), drop(as.matrix(delta_01_S[1:2])), check.attributes=FALSE) 124 | 125 | ## compare mean (stat=mean) 126 | all.equal(unlist(rdd_p_01_M), drop(as.matrix(delta_01_M[1:2])), check.attributes=FALSE) 127 | all.equal(rdd_p_01_M$fit, rdd_p_01_S$fit/2) 128 | all.equal(rdd_p_01_M$fit, rdd_p_01_AGG$fit, check.attributes=FALSE) 129 | all.equal(rdd_p_01_M$se.fit, rdd_p_01_AGG$se.fit, check.attributes=FALSE) 130 | 131 | ## compare RDDpred and Delta at 5 first points: 132 | ind_z_pos <- head(which(Lee2008_rdd_z$z1>0),5) 133 | 134 | rdd_p_01_5z_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="sum") 135 | rdd_p_01_5z_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[1:5])), stat="sum") 136 | rdd_p_01_5zPos_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[ind_z_pos]), stat="sum") 137 | rdd_p_01_5zPos_Sb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=sum(Lee2008_rdd_z$z1[ind_z_pos])), stat="sum") 138 | rdd_p_01_5z_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1[1:5]), stat="mean") 139 | rdd_p_01_5z_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1[1:5])), stat="mean") 140 | rdd_p_01_ALLz_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=Lee2008_rdd_z$z1), stat="mean") 141 | rdd_p_01_ALLz_Mb <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=mean(Lee2008_rdd_z$z1)), stat="mean") 142 | 143 | del <- function(x, mean=FALSE) { 144 | n <- length(x) 145 | res <- paste(c(paste(n, "*(a1-a2) "), paste(x, "*a12", sep="")), collapse=" +") 146 | su <- sum(x) 147 | sig <- if(sign(su)==1) "+" else NULL 148 | res <- paste(n, "*(a1-a2) ", sig, su, "*a12", sep="") 149 | if(mean) res <- paste("(", res, ")/", n, sep="") 150 | res 151 | } 152 | 153 | del(x=Lee2008_rdd_z$z1[1:5]) 154 | delta_01_5z_S <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5]), parameterNames=paste("a", 1:12, sep=""), func="RDD") 155 | delta_01_5z_M <- deltaMethod(aaa, del(x=Lee2008_rdd_z$z1[1:5], mean=TRUE), parameterNames=paste("a", 1:12, sep=""), func="RDD") 156 | 157 | all.equal(unlist(rdd_p_01_5z_S), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE) 158 | all.equal(unlist(rdd_p_01_5z_Sb), drop(as.matrix(delta_01_5z_S[1:2])), check.attributes=FALSE) 159 | all.equal(unlist(rdd_p_01_5z_M), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE) 160 | all.equal(unlist(rdd_p_01_5z_Mb), drop(as.matrix(delta_01_5z_M[1:2])), check.attributes=FALSE) 161 | 162 | ## All z: 163 | # all.equal(rdd_p_01_ALLz_M, rdd_p_01_ALLz_Mb, check.attributes=FALSE) 164 | 165 | #### Weighted mean!! 166 | w_5 <- c(0.1, 0.2, 0.4, 0.2, 0.1) 167 | w <- c(0.4, 0.6) 168 | rdd_p_01_Sid <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="identity") 169 | wm <- weighted.mean(rdd_p_01_Sid$fit , w=w) 170 | 171 | delta_2z_w <- deltaMethod(aaa, "0.4*(a1 - a2) + 0.4*0.2*a12+0.6*(a1 - a2) + 0.6*a12", parameterNames=paste("a", 1:12, sep="")) 172 | delta_2z_w2 <- deltaMethod(aaa, "1*(a1 - a2) + 0.4*0.2*a12 + 0.6*a12", parameterNames=paste("a", 1:12, sep="")) 173 | delta_2z_w3 <- deltaMethod(aaa, "1*(a1 - a2) + a12*(0.4*0.2 + 0.6)", parameterNames=paste("a", 1:12, sep="")) 174 | all(delta_2z_w==delta_2z_w2) 175 | all.equal(delta_2z_w, delta_2z_w3, check.attributes=FALSE) 176 | all.equal(delta_2z_w[1,1],wm) 177 | 178 | rdd_p_01_W_S <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="sum", weights=w) 179 | rdd_p_01_W_M <- RDDpred(reg_para4_cov_slSep, covdata=data.frame(z1=c(0.2,1)), stat="mean", weights=w) 180 | all.equal(rdd_p_01_W_M$fit,wm) 181 | 182 | all.equal(unlist(rdd_p_01_W_S), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE) 183 | all.equal(unlist(rdd_p_01_W_M), drop(as.matrix(delta_2z_w2[1:2])), check.attributes=FALSE) 184 | 185 | 186 | ###### 2 Z: 187 | df_2Z_5z <- Lee2008_rdd_z[1:5, c("z1", "z2")] 188 | df_2Z_5z_M <- data.frame(t(colMeans(df_2Z_5z))) 189 | df_2Z_5z_Mw <- data.frame(t(apply(df_2Z_5z, 2, weighted.mean, w=w_5))) 190 | 191 | rdd_p_sZ_5z_S <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="sum") 192 | rdd_p_sZ_5z_M <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean") 193 | rdd_p_sZ_5z_Mb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_M, stat="sum") 194 | 195 | rdd_p_sZ_5z_MW <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z, stat="mean", weights=w_5) 196 | rdd_p_sZ_5z_MWb <- RDDpred(reg_para4_cov_slSep_2Z, covdata=df_2Z_5z_Mw, stat="sum") 197 | 198 | all.equal(rdd_p_sZ_5z_M, rdd_p_sZ_5z_Mb, check.attributes=FALSE) 199 | all.equal(rdd_p_sZ_5z_MW, rdd_p_sZ_5z_MWb, check.attributes=FALSE) 200 | -------------------------------------------------------------------------------- /RDDtools/R/RDDpred.R: -------------------------------------------------------------------------------- 1 | #' RDD coefficient prediction 2 | #' 3 | #' Function to predict the RDD coefficient in presence of covariate (without covariates, returns the same than \code{\link{RDDcoef}}) 4 | #' @param object A RDD regression object 5 | #' @param covdata New data.frame specifying the values of the covariates, can have multiple rows. 6 | #' @param se.fit A switch indicating if standard errors are required. 7 | #' @param vcov. Specific covariance function (see package sandwich ), by default uses the \code{\link{vcov}} 8 | #' @param newdata Another data on which to evaluate the x/D variables. Useful in very few cases. 9 | #' @param stat The statistic to use if there are multiple predictions, 'identity' just returns the single values, 'mean' averages them 10 | #' @param weights Eventual weights for the averaging of the predicted values. 11 | #' @details The function \code{RDDpred} does a simple prediction of the RDD effect 12 | #' \deqn{RDDeffect= \mu(x, z, D=1) - \mu(x, z, D=0)} 13 | #' When there are no covariates (and z is irrelevant in the equation above), this amounts exactly to the usual RDD coefficient, 14 | #' shown in the outputs, or obtained with \code{\link{RDDcoef}}. If there were covariates, and if these covariates were estimated using the 15 | #' \dQuote{include} \emph{strategy} and with different coefficients left and right to the cutoff (i.e. 16 | #' had argument \emph{slope} = \dQuote{separate}), than the RDD effect is also dependent on the value of the covariate(s). 17 | #' \code{RDDpred} allows to set the value of the covariate(s) at which to evaluate the RDD effect, by providing a data.frame with 18 | #' the values for the covariates. Note that the effect can be evaluated at multiple points, if you provide multiple rows of \code{covdata}. 19 | #' 20 | #' In pressence of covariate-specific RDD effect, one may wish to estimate an average effect. This can be done by setting the argument \code{stat="mean"}. 21 | #' Weights can additionally be added, with the argument \code{weights}, to obtain a weighted-average of the predictions. Note however that in most cases, 22 | #' this will be equivalent to provide covariates at their (weighted) mean value, which will be much faster also! 23 | #' 24 | #' Standard errors, obtained setting the argument \code{se.fit=TRUE}, are computed using following formula: 25 | #' \deqn{x_i \Omega x_i^{'}} 26 | #' where \eqn{\Omega} is the estimated variance-covariance matrix ( by default \eqn{\sigma^2(X^{'}X)^{-1}} using \code{\link{vcov}}) and 27 | #' \eqn{x_i} is the input data (a mix of covdata and input data). If one wishes individual predictions, standard errors are simply obtained 28 | #' as the square of that diagonal matrix, whereas for mean/sum, covariances are taken into account. 29 | #' @return Returns the predicted value(s), and, if se.fit=TRUE, their standard errors. 30 | #' @export 31 | #' @references Froehlich (2007) Regression discontinuity design with covariates, IZA discussion paper 3024 32 | #' @examples 33 | #' ## Load data, add (artificial) covariates: 34 | #' data(Lee2008) 35 | #' n_Lee <- nrow(Lee2008) 36 | #' z1 <- runif(n_Lee) 37 | #' Lee2008_rdd <- RDDdata(y=y, x=x, data=Lee2008, covar=z1, cutpoint=0) 38 | #' 39 | #' ## estimation without covariates: RDDpred is the same than RDDcoef: 40 | #' reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 41 | #' 42 | #' RDDpred(reg_para) 43 | #' RDDcoef(reg_para, allInfo=TRUE) 44 | #' 45 | #' ## estimation with covariates: 46 | #' reg_para_cov <- RDDreg_lm(RDDobject=Lee2008_rdd, covariates="z1", covar.opt=list(slope="separate")) 47 | #' RDDpred(reg_para_cov, covdata=data.frame(z1=0)) ## should obtain same result than with RDestimate 48 | #' RDDpred(reg_para_cov, covdata=data.frame(z1=0.5)) #evaluate at mean of z1 (as comes from uniform) 49 | 50 | RDDpred <- function(object, covdata, se.fit=TRUE, vcov. = NULL, newdata, stat=c("identity", "sum", "mean"), weights){ 51 | 52 | stat <- match.arg(stat) 53 | 54 | if(!missing(weights)) { 55 | if(missing(covdata)) stop("Arg 'weights' only useful with arg 'covdata'") 56 | if(stat=="identity") stop("Argument 'weights' not useful when arg: stat='identity'") 57 | if(stat=="sum") { 58 | warning("Providing weights for a sum makes little sense?!") 59 | } 60 | if(length(weights)!=NROW(covdata)) stop("Weights should be of the same length than covdata") 61 | } 62 | 63 | x_call <- getCall(object) 64 | hasCo <- hasCovar(object) 65 | 66 | if(is.null(x_call$covar.opt)){ 67 | covar.slope <- "same" 68 | covar.strat <- "include" 69 | } else { 70 | covar.slope <- ifelse(is.null(x_call$covar.opt$slope), "same", x_call$covar.opt$slope) 71 | covar.strat <- ifelse(is.null(x_call$covar.opt$strategy), "include", x_call$covar.opt$strategy) 72 | } 73 | 74 | 75 | ## get original data structure: 76 | mf <- model.frame(object)[1:2,-1] 77 | if(any(grepl("\\(weights\\)", colnames(mf)))) mf <- mf[,-grep("\\(weights\\)", colnames(mf))] 78 | 79 | ## Fill orig struc with 0/1 80 | if(missing(newdata)){ 81 | which.D <- grep("^D$", colnames(mf)) 82 | mf[,which.D] <- c(0,1) ## set coeff of interest 83 | mf[,-which.D] <- 0 ## remove others (not absolutely necessary actually) 84 | newdata <- mf 85 | } 86 | 87 | ## Merge covdata with newdata: 88 | 89 | if(!missing(covdata)){ 90 | if(covar.strat=="residual") stop("Do not provide 'covdata' if covariates were use with 'residual' strategy") 91 | if(covar.slope=="separate"){ 92 | Nrow_cov <- nrow(covdata) 93 | if(Nrow_cov>1) newdata <- newdata[c(1, rep(2,Nrow_cov)),] 94 | if(!is.null(rownames(covdata))) { 95 | if("1" %in% rownames(covdata)) rownames(newdata)[1] <- "0" 96 | rownames(newdata)[-1] <- rownames(covdata) 97 | } else { 98 | rownames(newdata) <- c(0, seq_len(Nrow_cov)) 99 | } 100 | colnames_cov <- colnames(covdata) 101 | ind <- seq(from=2, by=2, length.out=Nrow_cov) 102 | if(!all(colnames_cov%in% colnames(newdata))) stop("Arg 'covdata' contains colnames not in the data") 103 | newdata[2:nrow(newdata), paste(colnames(covdata), "D", sep=":")] <- covdata 104 | } 105 | } 106 | 107 | multiN <- nrow(newdata)>2 108 | 109 | ## Merge and check no NAs 110 | X_i <- as.matrix(cbind(1,newdata)) 111 | if(any(is.na(X_i))){ 112 | warning("data contains NA. Were removed") 113 | X_i <- X_i[-apply(X_i, 1, function(x) any(is.na(x))),] 114 | } 115 | 116 | ## Set up variance matrix: X_i (X'X)^{-1} X_i' 117 | if(is.null(vcov.)) vcov. <- vcov(object) 118 | X_inv <- vcov. 119 | mat <- X_i%*%X_inv%*%t(X_i) 120 | 121 | ## preds: 122 | 123 | if(!multiN) { 124 | pred_point <- drop(diff(X_i%*%RDDcoef(object, allCo=TRUE))) 125 | if(se.fit) pred_se <- sqrt(sum(c(diag(mat), -2*mat[1,2]))) 126 | } else { 127 | d <- X_i%*%coef(object) 128 | 129 | 130 | Mat_SUM <- cbind( 1, diag(nrow(d)-1)) 131 | Mat_DIAG <- matrix(diag(mat), ncol=1) 132 | if(missing(weights)) { 133 | MAT_SmallSum <- matrix(c(-(nrow(d)-1), rep(1,nrow(d)-1 )), nrow=1) ## create vector: [- n-1, 1, 1, 1....] 134 | } else { 135 | MAT_SmallSum <- matrix(c(-1, weights), nrow=1) ## create vector: [- 1, w_1, w_2, w_n] 136 | } 137 | 138 | if(stat=="identity"){ 139 | Mat_DIFF <- Mat_SUM 140 | Mat_DIFF[,1] <- -1 141 | pred_point <- drop(Mat_DIFF%*%d) 142 | if(se.fit) pred_se <- drop(sqrt(Mat_SUM %*%Mat_DIAG -2* mat[1,2:ncol(mat)])) 143 | } else { 144 | if(stat=="mean" & missing(weights)) MAT_SmallSum <- MAT_SmallSum/Nrow_cov 145 | pred_point <- drop(MAT_SmallSum%*%d) 146 | if(se.fit) pred_se <- drop(sqrt(MAT_SmallSum%*%mat%*%t(MAT_SmallSum))) 147 | } 148 | } 149 | 150 | 151 | ## result: 152 | if(se.fit){ 153 | res <- list() 154 | res$fit <- pred_point 155 | res$se.fit <- pred_se 156 | } else { 157 | res <- pred_point 158 | } 159 | res 160 | } 161 | 162 | if(FALSE){ 163 | library(RDDtools) 164 | data(Lee2008) 165 | head(Lee2008) 166 | 167 | Lee2008_rdd <- RDDdata(y=Lee2008$y, x=Lee2008$x, cutpoint=0) 168 | 169 | set.seed(123) 170 | n_Lee <- nrow(Lee2008) 171 | Z<- data.frame(z1=rnorm(n_Lee), z2=rnorm(n_Lee, mean=20, sd=2), z3=sample(letters[1:3], size=n_Lee, replace=TRUE)) 172 | Lee2008_rdd_z <- RDDdata(y=Lee2008$y, x=Lee2008$x, covar=Z,utpoint=0) 173 | 174 | ## use: 175 | reg_para <- RDDreg_lm(RDDobject=Lee2008_rdd) 176 | 177 | RDDpred(reg_para) 178 | RDDcoef(reg_para, allInfo=TRUE) 179 | all.equal(unlist(RDDpred(reg_para)), RDDcoef(reg_para, allInfo=TRUE)[1:2], check=FALSE) 180 | 181 | ## pred other coefs: 182 | pred_Xr <- RDDpred(reg_para, newdata= data.frame(Tr=0, Xl=0, Xr=c(0,1))) 183 | all.equal(RDDcoef(reg_para, allInfo=TRUE, allCo=TRUE)[4,1:2], unlist(pred_Xr), check=FALSE) 184 | 185 | pred_Xl <- RDDpred(reg_para, newdata= data.frame(Tr=0, Xl=c(0,1), Xr=0)) 186 | all.equal(RDDcoef(reg_para, allInfo=TRUE, allCo=TRUE)[3,1:2], unlist(pred_Xl), check=FALSE) 187 | 188 | reg_para2 <- RDDreg_lm(RDDobject=Lee2008_rdd, order=2) 189 | RDDpred(reg_para2) 190 | all.equal(unlist(RDDpred(reg_para2)), RDDcoef(reg_para2, allInfo=TRUE)[1:2], check=FALSE) 191 | 192 | 193 | ### Covariates 194 | reg_para4_cov <- RDDreg_lm(RDDobject=Lee2008_rdd_z, order=1, covariates="z1", covar.opt=list(slope="separate")) 195 | reg_para4_cov 196 | summary(reg_para4_cov) 197 | 198 | RDDpred(reg_para4_cov) 199 | all.equal(unlist(RDDpred(reg_para4_cov)), RDDcoef(reg_para4_cov, allInfo=TRUE)[1:2], check=FALSE) 200 | 201 | all.equal(RDDpred(reg_para4_cov, covdata=data.frame(z1=0)),RDDpred(reg_para4_cov)) 202 | 203 | ### Check RDDpred: 204 | vec_eval <- c(2,4,4,5,6) 205 | estim_sep <- lapply(vec_eval, function(x) RDDpred(object=reg_para4_cov, covdata=data.frame(z1=x))) 206 | estim_toget <- RDDpred(reg_para4_cov, covdata=data.frame(z1=vec_eval)) 207 | 208 | all(estim_toget$fit==sapply(estim_sep, function(x) x$fit)) 209 | all(estim_toget$se.fit==sapply(estim_sep, function(x) x$se.fit)) 210 | 211 | environment(RDDpred) <- environment(RDDreg_lm) 212 | sum(RDDpred(reg_para4_cov, covdata=data.frame(z1=c(0,1,2,1)))$fit) 213 | # RDDpred(x=reg_para4_cov, covdata=data.frame(z1=c(2,4,4,4,5,6))) 214 | # RDDpred(reg_para4_cov) 215 | 216 | } 217 | --------------------------------------------------------------------------------