├── .gitignore ├── Code ├── RDcourse-part1-causal-inf.do ├── RDcourse-part3-rdplots.R ├── RDcourse-part3-rdplots.do ├── RDcourse-part4-rdrobust.R ├── RDcourse-part4-rdrobust.do ├── RDcourse-part5-localrandomization.R ├── RDcourse-part5-localrandomization.do ├── RDcourse-part6-fuzzyRD.R ├── RDcourse-part6-fuzzyRD.do ├── RDcourse-part7-falsification.R ├── RDcourse-part7-falsification.do ├── RDcourse-part8-discrete.R ├── RDcourse-part8-discrete.do ├── README.md ├── education.dta ├── headstart.dta ├── house.dta ├── jtpa.csv ├── jtpa.dta ├── senate.dta ├── spp.dta ├── uruguayancct.dta └── workflow.do ├── Labs ├── Exercise-1 │ ├── README.md │ ├── exercise-Meyersson.do │ ├── polecon.csv │ └── polecon.dta └── Exercise-2 │ ├── CaugheySekhon2011.csv │ ├── CaugheySekhon2011.dta │ ├── README.md │ └── exercise-CaugheySekhon.do ├── README.md ├── Readings ├── CattaneoFrandsenTitiunik2015-JCI.pdf ├── CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf ├── CattaneoIdroboTitiunik2023-CUP-Extensions.pdf ├── CattaneoTitiunik2022-ARE.pdf ├── CaugheySekhon2011-PA.pdf ├── ImbensLemieux2008-JoE.pdf ├── LeeLemieux2010-JEL.pdf └── Meyersson2014-ECTA.pdf ├── Slides └── RD.pdf ├── Syllabus.pdf └── img ├── banner.png └── logo.png /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | 3 | ## Core latex/pdflatex auxiliary files: 4 | *.aux 5 | *.lof 6 | *.log 7 | *.lot 8 | *.fls 9 | *.out 10 | *.toc 11 | *.fmt 12 | *.fot 13 | *.cb 14 | *.cb2 15 | .*.lb 16 | *.nav 17 | *.snm 18 | 19 | 20 | 21 | ## Intermediate documents: 22 | *.dvi 23 | *.xdv 24 | *-converted-to.* 25 | # these rules might exclude image files for figures etc. 
26 | # *.ps 27 | # *.eps 28 | # *.pdf 29 | 30 | ## Generated if empty string is given at "Please type another file name for output:" 31 | .pdf 32 | 33 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 34 | *.bbl 35 | *.bcf 36 | *.blg 37 | *-blx.aux 38 | *-blx.bib 39 | *.run.xml 40 | 41 | ## Build tool auxiliary files: 42 | *.fdb_latexmk 43 | *.synctex 44 | *.synctex(busy) 45 | *.synctex.gz 46 | *.synctex.gz(busy) 47 | *.pdfsync 48 | 49 | 50 | ## Word/Ppt Temp files 51 | **/~$* 52 | .Rproj.user 53 | 54 | # lyx 55 | *.lyx~ 56 | 57 | 58 | -------------------------------------------------------------------------------- /Code/RDcourse-part1-causal-inf.do: -------------------------------------------------------------------------------- 1 | ******************************************************************************** 2 | ** Part 1: Introduction to Causal Inference and Policy Evaluation 3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico 4 | ** Last update: May 2023 5 | ******************************************************************************** 6 | ** RDROBUST: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 9 | ** RDPOWER: net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 10 | ******************************************************************************** 11 | 12 | ******************************************************************************** 13 | clear all 14 | set more off 15 | set linesize 200 16 | 17 | ******************************************************************************** 18 | ** Example: JTPA 19 | ******************************************************************************** 
20 | use jtpa.dta, clear 21 | 22 | ******************************************************************************** 23 | ** Conventional approach 24 | ******************************************************************************** 25 | ** using TTEST 26 | ttest earnings, by(treatment) 27 | ttest earnings, by(treatment) unequal 28 | 29 | ** using stata 30 | sum earnings if treatment==0 31 | ret list 32 | local N0 = r(N) 33 | local mu0 = r(mean) 34 | local sd0 = r(sd) 35 | local V0 = r(Var)/r(N) 36 | 37 | sum earnings if treatment==1 38 | ret list 39 | local N1 = r(N) 40 | local mu1 = r(mean) 41 | local sd1 = r(sd) 42 | local V1 = r(Var)/r(N) 43 | 44 | local tau = `mu1'-`mu0' 45 | local v = sqrt(`V1'+`V0') 46 | local T = `tau'/`v' 47 | local pval = 2*normal(-abs(`T')) 48 | 49 | local mu0 = round(`mu0', .01) 50 | local mu1 = round(`mu1', .0001) 51 | local sd0 = round(`sd0', .01) 52 | local sd1 = round(`sd1', .0001) 53 | 54 | di "Group Means (Std.Dev.)" 55 | di "Control: `mu0' (`sd0') -- N=`N0'" 56 | di "Treatme: `mu1' (`sd1') -- N=`N1'" 57 | di "Test = `T' -- p-val = `pval'" 58 | 59 | * Compare to: 60 | ttest earnings, by(treatment) unequal 61 | ret list 62 | 63 | ** using mata 64 | mata: 65 | y = st_data(., "earnings") 66 | t = st_data(., "treatment") 67 | N1 = sum(t:==1); N0 = rows(t)-N1 68 | mu1 = mean(select(y,t:==1)); mu1 69 | mu0 = mean(select(y,t:==0)); mu0 70 | v1 = variance(select(y,t:==1))/N1; 71 | v0 = variance(select(y,t:==0))/N0; 72 | 73 | T = (mu1-mu0)/sqrt(v1+v0) 74 | pval = 2*normal(-abs(T)) 75 | 76 | st_numscalar("T", T); st_numscalar("pval", pval) 77 | end 78 | 79 | display "t-test = " T 80 | display "pval = " pval 81 | 82 | ******************************************************************************** 83 | ** Randomization Inference approach 84 | ******************************************************************************** 85 | clear all 86 | set obs 15 87 | gen T = (runiform()>=.6) 88 | mata: st_numscalar("toshiba", 
comb(15,sum(st_data(.,"T")))) 89 | 90 | gen Y = rnormal() + T*1 91 | ** using stata, ttest 92 | ttest Y, by(T) 93 | 94 | ** using stata, permute 95 | permute T diffmean=(r(mu_2)-r(mu_1)), reps(1000) nowarn: ttest Y, by(T) 96 | matrix pval = r(p_twosided) 97 | display "p-val = " pval[1,1] 98 | 99 | ** using mata, by hand 100 | ** careful: cvpermute() not random, it enumerates objects in order 101 | mata: 102 | y = st_data(., "Y"); t = st_data(., "T") 103 | 104 | totperm = comb(rows(t),sum(t)); totperm 105 | 106 | T = abs(mean(select(y,t:==1))-mean(select(y,t:==0))) 107 | 108 | S=3000; Tdist = J(min((S,totperm)),1,.) 109 | info = cvpermutesetup(t); s=1 110 | while ((t=cvpermute(info)) != J(0,1,.) & s<=S){ 111 | Tdist[s,] = abs(mean(select(y,t:==1))-mean(select(y,t:==0))) 112 | s++ 113 | } 114 | pval = sum(Tdist:>=T)/min((S,totperm)); pval 115 | end 116 | 117 | ** Example: JTPA 118 | use jtpa.dta, clear 119 | permute treatment diffmean=(r(mu_2)-r(mu_1)), reps(1000) nowarn: ttest earnings, by(treatment) 120 | 121 | * Compare to: 122 | ttest earnings, by(treatment) unequal 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /Code/RDcourse-part3-rdplots.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------# 2 | # Part 3: RD plots 3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik 4 | # Last update: May 2023 5 | #------------------------------------------------------------------------------# 6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 7 | #------------------------------------------------------------------------------# 8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS: 9 | # FOREIGN: install.packages('foreign') 10 | # GGPLOT2: install.packages('ggplot2') 11 | # LPDENSITY: install.packages('lpdensity') 12 | # RDDENSITY: install.packages('rddensity') 13 | # RDLOCRAND: 
install.packages('rdlocrand') 14 | # RDROBUST: install.packages('rdrobust') 15 | #------------------------------------------------------------------------------# 16 | rm(list=ls()) 17 | 18 | library(foreign) 19 | library(lpdensity) 20 | library(rddensity) 21 | library(rdrobust) 22 | library(rdlocrand) 23 | library(readstata13) 24 | 25 | data = read.dta13("headstart.dta") 26 | Y = data$mort_age59_related_postHS 27 | X = data$povrate60 - 59.1984 28 | T = (X>=0) 29 | 30 | dim(data) 31 | summary(data) 32 | length(X) 33 | length(unique(X)) 34 | length(Y) 35 | length(unique(Y)) 36 | 37 | # Figure 3.1, Scatter plot 38 | plot(X, Y, xlab = "Running Variable", ylab = "Outcome", col=1, pch=20) 39 | abline(v=0) 40 | 41 | out = rdplot(Y, X, binselect = 'qsmv', p=3) 42 | 43 | # Figure 3.2, RD Plot Using 40 Bins of Equal Length 44 | out = rdplot(Y, X, nbins = c(20,20), binselect = 'esmv') 45 | 46 | # Figure 3.4, 40 Evenly-Spaced Bins 47 | out = rdplot(Y, X, nbins = c(20,20), binselect = 'es') 48 | 49 | # Figure 3.5, 40 Quantile-Spaced Bins 50 | out = rdplot(Y, X, nbins = c(20,20), binselect = 'qs') 51 | 52 | # Figure 3.7, IMSE RD PLot with Evenly-Spaced Bins 53 | out = rdplot(Y, X, binselect = 'es') 54 | 55 | # Figure 3.8, IMSE RD Plot with Quantile-Spaced Bins 56 | out = rdplot(Y, X, binselect = 'qs') 57 | 58 | # Figure 3.10, Mimicking Variance RD Plot with Evenly-Spaced Bins 59 | out = rdplot(Y, X, binselect = 'esmv') 60 | 61 | # Figure 3.11, Mimicking Variance RD Plot with Quantile-Spaced Bins 62 | out = rdplot(Y, X, binselect = 'qsmv') 63 | 64 | out = rdplot(Y, X, binselect = 'qsmv', kernel = 'uniform', support = c(-150,150)) 65 | 66 | 67 | ################################################################################ 68 | ## US Senate data 69 | ################################################################################ 70 | data = read.dta13("senate.dta") 71 | names(data) 72 | 73 | X = data$demmv 74 | Y = data$demvoteshfor2 75 | 76 | # Create "Democratic Win at t" 77 
| Z = rep(NA, length(X)) 78 | Z[X<0 & !is.na(X)]=0 79 | Z[X>=0 & !is.na(X)]=1 80 | 81 | plot(X, Y, xlab = "Running Variable", ylab = "Outcome", col=1, pch=20) 82 | abline(v=0) 83 | 84 | out = rdplot(Y, X, binselect = 'qsmv', p=3) 85 | -------------------------------------------------------------------------------- /Code/RDcourse-part3-rdplots.do: -------------------------------------------------------------------------------- 1 | ******************************************************************************** 2 | ** Part 3: Graphical Illustration of RD designs 3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico 4 | ** Last update: May 2023 5 | ******************************************************************************** 6 | ** RDROBUST: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 9 | ** RDPOWER: net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 10 | ******************************************************************************** 11 | 12 | 13 | ******************************************************************************** 14 | clear all 15 | set more off 16 | set linesize 200 17 | 18 | ******************************************************************************** 19 | ** Example 1 : Head Start 20 | ******************************************************************************** 21 | use headstart, clear 22 | gl y mort_age59_related_postHS 23 | gl x povrate60 24 | gl c = 59.1984 25 | 26 | 27 | ** Basic Scatter Plot 28 | scatter $y $x 29 | scatter $y $x, xline($c, lcolor(red)) 30 | 31 | ******************************************************************************** 32 | ** RD Plots 33 | 
******************************************************************************** 34 | 35 | ** Quick RD plots (default: mimicking variance) 36 | rdplot $y $x, c($c) 37 | 38 | ** Add title 39 | rdplot $y $x , c($c) graph_options(title(RD Plot - Head Start) /// 40 | ytitle(Child Mortality 5 to 9 years old) /// 41 | xtitle(Poverty Rate)) 42 | 43 | 44 | ** Evenly-spaced 45 | rdplot $y $x, c($c) binselect(es) 46 | 47 | ** Quantile-spaced 48 | rdplot $y $x, c($c) binselect(qs) 49 | 50 | ** Global 2nd order polynomial 51 | rdplot $y $x, c($c) p(2) 52 | 53 | ** Select manually number of bins 54 | rdplot $y $x, c($c) nbins(10) 55 | 56 | ** Add confidence intervals 57 | rdplot $y $x, c($c) nbins(10) ci(95) 58 | 59 | ** Add confidence intervals w/shade 60 | rdplot $y $x, c($c) nbins(10) ci(95) shade 61 | 62 | ** Generate variables 63 | rdplot $y $x, c($c) p(2) ci(95) shade genvars 64 | 65 | ** Stored output 66 | ereturn list 67 | 68 | ******************************************************************************* 69 | ** Example 2 : U.S. 
Senate 70 | ******************************************************************************** 71 | use senate, clear 72 | global x demmv 73 | global y demvoteshfor2 74 | global c = 0 75 | ******************************************************************************** 76 | ** Summary Stats & Diff-in-means 77 | ******************************************************************************** 78 | sum $y $x demvoteshlag1 demvoteshlag2 dopen 79 | gen T= ($x>=0) 80 | ttest $y, by(T) 81 | 82 | 83 | ******************************************************************************** 84 | ** RD Plots 85 | ******************************************************************************** 86 | 87 | ** Quick RD plots (default: mimicking variance) 88 | rdplot $y $x 89 | 90 | rdplot $y $x, graph_options(name(Rdplot1)) 91 | 92 | rdplot $y $x if abs($x)<=40, graph_options(name(Rdplot2)) 93 | 94 | graph drop Rdplot1 95 | graph drop Rdplot2 96 | 97 | ** Add title 98 | rdplot $y $x , graph_options(title(RD Plot - Senate Elections Data) /// 99 | ytitle(Vote Share in Election t+1) /// 100 | xtitle(Margin of Victory in Election t)) 101 | 102 | 103 | ** Evenly-spaced 104 | rdplot $y $x, c($c) binselect(esmv) 105 | 106 | ** Quantile-spaced 107 | rdplot $y $x, c($c) binselect(qsmv) 108 | 109 | ** Global 2nd order polynomial 110 | rdplot $y $x, c($c) p(2) 111 | 112 | ** Select manually number of bins 113 | rdplot $y $x, c($c) nbins(10) 114 | 115 | ** Add confidence intervals 116 | rdplot $y $x, c($c) nbins(10) ci(95) 117 | 118 | ** Add confidence intervals w/shade 119 | rdplot $y $x, c($c) nbins(10) ci(95) shade 120 | 121 | ** Generate variables 122 | rdplot $y $x, c($c) p(2) ci(95) shade genvars 123 | 124 | ** Use support option 125 | rdplot $y $x, nbins(20) support(-100 100) 126 | 127 | ** Stored output 128 | ereturn list 129 | 130 | ******************************************************************************** 131 | ** Example 3 : U.S. 
House 132 | ******************************************************************************** 133 | use house, clear 134 | global x difdemshare 135 | global y demsharenext 136 | 137 | ** Quick RD plot (default: mimicking variance) 138 | rdplot $y $x , graph_options(title(RD Plot - House Elections Data) /// 139 | ytitle(Vote Share in Election at time t+1) /// 140 | xtitle(Margin of Victory at time t)) 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | -------------------------------------------------------------------------------- /Code/RDcourse-part4-rdrobust.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------# 2 | # Part 4: Local Polynomial RD Analysis 3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik 4 | # Last update: May 2023 5 | #------------------------------------------------------------------------------# 6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 7 | #------------------------------------------------------------------------------# 8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS: 9 | # FOREIGN: install.packages('foreign') 10 | # GGPLOT2: install.packages('ggplot2') 11 | # LPDENSITY: install.packages('lpdensity') 12 | # RDDENSITY: install.packages('rddensity') 13 | # RDLOCRAND: install.packages('rdlocrand') 14 | # RDROBUST: install.packages('rdrobust') 15 | #------------------------------------------------------------------------------# 16 | rm(list=ls()) 17 | 18 | library(foreign) 19 | library(lpdensity) 20 | library(rddensity) 21 | library(rdrobust) 22 | library(rdlocrand) 23 | library(readstata13) 24 | 25 | ####################################################### 26 | # 27 | # Head Start 28 | # 29 | ######################################################## 30 | data = read.dta13("headstart.dta") 31 | Y = data$mort_age59_related_postHS 32 | X = data$povrate60 - 59.1984 33 | T = (X>=0) 34 | 35 | 36 | 
#-----------------------------------------# 37 | # Local polynomial analysis (Continuity-Based RD Approach) 38 | #-----------------------------------------# 39 | # Code snippet 1 (two regressions) 40 | out = lm(Y~X,subset=X<0 & X>=-10) 41 | left_intercept = out$coefficients[1] 42 | print(left_intercept) 43 | out = lm(Y~X, subset=X>=0 & X<10) 44 | right_intercept = out$coefficients[1] 45 | print(right_intercept) 46 | difference = right_intercept - left_intercept 47 | print(paste("The RD estimator is", difference, sep = " ")) 48 | 49 | # Code snippet 2 (one regression) 50 | T_X = X*T 51 | out = lm(Y~X+T+T_X, subset=abs(X)<=10) 52 | print(out) 53 | 54 | # Code snippet 3 (generation of weights) 55 | w = NA 56 | w = 1-abs(X)/10 57 | 58 | # Code snippet 4 (regression with weights) 59 | summary(rdrobust(Y, X, h = 10, kernel = "uniform")) 60 | 61 | out = lm(Y~X+T+T_X, weight=w, subset=abs(X)<=10) 62 | print(out) 63 | 64 | # Code snippet 5 (using rdrobust with uniform weights) 65 | summary(rdrobust(Y, X, kernel = 'uniform', p = 1, h = 10)) 66 | 67 | # Code snippet 6 (using rdrobust with triangular weights) 68 | rdrobust(Y, X, kernel = 'triangular', p = 1, h = 10) 69 | 70 | # Code snippet 7 (using rdrobust with triangular weights and p=2) 71 | rdrobust(Y, X, kernel = 'triangular', p = 2, h = 10) 72 | 73 | # Code snippet 8 (using rdbwselect, mserd) 74 | rdbwselect(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd') 75 | 76 | # Code snippet 9 (using rdbwselect, msetwo) 77 | rdbwselect(Y, X, kernel = 'triangular', p = 1, bwselect = 'msetwo') 78 | 79 | # Code snippet 10 (using rdrobust, mserd) 80 | rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd') 81 | 82 | # Code snippet 12 (using rdrobust and showing the associated rdplot) 83 | bandwidth = rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')$h_l 84 | out = rdplot(Y, X, p = 1, kernel = 'triangular', h = bandwidth) 85 | print(out) 86 | 87 | # Code snippet 11 (using rdrobust and showing the objects 
it returns) 88 | rdout = rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd') 89 | print(names(rdout)) 90 | print(rdout$beta_p_r) 91 | print(rdout$beta_p_l) 92 | 93 | # Code snippet 13 (using rdrobust without regularization term) 94 | rdrobust(Y, X, kernel = 'triangular', scaleregul = 0, p = 1, bwselect = 'mserd') 95 | 96 | # Code snippet 14 (using rdrobust default options) 97 | rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd') 98 | 99 | # Code snippet 15 (using rdrobust default options and showing all the output) 100 | rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd', all = TRUE) 101 | 102 | 103 | ############################################################# 104 | ## Head Start Data: Additional Empirical Analysis 105 | ############################################################# 106 | # 1960 Census covariates 107 | X60 = cbind(data$census1960_pop, 108 | data$census1960_pctsch1417, 109 | data$census1960_pctsch534, 110 | data$census1960_pctsch25plus, 111 | data$census1960_pop1417, 112 | data$census1960_pop534, 113 | data$census1960_pop25plus, 114 | data$census1960_pcturban, 115 | data$census1960_pctblack) 116 | 117 | # 1990 Census covariates 118 | X90 = cbind(data$census1990_pop, 119 | data$census1990_pop1824, 120 | data$census1990_pop2534, 121 | data$census1990_pop3554, 122 | data$census1990_pop55plus, 123 | data$census1990_pcturban, 124 | data$census1990_pctblack, 125 | data$census1990_percapinc) 126 | 127 | 128 | # Placebo outcomes 129 | Plac = cbind(data$mort_age59_injury_postHS,data$mort_age59_related_preHS) 130 | 131 | tmp = rdrobust(Plac[,1],X) 132 | summary(tmp) 133 | 134 | tmp = rdrobust(Plac[,2],X) 135 | summary(tmp) 136 | 137 | 138 | ################################################################### 139 | ## Table 2: Nonparametric Density Continuity Tests 140 | ################################################################### 141 | 142 | tmp = rddensity(X, c = 0) # X = povrate60 - 59.1984 is already centered, so the cutoff is 0 (the original Xraw / c were never defined in this script) 143 | summary(tmp) 144 | 145 | ## Robust
Nonparametric Method with Covariates 146 | rdrobust(Y,X,covs=X60) 147 | 148 | ## Robust Nonparametric Method: Different Bandwdiths at Each Side 149 | rdrobust(Y,X,bwselect='msetwo') 150 | 151 | 152 | ################################################################################ 153 | ## Head Start Data using covariates 154 | ################################################################################ 155 | data = read.dta13("headstart.dta") 156 | attach(data) 157 | 158 | y <- mort_age59_related_postHS 159 | x <- povrate60 160 | z <- cbind(census1960_pop, census1960_pctsch1417, census1960_pctsch534, 161 | census1960_pctsch25plus, census1960_pop1417, census1960_pop534, 162 | census1960_pop25plus, census1960_pcturban, census1960_pctblack) 163 | 164 | ## rho unrestricted; MSE-optimal bandwidths w/o covs; RD w/o covs 165 | rd <- rdrobust(y, x, c=59.1968) 166 | h <- rd$h_l 167 | b <- rd$b_l 168 | IL <- rd$ci[3,2] - rd$ci[3,1] 169 | 170 | 171 | ## rho unrestricted; MSE-optimal bandwidths w/o covs; RD w/ covs 172 | rd <- rdrobust(y, x, c=59.1968, covs=z, h=h, b=b) 173 | ILch <- ((rd$ci[3,2] - rd$ci[3,1])/IL - 1)* 100 174 | 175 | 176 | ## rho unrestricted; MSE-optimal bandwidths w/ covs; RD w/ covs 177 | rd <- rdrobust(y, x, c=59.1968, covs=z) 178 | ILch <- ((rd$ci[3,2] - rd$ci[3,1])/IL - 1)* 100 179 | 180 | 181 | ####################################################### 182 | # 183 | # US Senate application 184 | # 185 | ######################################################## 186 | data = read.dta13("senate.dta") 187 | names(data) 188 | 189 | X = data$demmv 190 | Y = data$demvoteshfor2 191 | 192 | # Create "Democratic Win at t" 193 | Z = rep(NA, length(X)) 194 | Z[X<0 & !is.na(X)]=0 195 | Z[X>=0 & !is.na(X)]=1 196 | 197 | summary(data) 198 | length(X) 199 | length(unique(X)) 200 | length(Y) 201 | length(unique(Y)) 202 | 203 | rdplot(Y,X) 204 | 205 | rdplot(Y,X, binselect="qsmv", p=4) 206 | 207 | # Using rdrobust with uniform weights 208 | out = rdrobust(Y, X, kernel = 
'uniform', p = 1, h = 17 , all=TRUE) 209 | summary(out) 210 | names(out) 211 | out$coef 212 | -------------------------------------------------------------------------------- /Code/RDcourse-part4-rdrobust.do: -------------------------------------------------------------------------------- 1 | ******************************************************************************** 2 | ** Part 4: Local Polynomial RD Analysis 3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico 4 | ** Last update: May 2023 5 | ******************************************************************************** 6 | ** RDROBUST: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 9 | ** RDPOWER: net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 10 | ********************************************************************************* 11 | *-------------------------------------------------------------------------------------------------------------* 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 13 | *-------------------------------------------------------------------------------------------------------------* 14 | capture cd "C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes" // author-specific path; capture lets the script continue from the current working directory on other machines 15 | 16 | clear all 17 | 18 | ******************************************************** 19 | **** Head Start **************************************** 20 | ******************************************************** 21 | 22 | use "headstart.dta", clear 23 | 24 | gen Y = mort_age59_related_postHS 25 | gen X = povrate60 - 59.1984 26 | 27 | gen T=. 28 | replace T=0 if X<0 & X!=. 29 | replace T=1 if X>=0 & X!=.
30 | 31 | order X Y T 32 | 33 | * Descriptive Statistics 34 | su 35 | duplicates report Y 36 | tab Y 37 | duplicates report X 38 | 39 | *-----------------------------------------* 40 | * Continuity-Based RD Approach * 41 | *-----------------------------------------* 42 | *** optimal bw 43 | rdbwselect Y X, p(0) 44 | 45 | ** rdrobust: MSE data-driven bandwidth - local constant regression 46 | rdrobust Y X, p(0) 47 | 48 | ** rdrobust: parametric bandwidth - local constant regression 49 | rdrobust Y X, p(0) h(9) 50 | 51 | ** rdrobust: MSE data-driven bandwidth - local linear regression 52 | rdrobust Y X, p(1) 53 | 54 | ** rdrobust: parametric bandwidth - local linear regression 55 | rdrobust Y X, p(1) h(9) 56 | 57 | * Output from rdrobust 58 | rdrobust Y X, h(10) kernel(uniform) 59 | 60 | * Output from regressions on both sides 61 | reg Y X if X<0 & X>=-10 62 | matrix coef_left=e(b) 63 | local intercept_left=coef_left[1,2] 64 | reg Y X if X>=0 & X<=10 65 | matrix coef_right=e(b) 66 | local intercept_right=coef_right[1,2] 67 | local difference=`intercept_right'-`intercept_left' 68 | display "The RD estimator is `difference'" 69 | 70 | * Output from joint regression 71 | gen T_X=X*T 72 | reg Y X T T_X if abs(X)<=10 73 | 74 | * Matching standard errors 75 | reg Y X T T_X if abs(X)<=10, robust 76 | rdrobust Y X, h(10) kernel(uniform) vce(hc1) 77 | 78 | * For other kernels besides uniform 79 | gen weights=. 
80 | replace weights=(1-abs(X/10)) if abs(X)<10 81 | 82 | * Code snippet 4 (regression with weights) 83 | reg Y X [aw=weights] if X<0 & X>=-10 84 | matrix coef_left=e(b) 85 | local intercept_left=coef_left[1,2] 86 | reg Y X [aw=weights] if X>=0 & X<=10 87 | matrix coef_right=e(b) 88 | local intercept_right=coef_right[1,2] 89 | local difference=`intercept_right'-`intercept_left' 90 | display "The RD estimator is `difference'" 91 | 92 | rdrobust Y X, h(10) kernel(uniform) 93 | 94 | rdrobust Y X, kernel(triangular) p(1) h(10) 95 | 96 | rdrobust Y X, kernel(triangular) p(2) h(10) 97 | 98 | rdbwselect Y X, kernel(triangular) p(1) bwselect(mserd) 99 | 100 | rdbwselect Y X, kernel(triangular) p(1) bwselect(msetwo) 101 | 102 | rdrobust Y X 103 | ereturn list 104 | 105 | * (using rdrobust and showing the associated rdplot) 106 | rdrobust Y X, p(1) kernel(triangular) bwselect(mserd) 107 | local bandwidth=e(h_l) 108 | rdplot Y X if abs(X)<=`bandwidth', p(1) h(`bandwidth') kernel(triangular) 109 | 110 | * (using rdrobust without regularization term) 111 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) scaleregul(0) 112 | 113 | * (using rdrobust default options) 114 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) 115 | 116 | * (using rdrobust with the cerrd bandwidth selector instead of mserd) 117 | rdrobust Y X, kernel(triangular) p(1) bwselect(cerrd) 118 | 119 | 120 | * (using rdrobust default options and showing all the output) 121 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) all 122 | 123 | ** Placebo Outcome Variables 124 | foreach y of varlist mort_age59_injury_postHS mort_age59_related_preHS { 125 | rdrobust `y' X, p(0) 126 | rdrobust `y' X, p(0) h(9) 127 | rdrobust `y' X, p(1) 128 | rdrobust `y' X, p(1) h(9) 129 | } 130 | 131 | 132 | 133 | ******************************************************************************** 134 | ** Head Start Data 135 | ******************************************************************************** 136 | use "headstart.dta", clear 137 | 138 | gl y
mort_age59_related_postHS 139 | gl x povrate60 140 | gl z census1960_pop census1960_pctsch1417 census1960_pctsch534 /// 141 | census1960_pctsch25plus census1960_pop1417 census1960_pop534 /// 142 | census1960_pop25plus census1960_pcturban census1960_pctblack 143 | 144 | ** MSE-optimal bandwidths w/o covs; RD w/o covs 145 | rdrobust $y $x, c(59.1968) 146 | local h = e(h_l) 147 | local b = e(b_l) 148 | local IL = e(ci_r_rb) - e(ci_l_rb) 149 | 150 | ** MSE-optimal bandwidths w/o covs; RD w/ covs 151 | rdrobust $y $x, c(59.1968) covs($z) h(`h') b(`b') 152 | local ILch = round((((`e(ci_r_rb)' - `e(ci_l_rb)')/`IL') - 1)* 100, 0.001) 153 | 154 | ** MSE-optimal bandwidths w/ covs; RD w/ covs 155 | rdrobust $y $x, c(59.1968) covs($z) 156 | local ILch = round((((`e(ci_r_rb)' - `e(ci_l_rb)')/`IL') - 1)* 100, 0.001) 157 | 158 | 159 | ******************************************************** 160 | **** Incumbency US Senate ****************************** 161 | ******************************************************** 162 | 163 | use "senate.dta", clear 164 | 165 | rename demmv X 166 | rename demvoteshfor2 Y 167 | 168 | gen T=. 169 | replace T=0 if X<0 & X!=. 170 | replace T=1 if X>=0 & X!=. 
171 | label var T "Democratic Win at t" 172 | 173 | order X Y T 174 | 175 | * Descriptive Statistics 176 | su 177 | duplicates report Y 178 | tab Y 179 | duplicates report X 180 | 181 | 182 | * Output from rdrobust 183 | rdrobust Y X, h(10) kernel(uniform) 184 | 185 | * Output from regressions on both sides 186 | reg Y X if X<0 & X>=-10 187 | matrix coef_left=e(b) 188 | local intercept_left=coef_left[1,2] 189 | reg Y X if X>=0 & X<=10 190 | matrix coef_right=e(b) 191 | local intercept_right=coef_right[1,2] 192 | local difference=`intercept_right'-`intercept_left' 193 | display "The RD estimator is `difference'" 194 | 195 | * Output from joint regression 196 | gen T_X=X*T 197 | reg Y X T T_X if abs(X)<=10 198 | 199 | * Matching standard errors 200 | reg Y X T T_X if abs(X)<=10, robust 201 | rdrobust Y X, h(10) kernel(uniform) vce(hc1) 202 | 203 | * For other kernels besides uniform 204 | gen weights=. 205 | replace weights=(1-abs(X/10)) if abs(X)<10 206 | 207 | * Code snippet 4 (regression with weights) 208 | reg Y X [aw=weights] if X<0 & X>=-10 209 | matrix coef_left=e(b) 210 | local intercept_left=coef_left[1,2] 211 | reg Y X [aw=weights] if X>=0 & X<=10 212 | matrix coef_right=e(b) 213 | local intercept_right=coef_right[1,2] 214 | local difference=`intercept_right'-`intercept_left' 215 | display "The RD estimator is `difference'" 216 | 217 | rdrobust Y X, h(10) 218 | 219 | rdrobust Y X, kernel(triangular) p(1) h(10) 220 | 221 | rdrobust Y X, kernel(triangular) p(2) h(10) 222 | 223 | rdbwselect Y X, kernel(triangular) p(1) bwselect(mserd) 224 | 225 | rdbwselect Y X, kernel(triangular) p(1) bwselect(msetwo) 226 | 227 | rdrobust Y X 228 | ereturn list 229 | 230 | * showing the associated rdplot) 231 | rdrobust Y X, p(1) kernel(triangular) bwselect(mserd) 232 | local bandwidth=e(h_l) 233 | rdplot Y X if abs(X)<=`bandwidth', p(1) h(`bandwidth') kernel(triangular) 234 | 235 | * (using rdrobust without regularization term) 236 | rdrobust Y X, kernel(triangular) p(1) 
bwselect(mserd) scaleregul(0) 237 | 238 | * (using rdrobust default options) 239 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) 240 | 241 | * (using rdrobust with the cerrd bandwidth selector instead of mserd) 242 | rdrobust Y X, kernel(triangular) p(1) bwselect(cerrd) 243 | 244 | 245 | * (using rdrobust default options and showing all the output) 246 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) all 247 | 248 | * (rdbwselect with covariates) 249 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen" 250 | rdbwselect Y X, covs($covariates) p(1) kernel(triangular) bwselect(mserd) scaleregul(1) 251 | 252 | * (rdrobust with covariates) 253 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen" 254 | rdrobust Y X, covs($covariates) p(1) kernel(triangular) bwselect(mserd) scaleregul(1) 255 | 256 | -------------------------------------------------------------------------------- /Code/RDcourse-part5-localrandomization.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------# 2 | # Part 5: Local Randomization RD analysis 3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik 4 | # Last update: May 2023 5 | #------------------------------------------------------------------------------# 6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 7 | #------------------------------------------------------------------------------# 8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS: 9 | # FOREIGN: install.packages('foreign') 10 | # GGPLOT2: install.packages('ggplot2') 11 | # LPDENSITY: install.packages('lpdensity') 12 | # RDDENSITY: install.packages('rddensity') 13 | # RDLOCRAND: install.packages('rdlocrand') 14 | # RDROBUST: install.packages('rdrobust') 15 | #------------------------------------------------------------------------------# 16 | rm(list=ls())
17 | 18 | library(foreign) 19 | library(lpdensity) 20 | library(rddensity) 21 | library(rdrobust) 22 | library(rdlocrand) 23 | library(readstata13) 24 | 25 | ################################################################################ 26 | ## US Senate data 27 | ################################################################################ 28 | data = read.dta13("senate.dta") 29 | names(data) 30 | 31 | X = data$demmv 32 | Y = data$demvoteshfor2 33 | 34 | # Create "Democratic Win at t" 35 | Z = rep(NA, length(X)) 36 | Z[X<0 & !is.na(X)]=0 37 | Z[X>=0 & !is.na(X)]=1 38 | 39 | #*--------------------------------------------* 40 | # * Local Randomization RD Approach * 41 | # *--------------------------------------------* 42 | rdrandinf(Y, X, wl=-0.7652, wr=0.7652, seed=50, ci=0.05) # ci takes the alpha level (optionally followed by a grid of effects), not a logical 43 | 44 | #Using rdrandinf in the Ad-hoc Window with the Bernoulli Option 45 | bernpr=rep(NA, length(X)) 46 | bernpr[abs(X)<=0.75]=1/2 47 | rdrandinf(Y, X, wl=-0.75, wr=0.75, seed=50, bernoulli =bernpr) 48 | 49 | # Code snippet 3, Using rdrandinf in the Ad-hoc Window with the Kolmogorov-Smirnov Statistic 50 | rdrandinf(Y, X, wl=-0.75, wr=0.75, seed=50, statistic = "ksmirnov") 51 | 52 | # Code snippet 4, Using rdrandinf in the Ad-hoc Window with the Confidence Interval Option 53 | rdrandinf(Y, X, wl=-0.75, wr=0.75, seed=50, ci = c(0.05, seq(from=-10, to=10, by=0.25))) 54 | 55 | # Code snippet 5, Using rdwinselect with the wobs Option 56 | covs = cbind(data$presdemvoteshlag1, data$demvoteshlag1, data$demvoteshlag2, data$demwinprv1, data$demwinprv2, data$dmidterm, data$dpresdem, data$dopen) 57 | rdwinselect(X, covs, cutoff = 0, seed = 50, wobs=2, nwindows=200 , approx=TRUE, plot=TRUE) # cutoff spelled out instead of relying on partial matching of "c" 58 | 59 | # Code snippet 6, Using rdwinselect with the wobs, nwindows and plot Options 60 | rdwinselect(X, covs, seed = 50, wobs=2, nwindows =50, plot=TRUE) 61 | 62 | #Code snippet 7, Using rdwinselect with the wstep and nwindows Options 63 | rdwinselect(X, covs, seed = 50, wstep=0.1, nwindows=25) 64 | 65 | # 
Code snippet 8, Using rdrandinf with the Confidence Interval Option 66 | rdrandinf(Y, X, wl=-0.75, wr=0.75, seed=50, ci = c(0.05, seq(from=-10, to=10, by=0.10))) 67 | 68 | # Code snippet 9, Using rdrandinf with the Alternative Power Option 69 | out = rdrandinf(Y, X, covariates = covs, seed=50, d = 7.416) 70 | names(out) 71 | out$window 72 | -------------------------------------------------------------------------------- /Code/RDcourse-part5-localrandomization.do: -------------------------------------------------------------------------------- 1 | ******************************************************************************** 2 | ** Part 6: Local Randomization RD Analysis 3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico 4 | ** Last update: May 2023 5 | ******************************************************************************** 6 | ** RDROBUST: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 9 | ** RDPOWER: net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 10 | ********************************************************************************* 11 | *-------------------------------------------------------------------------------------------------------------* 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 13 | *-------------------------------------------------------------------------------------------------------------* 14 | cd C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes 15 | clear 16 | clear all 17 | clear matrix 18 | cap log close 19 | set more off 20 | 21 | use "senate.dta", clear 22 | 23 | rename demmv X 24 | rename 
demvoteshfor2 Y 25 | 26 | gen Z=. 27 | replace Z=0 if X<0 & X!=. 28 | replace Z=1 if X>=0 & X!=. 29 | label var Z "Democratic Win at t" 30 | 31 | order X Y Z 32 | 33 | *--------------------------------------------* 34 | * Local Randomization RD Approach * 35 | *--------------------------------------------* 36 | * Code snippet 1, Using rdrandinf in the Ad-hoc Window 37 | rdrandinf Y X, wl(-0.75) wr(0.75) seed(50) 38 | 39 | * Code snippet 2, Using rdrandinf in the Ad-hoc Window with the Bernoulli Option 40 | gen bern_prob=1/2 if abs(X)<=0.75 41 | rdrandinf Y X, wl(-0.75) wr(0.75) seed(50) bernoulli(bern_prob) 42 | 43 | * Code snippet 3, Using rdrandinf in the Ad-hoc Window with the Kolmogorov-Smirnov Statistic 44 | rdrandinf Y X, wl(-0.75) wr(0.75) seed(50) statistic(ksmirnov) 45 | 46 | * Code snippet 4, Using rdrandinf in the Ad-hoc Window with the Confidence Interval Option 47 | rdrandinf Y X, wl(-0.75) wr(0.75) seed(50) ci(0.05 -10(0.25)10) 48 | 49 | * Code snippet 5, Using rdwinselect with the wobs Option 50 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen" 51 | rdwinselect X $covs, seed(50) wobs(2) 52 | 53 | * Code snippet 6, Using rdwinselect with the wobs, nwindows and plot Options 54 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen" 55 | rdwinselect X $covs, seed(50) wobs(2) nwindows(100) plot approximate 56 | 57 | * Code snippet 7, Using rdwinselect with the wstep and nwindows Options 58 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen" 59 | rdwinselect X $covs, seed(50) wstep(0.1) nwindows(25) 60 | 61 | * Code snippet 8, Using rdrandinf with the Confidence Interval Option 62 | rdrandinf Y X, wl(-0.75) wr(0.75) seed(50) ci(0.05 -10(0.1)10) 63 | 64 | * Code snippet 9, Using rdrandinf with the Alternative Power Option 65 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 
demwinprv1 demwinprv2 dmidterm dpresdem dopen" 66 | rdrandinf Y X, covariates($covs) seed(50) d(7.416) 67 | 68 | -------------------------------------------------------------------------------- /Code/RDcourse-part6-fuzzyRD.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------# 2 | # Part 5: Fuzzy RD 3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik 4 | # Last update: May 2023 5 | #------------------------------------------------------------------------------# 6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 7 | #------------------------------------------------------------------------------# 8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS: 9 | # FOREIGN: install.packages('foreign') 10 | # GGPLOT2: install.packages('ggplot2') 11 | # LPDENSITY: install.packages('lpdensity') 12 | # RDDENSITY: install.packages('rddensity') 13 | # RDLOCRAND: install.packages('rdlocrand') 14 | # RDROBUST: install.packages('rdrobust') 15 | #------------------------------------------------------------------------------# 16 | rm(list=ls()) 17 | 18 | library(foreign) 19 | library(lpdensity) 20 | library(rddensity) 21 | library(rdrobust) 22 | library(rdlocrand) 23 | library(readstata13) 24 | 25 | # Application by LONDONO-VELEZ, RODRIGUEZ AND SANCHEZ: Ser Pilo Paga (SSP) program 26 | # Fuzzy RD analysis (using only one dimension) 27 | 28 | data = read.dta13("spp.dta") 29 | dim(data) 30 | names(data) 31 | 32 | # This replication file only includes first cohort (icfes_per == 20142), as in original replication files 33 | table(data$icfes_per) 34 | dim(data) 35 | 36 | # Running variables 37 | summary(data$running_saber11) #/* this running variable is discrete: 447 unique values*/ 38 | 39 | summary(data$running_sisben) #/* this running variable is discrete but number of masspoints is large: 25,375 unique values*/ 40 | 41 | # treatment indicator (=1 if the 
student in fact received the money) 42 | summary(data$beneficiary_spp) 43 | 44 | # Keep subsample of students with scores above Saber 11 cutoff 45 | summary(data$running_saber11) 46 | data=data[data$running_saber11>=0,] 47 | dim(data) 48 | 49 | # Students who live in households who don't receive welfare benefits have missing score ==> drop missing scores 50 | sum(is.na(data$running_sisben)) 51 | data = data[!is.na(data$running_sisben),] 52 | dim(data) 53 | 54 | 55 | # Create variables 56 | X = data$running_sisben 57 | D = data$beneficiary_spp 58 | Y1 = data$spadies_any 59 | Y2 = data$spadies_hq 60 | 61 | # First Stage: T on D 62 | #------------- 63 | # Rdrobust on D 64 | out = rdrobust(D,X) 65 | summary(out) 66 | 67 | # Rdplot on D 68 | rdplot(D,X, x.label="SISBEN wealth index", y.label="Received SPP subsidy") 69 | 70 | # ITT: T on Y 71 | #------------- 72 | rdplot(Y1,X, x.label="SISBEN wealth index", y.label="Attended any HEI") 73 | 74 | out = rdrobust(Y1,X) 75 | summary(out) 76 | 77 | # Rddensity 78 | #---------- 79 | out = rddensity(X) 80 | summary(out) 81 | 82 | # Outcome 1: Student Attended any high education institution (HEI) immediately after receiving subsidy 83 | #-------------------- 84 | 85 | # Fuzzy effect 86 | fout = rdrobust(Y1, X, fuzzy = D) 87 | h = fout$bws[1,1] 88 | b = fout$bws[2,1] 89 | 90 | # equivalent to the two-step estimation below 91 | # be careful! 
if there are missing values in D, you may fail to replicate the fuzzy effect by hand 92 | sum(is.na(D)) 93 | 94 | out = rdrobust(Y1, X, h = h, b=b) 95 | itt = out$Estimate[1] 96 | 97 | out = rdrobust(D, X, h=h, b=b) 98 | fs = out$Estimate[1] 99 | 100 | itt/fs 101 | 102 | # Rdrobust (sharp) on Y1: cluster by mass point in running variable 103 | out = rdrobust(Y1, X, vce = "nn", cluster = X) 104 | summary(out) 105 | 106 | # Rdrobust (fuzzy) on Y1 107 | out = rdrobust(Y1, X, fuzzy = D, vce = "nn", cluster= X) 108 | summary(out) 109 | 110 | # Rdplot on Y1 111 | rdplot(Y1, X) 112 | 113 | 114 | # Outcome 2: Student Attended high-quality HEI immediately after receiving subsidy 115 | #------------------------------- 116 | # Rdrobust (sharp) on Y2 117 | rdrobust(Y2, X) 118 | 119 | # Rdrobust (fuzzy) on Y2 120 | rdrobust(Y2, X, fuzzy =D) 121 | 122 | # Rdplot on Y2 123 | rdplot(Y2, X) # rdplot has no fuzzy argument; this plots the reduced form of Y2 on the score 124 | 125 | # Validation on Covariates 126 | #------------------------- 127 | # Sharp rdrobust 128 | covs = cbind(data$icfes_female, data$icfes_age, data$icfes_urm, data$icfes_stratum, data$icfes_privatehs, data$icfes_famsize) # same covariate list as the Stata script; icfes_per is the cohort identifier tabulated at the top, not a pre-treatment covariate 129 | for(i in 1:ncol(covs)){ 130 | summary(rdrobust(covs[,i], X)) 131 | } 132 | 133 | # Fuzzy rdrobust 134 | for(i in 1:ncol(covs)){ 135 | summary(rdrobust(covs[,i], X, fuzzy=D)) 136 | } 137 | 138 | 139 | # Local randomization Fuzzy RD analysis 140 | 141 | rdwinselect(X, covs, seed=86543) 142 | wr = 0.13 143 | wl = -0.13 144 | rdrandinf(D, X, wl=wl, wr=wr) 145 | rdrandinf(Y1, X, wl=wl, wr=wr) 146 | rdrandinf(Y2, X, wl=wl, wr=wr) 147 | rdrandinf(Y2, X, wl=wl, wr=wr, fuzzy=D) 148 | 149 | -------------------------------------------------------------------------------- /Code/RDcourse-part6-fuzzyRD.do: -------------------------------------------------------------------------------- 1 | ******************************************************************************** 2 | ** Part 5: Fuzzy RD designs 3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico 4 | 
** Last update: May 2023 5 | ******************************************************************************** 6 | ** RDROBUST: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 9 | ** RDPOWER: net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 10 | ********************************************************************************* 11 | *-------------------------------------------------------------------------------------------------------------* 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 13 | *-------------------------------------------------------------------------------------------------------------* 14 | cd C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes 15 | clear all 16 | clear 17 | clear all 18 | clear matrix 19 | cap log close 20 | set more off 21 | 22 | /*********************************** 23 | 24 | Fuzzy RD Analysis (one-dimensional) 25 | 26 | ************************************/ 27 | 28 | *----------------------------------------------------------------------* 29 | ** Empirical Illustration: LONDONO-VELEZ, RODRIGUEZ AND SANCHEZ (2020, AEJ Policy) 30 | *-----------------------------------------------------------------------* 31 | 32 | * Loading the data 33 | use "spp.dta", clear 34 | 35 | * This replication only includes first cohort (keep icfes_per==20142 as in replication files) 36 | tab icfes_per 37 | 38 | * Running variables 39 | codebook running_saber11 /* this running variable is discrete: 447 unique values*/ 40 | 41 | codebook running_sisben /* this running variable is discrete but number of masspoints is large: 25,375 unique 
values*/ 42 | 43 | * Treatment indicator (=1 if the student in fact received the money) 44 | sum beneficiary_spp 45 | 46 | * Keep subsample of students with scores above Saber 11 cutoff 47 | sum running_saber11 48 | keep if running_saber11>=0 49 | 50 | * Drop observations with missing values in SISBEN running variable: Students who live in households who don't receive welfare benefits have missing score 51 | drop if running_sisben==. 52 | 53 | * Look at repeated values in the final sample for analysis 54 | duplicates report sisben_score 55 | ret list 56 | 57 | duplicates tag sisben_score, gen(duplic) 58 | 59 | * Rename variables 60 | rename running_sisben X /* score (normalized): distance from Sisben wealth index to cutoff */ 61 | rename beneficiary_spp D /* treatment: indicator for whether student receive SPP subsidy*/ 62 | rename spadies_any Y1 /* Outcome 1*/ 63 | rename spadies_hq Y2 /* Outcome 2 */ 64 | 65 | *------------- 66 | * First Stage: T on D 67 | *------------- 68 | 69 | * Rdplot on D 70 | rdplot D X 71 | 72 | * Rdrobust on D 73 | /* Note: because of one-sided non-compliance, no variability on D to the left of cutoff 74 | when we choose msetwo, the bandwidth on the left of cutoff is the entire support*/ 75 | rdrobust D X 76 | rdrobust D X, bwselect(msetwo) 77 | 78 | *------------- 79 | * ITT: T on Y 80 | *------------- 81 | rdplot Y1 X 82 | 83 | rdrobust Y1 X /* Bandwidth not exactly same as in published paper, which is h=9.028 */ 84 | /* Two reasons for this : (1) Back then, rdrobust did not adjust for masspoints automatically 85 | (2) They reported bias-corrected point estimators (!) 
86 | */ 87 | rdrobust Y1 X, masspoints(off) all /* more similar */ 88 | 89 | *------------------ 90 | * Outcome 1: Dummy ==1 if student attended any certified HEI immediately after receiving subsidy 91 | *-------------------- 92 | * Fuzzy effect 93 | rdrobust Y1 X, fuzzy(D) 94 | global h = e(h_l) 95 | global b = e(b_l) 96 | 97 | * equivalent to the two-step estimation below 98 | drop if D==. /* be careful! if you don't drop missing values in D, steps below do not replicate the effect*/ 99 | rdrobust Y1 X, h($h) b($b) 100 | global itt = e(tau_cl) 101 | 102 | rdrobust D X, h($h) b($b) 103 | global fs = e(tau_cl) 104 | 105 | display $itt/$fs 106 | 107 | * Rdrobust (sharp) on Y1 108 | rdrobust Y1 X, vce(cluster X) 109 | 110 | * Rdrobust (fuzzy) on Y1 111 | rdrobust Y1 X, fuzzy(D) vce(cluster X) 112 | 113 | * Rdplot on Y1 114 | rdplot Y1 X 115 | 116 | *------------------ 117 | * Outcome 2: Dummy ==1 if student attended a high-quality certified HEI immediately after receiving subsidy 118 | *------------------------------- 119 | * Rdrobust (sharp) on Y2 120 | rdrobust Y2 X 121 | 122 | * Rdrobust (fuzzy) on Y2 123 | rdrobust Y2 X, fuzzy(D) 124 | 125 | * Rdplot on Y2 126 | rdplot Y2 X 127 | 128 | *------------- 129 | * Falsification 130 | *------------- 131 | *------------- 132 | * Rddensity 133 | *---------- 134 | rddensity X 135 | 136 | *------------- 137 | * Test on Covariates 138 | *------------------------- 139 | * Sharp rdrobust 140 | global covariates "icfes_female icfes_age icfes_urm icfes_stratum icfes_privatehs icfes_famsize" 141 | foreach y of global covariates { 142 | rdrobust `y' X, all 143 | } 144 | 145 | * Fuzzy rdrobust 146 | foreach y of global covariates { 147 | rdrobust `y' X, fuzzy(D) all 148 | } 149 | 150 | ************************************* 151 | * Extra: Local randomization analysis 152 | *------------------------------- 153 | * Window selection 154 | rdwinselect X icfes_female icfes_age icfes_urm icfes_stratum icfes_privatehs icfes_famsize, 
seed(86543) 155 | 156 | global wr = 0.13 157 | global wl = -0.13 158 | 159 | * Rdrandinf on D: first stage in this window 160 | rdrandinf D X, wl($wl) wr($wr) 161 | 162 | * Rdrandinf (sharp) on Y1 163 | rdrandinf Y1 X, wl($wl) wr($wr) 164 | 165 | * Rdrandinf (fuzzy, AR) on Y1 166 | rdrandinf Y1 X, fuzzy(D) wl($wl) wr($wr) 167 | 168 | * Rdrandinf (fuzzy, TSLS) on Y1 169 | rdrandinf Y1 X, fuzzy(D tsls) wl($wl) wr($wr) 170 | 171 | * Rdrandinf (sharp) on Y2 172 | rdrandinf Y2 X, wl($wl) wr($wr) 173 | 174 | * Rdrandinf (fuzzy, AR) on Y2 175 | rdrandinf Y2 X, fuzzy(D) wl($wl) wr($wr) 176 | 177 | * Rdrandinf (fuzzy, TSLS) on Y2 178 | rdrandinf Y2 X, fuzzy(D tsls) wl($wl) wr($wr) 179 | 180 | * Additional outcomes 181 | rdrandinf spadies_hq_pri X, wl($wl) wr($wr) 182 | rdrandinf spadies_hq_pub X, wl($wl) wr($wr) 183 | 184 | -------------------------------------------------------------------------------- /Code/RDcourse-part7-falsification.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------# 2 | # Part 7: Falsification Analysis for RD designs 3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik 4 | # Last update: May 2023 5 | #------------------------------------------------------------------------------# 6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 7 | #------------------------------------------------------------------------------# 8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS: 9 | # FOREIGN: install.packages('foreign') 10 | # GGPLOT2: install.packages('ggplot2') 11 | # LPDENSITY: install.packages('lpdensity') 12 | # RDDENSITY: install.packages('rddensity') 13 | # RDLOCRAND: install.packages('rdlocrand') 14 | # RDROBUST: install.packages('rdrobust') 15 | #------------------------------------------------------------------------------# 16 | rm(list=ls()) 17 | 18 | library(foreign) 19 | library(lpdensity) 20 | library(rddensity) 21 
| library(rdrobust) 22 | library(rdlocrand) 23 | library(readstata13) 24 | library(ggplot2) 25 | 26 | data = read.dta13("headstart.dta") 27 | names(data) 28 | 29 | Y = data$mort_age59_related_postHS 30 | X = data$povrate60 - 59.1984 31 | 32 | # Using rddensity 33 | out = rddensity(X) 34 | plot1 = rdplotdensity(out, X) 35 | plot1 = rdplotdensity(out, X, plotRange = c(-20, 20), plotN = 25) 36 | 37 | # Histogram 38 | out = rddensity(X) 39 | bw_left = as.numeric(out$h$left) 40 | bw_right = as.numeric(out$h$right) 41 | tempdata = as.data.frame(X); colnames(tempdata) = c("v1"); 42 | plot2 = ggplot(data=tempdata, aes(tempdata$v1)) + 43 | geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(-bw_left, 0, 1), fill = "blue", col = "black", alpha = 1) + 44 | geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(0, bw_right, 1), fill = "red", col = "black", alpha = 1) + 45 | labs(x = "Score", y = "Number of Observations") + geom_vline(xintercept = 0, color = "black") + 46 | theme_bw() 47 | plot2 48 | 49 | 50 | #** Placebo Outcome Variables 51 | placebos = cbind(data$mort_age59_injury_postHS, data$mort_age59_related_preHS) 52 | for(i in 1:ncol(placebos)) { 53 | rdplot(placebos[,i], X) 54 | summary(rdrobust(placebos[,i], X)) # values are not auto-printed inside a for loop, so print explicitly 55 | } 56 | 57 | covariates = cbind(data$census1960_pop, data$census1960_pctsch1417, data$census1960_pctsch534, 58 | data$census1960_pctsch25plus, data$census1960_pop1417, data$census1960_pop534, 59 | data$census1960_pop25plus, data$census1960_pcturban, data$census1960_pctblack) 60 | 61 | for(i in 1:ncol(covariates)) { 62 | rdplot(covariates[,i], X) 63 | summary(rdrobust(covariates[,i], X)) # values are not auto-printed inside a for loop, so print explicitly 64 | } 65 | 66 | # Estimating the RD effect at alternative cutoffs (true cutoff is zero) 67 | summary(rdrobust(Y, X)) # true cutoff 68 | summary(rdrobust(Y[X>0], X[X>0], c=5)) 69 | 70 | ################################################################################ 71 | ## US Senate data 72 | 
################################################################################ 73 | data = read.dta13("senate.dta") 74 | names(data) 75 | 76 | X = data$demmv 77 | Y = data$demvoteshfor2 78 | 79 | # Create "Democratic Win at t" 80 | Z = rep(NA, length(X)) 81 | Z[X<0 & !is.na(X)]=0 82 | Z[X>=0 & !is.na(X)]=1 83 | 84 | # RDdensity 85 | # Using rddensity 86 | out = rddensity(X) 87 | summary(out) 88 | plot1 = rdplotdensity(out, X) 89 | plot1 = rdplotdensity(out, X, plotRange = c(-20, 20), plotN = 25) 90 | 91 | # Histogram 92 | out = rddensity(X) 93 | bw_left = as.numeric(out$h$left) 94 | bw_right = as.numeric(out$h$right) 95 | tempdata = as.data.frame(X); colnames(tempdata) = c("v1"); 96 | plot2 = ggplot(data=tempdata, aes(tempdata$v1)) + 97 | geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(-bw_left, 0, 1), fill = "blue", col = "black", alpha = 1) + 98 | geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(0, bw_right, 1), fill = "red", col = "black", alpha = 1) + 99 | labs(x = "Score", y = "Number of Observations") + geom_vline(xintercept = 0, color = "black") + 100 | theme_bw() 101 | plot2 102 | 103 | 104 | # Binomial Test with rdwinselect 105 | rdwinselect(X, wmin = 0.75, nwindows=1) 106 | 107 | # Code snippet 3, Binomial Test by Hand 108 | binom.test(x = 15, n = 39, p = 0.5) 109 | 110 | # Code snippet 4, Using rdrobust on demvoteshlag1 111 | summary(rdrobust(data$demvoteshlag1, X)) 112 | rdplot(data$demvoteshlag1, X) 113 | 114 | # Table 6.1 (rdrobust on all the covariates) 115 | covs = cbind(data$presdemvoteshlag1, data$demvoteshlag1, data$demvoteshlag2, data$demwinprv1, data$demwinprv2, data$dmidterm, data$dpresdem, data$dopen) 116 | for(i in 1:ncol(covs)) { 117 | rdplot(covs[,i], X) 118 | summary(rdrobust(covs[,i], X)) # loop over the Senate covs built above (not the Head Start matrix) and print explicitly inside the loop 119 | } 120 | 121 | # Code snippet 5, Using rdplot to Show the rdrobust Effect for demvoteshlag1 122 | out = rdrobust(data$demvoteshlag1,X) 123 | bandwidth=out$bws[1,1] 124 | ii = 
abs(X)<=bandwidth 125 | rdplot(data$demvoteshlag1[ii],X[ii], p=1, kernel = "triangular", h = bandwidth) 126 | 127 | # Code snippet 6, Using rdrandinf on dopen 128 | rdrandinf(data$dopen, X, wl=-.75, wr=.75) 129 | 130 | #* Table 6.2, rdrandinf on All Covariates 131 | covs = cbind(data$presdemvoteshlag1, data$demvoteshlag1, data$demvoteshlag2, data$demwinprv1, data$demwinprv2, data$dmidterm, data$dpresdem, data$dopen) 132 | window=0.75 133 | for(i in 1:ncol(covs)) { 134 | print(t.test(covs[,i][abs(X) <= window & Z==1], covs[,i][abs(X) <= window & Z==0], var.equal = TRUE)) # base R t.test on the Senate covs; <= window and pooled variance mirror the Stata ttest, by(Z) call 135 | rdrandinf(covs[,i], X, wl=-window, wr=window) 136 | } 137 | 138 | #* Code snippet 7, Using rdplot to Show the rdrobust Effect for dopen 139 | ii = (abs(X)<=.75) 140 | rdplot(data$dopen[ii], X[ii], h=0.75, p=0, kernel='uniform') 141 | 142 | # Code snippet 8, Using rdrobust with the Cutoff Equal to 1 143 | rdrobust(Y[X>=0], X[X>=0], c=1) 144 | -------------------------------------------------------------------------------- /Code/RDcourse-part7-falsification.do: -------------------------------------------------------------------------------- 1 | ******************************************************************************** 2 | ** Part 7: RD Falsification Analysis 3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico 4 | ** Last update: May 2023 5 | ******************************************************************************** 6 | ** RDROBUST: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 9 | ** RDPOWER: net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 10 | 
********************************************************************************* 11 | *-------------------------------------------------------------------------------------------------------------* 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 13 | *-------------------------------------------------------------------------------------------------------------* 14 | cd C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes 15 | clear 16 | clear all 17 | clear matrix 18 | cap log close 19 | set more off 20 | 21 | use "headstart.dta", clear 22 | 23 | gen Y = mort_age59_related_postHS 24 | gen X = povrate60 - 59.1984 25 | 26 | 27 | * Using rddensity 28 | rddensity X 29 | 30 | * Figure 31 | rddensity X 32 | local bandwidth_left=e(h_l) 33 | local bandwidth_right=e(h_r) 34 | twoway (histogram X if X>=-`bandwidth_left' & X<0, freq width(1) color(blue)) /// 35 | (histogram X if X>=0 & X<=`bandwidth_right', freq width(1) color(red)), xlabel(-20(10)30) /// 36 | graphregion(color(white)) xtitle(Score) ytitle(Number of Observations) legend(off) 37 | 38 | rddensity X, plot 39 | 40 | 41 | ** Placebo Outcome Variables 42 | foreach y of varlist mort_age59_injury_postHS mort_age59_related_preHS { 43 | rdplot `y' X 44 | rdrobust `y' X 45 | } 46 | 47 | 48 | foreach y of varlist census1960_pop census1960_pctsch1417 census1960_pctsch534 /// 49 | census1960_pctsch25plus census1960_pop1417 census1960_pop534 /// 50 | census1960_pop25plus census1960_pcturban census1960_pctblack { 51 | 52 | rdplot `y' X 53 | rdrobust `y' X 54 | } 55 | 56 | 57 | 58 | 59 | 60 | ******************************************************************************** 61 | use "senate.dta", clear 62 | 63 | rename demmv X 64 | rename demvoteshfor2 Y 65 | 66 | gen Z=. 67 | replace Z=0 if X<0 & X!=. 68 | replace Z=1 if X>=0 & X!=. 
69 | label var Z "Democratic Win at t" 70 | 71 | order X Y Z 72 | 73 | *-----------------------------------------* 74 | * Section 6: Validation and Falsification * 75 | *-----------------------------------------* 76 | * Code snippet 1, Using rddensity 77 | rddensity X 78 | 79 | * Figure 6.2a 80 | rddensity X 81 | local bandwidth_left=e(h_l) 82 | local bandwidth_right=e(h_r) 83 | twoway (histogram X if X>=-`bandwidth_left' & X<0, freq width(1) color(blue)) /// 84 | (histogram X if X>=0 & X<=`bandwidth_right', freq width(1) color(red)), xlabel(-20(10)30) /// 85 | graphregion(color(white)) xtitle(Score) ytitle(Number of Observations) legend(off) 86 | 87 | rddensity X, plot 88 | 89 | * Code snippet 2, Binomial Test with rdwinselect 90 | rdwinselect X, wmin(0.75) nwindows(1) 91 | 92 | * Code snippet 3, Binomial Test by Hand 93 | bitesti 39 15 1/2 94 | 95 | * Code snippet 4, Using rdrobust on demvoteshlag1 96 | rdrobust demvoteshlag1 X 97 | 98 | * Table 6.1 (rdrobust on all the covariates) 99 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen" 100 | matrix define R=J(8,8,.) 101 | local k=1 102 | foreach y of global covariates { 103 | rdrobust `y' X 104 | } 105 | 106 | 107 | * Code snippet 5, Using rdplot to Show the rdrobust Effect for demvoteshlag1 108 | rdrobust demvoteshlag1 X 109 | local bandwidth=e(h_l) 110 | rdplot demvoteshlag1 X if abs(X)<=`bandwidth', h(`bandwidth') p(1) kernel(triangular) 111 | 112 | * Code snippet 6, Using rdrandinf on dopen 113 | rdrandinf dopen X, wl(-.75) wr(.75) 114 | 115 | * Table 6.2, rdrandinf on All Covariates 116 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen" 117 | local window=0.75 118 | matrix define R=J(8,5,.) 
119 | local k=1 120 | foreach y of global covariates { 121 | ttest `y' if abs(X)<=`window', by(Z) 122 | 123 | rdrandinf `y' X, wl(-`window') wr(`window') 124 | } 125 | 126 | * Code snippet 7, Using rdplot to Show the rdrobust Effect for dopen 127 | rdplot dopen X if abs(X)<=.75, h(.75) p(0) kernel(uniform) 128 | 129 | * Code snippet 8, Using rdrobust with the Cutoff Equal to 1 130 | rdrobust Y X if X>=0, c(1) 131 | 132 | * Table 6.3 (rdrobust all cutoffs from -5 to 5 in increments of 1) 133 | matrix define R=J(11,10,.) 134 | local k=1 135 | forvalues x=-5(1)5 { 136 | if `x'>0 { 137 | local condition="if X>=0" 138 | } 139 | else if `x'<0 { 140 | local condition="if X<0" 141 | } 142 | else { 143 | local condition="" 144 | } 145 | 146 | rdrobust Y X `condition', c(`x') 147 | 148 | } 149 | 150 | 151 | -------------------------------------------------------------------------------- /Code/RDcourse-part8-discrete.R: -------------------------------------------------------------------------------- 1 | #------------------------------------------------------------------------------# 2 | #------------------------------------------------------------------------------# 3 | # RD Designs with Discrete Running Variables 4 | # Authors: Matias D. 
Cattaneo, Nicolas Idrobo and Rocio Titiunik 5 | # Last update: May 2023 6 | #------------------------------------------------------------------------------# 7 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 8 | #------------------------------------------------------------------------------# 9 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS: 10 | # FOREIGN: install.packages('foreign') 11 | # GGPLOT2: install.packages('ggplot2') 12 | # GRID: install.packages('grid') 13 | # LPDENSITY: install.packages('lpdensity') 14 | # RDDENSITY: install.packages('rddensity') 15 | # RDLOCRAND: install.packages('rdlocrand') 16 | # RDROBUST: install.packages('rdrobust') 17 | # TEACHINGDEMOS: install.packages('TeachingDemos') 18 | #------------------------------------------------------------------------------# 19 | #------------------------------------------------------------------------------# 20 | rm(list=ls()) 21 | 22 | library(foreign) 23 | library(ggplot2) 24 | library(lpdensity) 25 | library(rddensity) 26 | library(rdrobust) 27 | library(rdlocrand) 28 | library(readstata13) 29 | 30 | # Loading the data and defining the main variables 31 | data = read.dta13("education.dta") 32 | nextGPA = data$nextGPA 33 | left_school = data$left_school 34 | nextGPA_nonorm = data$nextGPA_nonorm 35 | X = data$X 36 | T = data$T 37 | T_X = T*X 38 | 39 | #--------------------------------------------# 40 | # Example of RD With Discrete Score # 41 | #--------------------------------------------# 42 | 43 | # Figure 3.1 44 | # Histogram 45 | tempdata = as.data.frame(X); colnames(tempdata) = c("v1"); 46 | p = ggplot(data=tempdata, aes(tempdata$v1))+ 47 | geom_histogram(breaks=seq(-2.8, 0, by = 0.1), col="black", fill="blue", alpha = 1)+ 48 | geom_histogram(breaks=seq(0, 1.6, by = 0.1), col="black", fill="red", alpha = 1)+ 49 | labs(x="Score", y="Number of Observations")+geom_vline(xintercept=0, color="black")+ 50 | theme_bw() 51 | p 52 | 53 | # Counting the number of observations with X different 
from missing 54 | sum(!is.na(X)) 55 | # (sum(!is.na(X)) skips NA entries, which length(X) would have counted) 56 | # Counting the unique values of X (NA is excluded so it is not counted as a value) 57 | length(unique(X[!is.na(X)])) 58 | 59 | # Using rddensity 60 | out = rddensity(X) 61 | summary(out) 62 | 63 | # Using rdrobust on hsgrade_pct 64 | out = rdrobust(data$hsgrade_pct, X) 65 | summary(out) 66 | 67 | # Using rdplot on hsgrade_pct 68 | rdplot(data$hsgrade_pct, X, x.label = "Running Variable", y.label = "", title="") 69 | # (the call below repeats the plot above with identical arguments) 70 | rdplot(data$hsgrade_pct, X, x.label = "Running Variable", y.label = "", title="") 71 | 72 | # Table 3.3: rdrobust with default options on each covariate 73 | summary(rdrobust(data$hsgrade_pct, X)) 74 | summary(rdrobust(data$totcredits_year1, X)) 75 | summary(rdrobust(data$age_at_entry, X)) 76 | summary(rdrobust(data$male, X)) 77 | summary(rdrobust(data$bpl_north_america, X)) 78 | summary(rdrobust(data$english, X)) 79 | summary(rdrobust(data$loc_campus1, X)) 80 | summary(rdrobust(data$loc_campus2, X)) 81 | summary(rdrobust(data$loc_campus3, X)) 82 | 83 | # Figure 3.3: rdplot for six of the covariates 84 | rdplot(data$hsgrade_pct, X, x.label = "Running Variable", y.label = "", title="") 85 | rdplot(data$totcredits_year1, X, x.label = "Running Variable", y.label = "", title="") 86 | rdplot(data$age_at_entry, X, x.label = "Running Variable", y.label = "", title="") 87 | rdplot(data$english, X, x.label = "Running Variable", y.label = "", title="") 88 | rdplot(data$male, X, x.label = "Running Variable", y.label = "", title="") 89 | rdplot(data$bpl_north_america, X, x.label = "Running Variable", y.label = "", title="") 90 | 91 | # Using rdplot on the outcome 92 | out = rdplot(nextGPA_nonorm, X, binselect = 'esmv') 93 | summary(out) 94 | 95 | rdplot(nextGPA_nonorm, X, binselect = 'esmv', x.label = 'Running Variable', y.label = 'Outcome', title = '') 96 | 97 | # Using rdrobust on the outcome 98 | out = rdrobust(nextGPA_nonorm, X, kernel = 'triangular', p = 1, bwselect = 'mserd') 99 | summary(out) 100 | 101 | # Using rdrobust and showing the objects it returns 102 | rdout = rdrobust(nextGPA_nonorm, X, kernel = 'triangular', p = 1, bwselect = 'mserd') 103 | 
print(names(rdout)) 104 | print(rdout$beta_p_r) 105 | print(rdout$beta_p_l) 106 | 107 | # Using rdrobust with clustered standard errors (the cluster variable is the score X itself, so each distinct score value forms one cluster) 108 | clustervar = X 109 | out = rdrobust(nextGPA_nonorm, X, kernel = 'triangular', p = 1, bwselect = 'mserd', vce = 'hc0', cluster = clustervar) 110 | summary(out) 111 | 112 | # Using rdrobust on the collapsed data (one row per distinct value of X, holding the mean outcome at that value) 113 | data2 = data.frame(nextGPA_nonorm, X) 114 | dim(data2) 115 | collapsed = aggregate(nextGPA_nonorm ~ X, data = data2, mean) 116 | dim(collapsed) 117 | out = rdrobust(collapsed$nextGPA_nonorm, collapsed$X) 118 | summary(out) 119 | 120 | # Binomial test with rdwinselect (NOTE(review): the cutoff is set to 5e-06 rather than 0, presumably so that observations with X == 0 fall below it -- confirm) 121 | out = rdwinselect(X, wmin = 0.01, nwindows = 1, cutoff = 5.00000000000e-06) 122 | 123 | # Binomial Test by Hand (77 successes in 305 trials, null probability 1/2) 124 | binom.test(77, 305, 1/2) 125 | 126 | # Using rdrandinf on hsgrade_pct (window from -0.005 to 0.01, fixed seed) 127 | out = rdrandinf(data$hsgrade_pct, X, wl = -0.005, wr = 0.01, seed = 50) 128 | 129 | # Using rdwinselect with covariates and windows with equal number of mass points to each side of the cutoff 130 | Z = cbind(data$hsgrade_pct, data$totcredits_year1, data$age_at_entry, data$male, 131 | data$bpl_north_america, data$english, data$loc_campus1, data$loc_campus2, 132 | data$loc_campus3) 133 | colnames(Z) = c("hsgrade_pct", "totcredits_year1", "age_at_entry", "male", 134 | "bpl_north_america", "english", "loc_campus1", "loc_campus2", 135 | "loc_campus3") 136 | out = rdwinselect(X, Z, p = 1, seed = 50, wmin = 0.01, wstep = 0.01, cutoff = 5.00000000000e-06) 137 | 138 | # Using rdrandinf on the Outcome (same window and seed as the hsgrade_pct call above) 139 | out = rdrandinf(nextGPA_nonorm, X, wl = -0.005, wr = 0.01, seed = 50) 140 | 141 | -------------------------------------------------------------------------------- /Code/RDcourse-part8-discrete.do: -------------------------------------------------------------------------------- 1 | ******************************************************************************** 2 | ** Part 8: RD Designs with Discrete Running Variables 3 | ** Author: Rocio Titiunik, Matias 
Cattaneo, Sebastian Calonico 4 | ** Last update: May 2023 5 | ******************************************************************************** 6 | ** RDROBUST: net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 9 | ** RDPOWER: net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 10 | ********************************************************************************* 11 | *-------------------------------------------------------------------------------------------------------------* 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/ 13 | *-------------------------------------------------------------------------------------------------------------* 14 | clear 15 | clear all 16 | clear matrix 17 | cap log close 18 | set more off 19 | * Author's local data folder. capture noisily reports a missing folder without halting, so on other machines the do-file continues from the current working directory (which must then contain education.dta). 20 | capture noisily cd C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes 21 | 22 | * A folder called "outputs" needs to be created in order to store 23 | * all of the figures, logs and tables. If the folder already exists, 24 | * the user will get an error message but the code will not stop. 
25 | capture noisily mkdir "outputs" 26 | 27 | * Loading the data 28 | use "education.dta", clear 29 | 30 | *----------------------------------------------* 31 | * Example of RD With Discrete Score * 32 | *----------------------------------------------* 33 | 34 | * Histogram of the score (blue: X<0, red: X>=0, vertical line at 0) 35 | twoway (histogram X if X<0, width(0.1) freq color(blue) xline(0)) /// 36 | (histogram X if X>=0, width(0.1) freq color(red)), graphregion(color(white)) legend(off) /// 37 | xtitle(Score) ytitle(Number of Observations) 38 | 39 | * Counting the Number of Observations with X Different from Missing 40 | count if X!=. 41 | 42 | * Code snippet 2, Counting the Unique Values of X 43 | codebook X 44 | 45 | duplicates report X 46 | * Listing how many observations fall on each distinct value of X (the data in memory are restored afterwards) 47 | preserve 48 | gen obs=1 49 | collapse (sum) obs, by(X) 50 | list X obs 51 | restore 52 | 53 | * Using rddensity 54 | rddensity X 55 | 56 | * Using rdrobust on hsgrade_pct 57 | rdrobust hsgrade_pct X 58 | 59 | * Using rdplot on hsgrade_pct 60 | rdplot hsgrade_pct X 61 | 62 | * rdrobust on All Covariates (results are collected in matrix R: one row per covariate, 8 columns) 63 | global covariates "hsgrade_pct totcredits_year1 age_at_entry male bpl_north_america english loc_campus1 loc_campus2 loc_campus3" 64 | matrix define R=J(9,8,.) 
65 | local k=1 66 | foreach y of global covariates { 67 | rdrobust `y' X, all 68 | local label_`k': variable label `y' 69 | matrix R[`k',1]=e(h_l) 70 | matrix R[`k',2]=e(tau_cl) 71 | matrix R[`k',3]=e(tau_bc) 72 | matrix R[`k',4]=e(se_tau_rb) 73 | matrix R[`k',5]=2*normal(-abs(R[`k',3]/R[`k',4])) 74 | matrix R[`k',6]=R[`k',3]-invnormal(0.975)*R[`k',4] 75 | matrix R[`k',7]=R[`k',3]+invnormal(0.975)*R[`k',4] 76 | matrix R[`k',8]=e(N_h_l)+e(N_h_r) 77 | * Columns of R: 1 e(h_l), 2 e(tau_cl), 3 e(tau_bc), 4 e(se_tau_rb), 5 two-sided normal p-value of col 3 / col 4, 6-7 95% normal interval built from cols 3 and 4, 8 e(N_h_l)+e(N_h_r) 78 | local k=`k'+1 79 | } 80 | matlist R 81 | * Export R as a dataset, with the covariates' variable labels in a leading string column R0 82 | preserve 83 | clear 84 | local t=`k'-1 85 | svmat R 86 | gen R0="" 87 | forvalues k=1/`t' { 88 | replace R0="`label_`k''" if _n==`k' 89 | } 90 | order R0 91 | save "outputs/section7_rdrobust_allcovariates.dta", replace 92 | restore 93 | 94 | * Using rdplot on the Outcome 95 | rdplot nextGPA_nonorm X, binselect(esmv) /// 96 | graph_options(graphregion(color(white)) /// 97 | xtitle(Running Variable) ytitle(Outcome)) 98 | 99 | * Using rdrobust on the Outcome 100 | rdrobust nextGPA_nonorm X, kernel(triangular) p(1) bwselect(mserd) 101 | 102 | * Using rdrobust and Showing the Objects it Returns 103 | rdrobust nextGPA_nonorm X 104 | ereturn list 105 | 106 | * Using rdrobust with Clustered Standard Errors (the cluster variable is a copy of the score X) 107 | cap drop clustervar 108 | gen clustervar=X 109 | rdrobust nextGPA_nonorm X, kernel(triangular) p(1) bwselect(mserd) vce(cluster clustervar) 110 | 111 | * Using rdrobust on the Collapsed Data (one row per distinct value of X, holding the mean outcome) 112 | preserve 113 | collapse (mean) nextGPA_nonorm, by(X) 114 | rdrobust nextGPA_nonorm X 115 | restore 116 | 117 | * Binomial Test with rdwinselect (NOTE(review): cutoff is 0.000005 rather than 0, presumably so that observations with X == 0 fall below it -- confirm) 118 | rdwinselect X, wmin(0.01) nwindows(1) cutoff(0.000005) 119 | 120 | * Binomial Test by Hand 121 | bitesti 305 77 1/2 122 | 123 | * Using rdrandinf on hsgrade_pct 124 | rdrandinf hsgrade_pct X , seed(50) wl(-.005) wr(.01) 125 | 126 | * rdrandinf on All Covariates (results are collected in matrix R: one row per covariate, 5 columns) 127 | global covariates "hsgrade_pct totcredits_year1 age_at_entry male bpl_north_america english loc_campus1 loc_campus2 loc_campus3" 128 | matrix define R=J(9,5,.) 
129 | local k=1 130 | foreach y of global covariates { 131 | ttest `y' if X>=-0.005 & X<=0.01, by(Z) 132 | matrix R[`k',1]=r(mu_1) 133 | matrix R[`k',2]=r(mu_2) 134 | * NOTE(review): by(Z) needs a grouping variable named Z in the loaded data; the companion R script reads a variable T from the same file and uses Z for the covariate matrix -- confirm Z exists 135 | rdrandinf `y' X, wl(-0.005) wr(0.01) seed(50) 136 | local label_`k': variable label `y' 137 | matrix R[`k',3]=r(obs_stat) 138 | matrix R[`k',4]=r(randpval) 139 | matrix R[`k',5]=r(N) 140 | * Columns of R: 1-2 ttest group means r(mu_1), r(mu_2); 3 r(obs_stat), 4 r(randpval), 5 r(N) from rdrandinf 141 | local k=`k'+1 142 | } 143 | 144 | mat list R 145 | preserve 146 | clear 147 | local t=`k'-1 148 | svmat R 149 | gen R0="" 150 | forvalues k=1/`t' { 151 | replace R0="`label_`k''" if _n==`k' 152 | } 153 | order R0 154 | save "outputs/section7_rdrandinf_allcovariates.dta", replace 155 | restore 156 | * The cutoff below is 0.000005, the value used by the earlier rdwinselect call in this do-file (it was 0.00005 here) 157 | * Using rdwinselect with Covariates and Windows with Equal Number of Mass Points 158 | * to Each Side of the Cutoff 159 | global covariates "hsgrade_pct totcredits_year1 age_at_entry male bpl_north_america english loc_campus1 loc_campus2 loc_campus3" 160 | rdwinselect X $covariates, cutoff(0.000005) wmin(0.01) wstep(0.01) 161 | * Same window selection restricted to observations with loc_campus3==1 162 | rdwinselect X $covariates if loc_campus3==1, cutoff(0.000005) wmin(0.01) wstep(0.01) 163 | 164 | * Using rdrandinf on the Outcome 165 | rdrandinf nextGPA_nonorm X, seed(50) wl(-0.005) wr(0.01) 166 | 167 | *-------------------------------------------------------------------------------------------------------------* 168 | clear all 169 | -------------------------------------------------------------------------------- /Code/README.md: -------------------------------------------------------------------------------- 1 | # Example Code 2 | 3 | This folder contains example implementation of all the methods we discuss in this course in both `R` and `Stata`. They are numbered in order of the material covered. The necessary datasets are included in this folder as well. 
4 | 5 | -------------------------------------------------------------------------------- /Code/education.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/education.dta -------------------------------------------------------------------------------- /Code/headstart.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/headstart.dta -------------------------------------------------------------------------------- /Code/house.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/house.dta -------------------------------------------------------------------------------- /Code/jtpa.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/jtpa.dta -------------------------------------------------------------------------------- /Code/senate.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/senate.dta -------------------------------------------------------------------------------- /Code/spp.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/spp.dta -------------------------------------------------------------------------------- /Code/uruguayancct.dta: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/uruguayancct.dta -------------------------------------------------------------------------------- /Code/workflow.do: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | * (1) Establish whether the RD assumptions are plausible 6 | [TOMORROW] 7 | 8 | * (2) rdPLOT 9 | rdplot Y X 10 | 11 | * (3) If using local polynomial 12 | rdrobust Y X, p(1) 13 | 14 | * (4) Explore robustness of the result 15 | rdrobust Y X, p(1) h(5) 16 | rdrobust Y X, p(1) h(5) kernel(uniform) 17 | rdrobust Y X, p(1) bwselect(cerrd) -------------------------------------------------------------------------------- /Labs/Exercise-1/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Exercise 1: Continuity-based analysis of the Meyersson application 3 | 4 | In this exercise, you will re-analyze the data in [Meyersson (2014)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/Meyersson2014-ECTA.pdf). The Meyersson application is described and replicated in Section 1 of [Cattaneo, Idrobo and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf). The data is in the file `polecon.dta` (Stata format) and `polecon.csv` (comma separated values format). 5 | 6 | Meyersson is broadly interested in the effect of Islamic parties’ control of local governments on women’s rights, in particular on the educational attainment of young women. To study this question, he uses an RD design focused exclusively on the 1994 Turkish mayoral elections. 
Specifically, the unit of analysis is the municipality, and the score (variable `X` in the dataset) is the Islamic margin of victory—defined as the difference between the vote share obtained by the largest Islamic party, and the vote share obtained by the largest secular party opponent. The main outcome of interest (variable `Y` in the dataset) is the school attainment for women who were (potentially) in high school during the period 1994-2000, measured with variables extracted from the 2000 census. The particular outcome we re-analyze is the share of the cohort of women ages 15 to 20 in 2000 who had completed high school by 2000. 7 | 8 | ## Part 1: Continuity-Based Analysis 9 | 10 | 1. Explore the range and density of the running variable. Is this a continuous random variable? 11 | 12 | 2. Always plot the data first! 13 | - Produce RD plots using rdplot for the outcome variable. 14 | - Do not forget to vary the number of bins used, and look at the difference between quantile bins and evenly-spaced bins. 15 | - Re-explore the effects of varying the polynomial order for the global fit. 16 | 17 | 3. Compute optimal bandwidth choices using rdbwselect for the outcome variable. 18 | - Consider the same and different bandwidths for each side of the cutoff, and MSE versus CER optimal bandwidths. 19 | - Is the estimated bandwidth sensitive to these choices? 20 | 21 | 4. Explore the influence of tuning parameters for bandwidth selection. 22 | - How do estimated bandwidth change when you restrict the support of the running variable? 23 | - How do estimated bandwidth change when you change the order of local polynomials? 24 | 25 | 5. Use rdrobust to estimate the RD treatment effects using local polynomials with optimal bandwidth selection. 26 | 27 | 6. Produce RD estimates and confidence intervals using rdrobust for each outcome variable. 28 | - Obtain the same results for all three approaches: conventional, bias-corrected & robust. 
29 | - What happens if you change the kernel shape? 30 | - What happens if you change the standard error estimation method? 31 | - What happens if you change the bandwidth selection method? 32 | - What happens if you change the order of polynomials used? 33 | 34 | -------------------------------------------------------------------------------- /Labs/Exercise-1/exercise-Meyersson.do: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | 3 | In-class exercise: Meyersson example 4 | 5 | The impact of having a mayor from an Islamic party on educational attainment of women 6 | Unit of observation: municipalities (in Turkey) 7 | Period: 1994-2000 8 | 9 | Running variable/score : vote margin obtained by the Islamic party in the 1994 Turkish mayoral elections ==> X 10 | Outcome of interest : percentage of women aged 15 to 20 in 2000 who had completed high school by 2000 ==> Y 11 | Treatment : electoral victory of the Islamic party in 1994 ==> T 12 | 13 | *********************************************/ 14 | * Load the exercise data (polecon.dta must be in the current working directory) 15 | use polecon.dta, clear 16 | 17 | -------------------------------------------------------------------------------- /Labs/Exercise-1/polecon.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Labs/Exercise-1/polecon.dta -------------------------------------------------------------------------------- /Labs/Exercise-2/CaugheySekhon2011.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Labs/Exercise-2/CaugheySekhon2011.dta -------------------------------------------------------------------------------- /Labs/Exercise-2/README.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Exercise 2: Caughey and Sekhon Application 3 | 4 | In this exercise, you will re-analyze the data in [Caughey and Sekhon (2011)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CaugheySekhon2011-PA.pdf). The data is in the file `CaugheySekhon2011.dta` (Stata format) and `CaugheySekhon2011.csv` (comma separated values format). 5 | 6 | The authors are interested in estimating the incumbency advantage at the party level. To study this question, they use an RD design focused exclusively on U.S. House elections. Specifically, the unit of analysis is the U.S. congressional district, and the score (variable `DifDPct` in the dataset) is the margin of victory of the Democratic party—defined as the difference between the vote share obtained by the Democratic party and the vote share obtained by the Democratic party’s strongest opponent. The main outcome of interest (variable `DWinNxt` in the dataset) is the Democratic party’s victory in the following election. 7 | 8 | ## Part 1: Continuity-based Falsification 9 | 10 | 1. Try to validate the RD design. 11 | - Plot the histogram of the running variable. 12 | - Conduct a binomial test for counts and a density test. 13 | - Look at covariate balance, both graphically and formally. 14 | - Do not forget to plot with the rdbinselect command! 15 | 16 | 2. Do you think this design is credible under a continuity assumption? 17 | 18 | ## Part 2: Local randomization Falsification 19 | 20 | 1. Select a window based on the two covariates given 21 | 22 | 2. Try to provide empirical evidence supporting the local-randomization assumption in the window you chose. 23 | 24 | 3. Do you think this design is credible under a local randomization assumption? 25 | 26 | ## Part 3: Compare both approaches 27 | 28 | 1. Do you reach different conclusions from the two approaches? Is this RD design valid? Why/Why not? 
29 | -------------------------------------------------------------------------------- /Labs/Exercise-2/exercise-CaugheySekhon.do: -------------------------------------------------------------------------------- 1 | /******************************************** 2 | 3 | In-class exercise: Caughey and Sekhon example 4 | 5 | Party incumbency advantage in the U.S. House 6 | Unit of observation: house district 7 | Period: 1942-2010 8 | 9 | Running variable/score : Democratic Party's Margin of Victory at Election t ==> DifDPct 10 | Outcome of interest : Democratic Party's Victory at Election t+1 ==> DWinNxt 11 | Pre-determined covariate 1 : Democratic Party's Victory at Election t-1 (DWinPrv) 12 | Pre-determined covariate 2 : Democratic Party's Vote Share at Election t-1 (DPctPrv) 13 | 14 | *********************************************/ 15 | 16 | use CaugheySekhon2011.dta, clear 17 | global x DifDPct /* score */ 18 | global y DWinNxt /* outcome */ 19 | global z1 DWinPrv /* covariate 1 */ 20 | global z2 DPctPrv /* covariate 2 */ 21 | 22 | /* Step 1 : Analyze this RD using a local polynomial approach */ 23 | * Do RD plot, falsification on density and covariates, and local polynomial estimation with optimal bandwidth selection 24 | 25 | /* Step 2 : Analyze this RD using a local randomization approach */ 26 | * Select a window based on the two covariates and perform inference inside the chosen window 27 | 28 | /* Step 3: Compare results from both approaches */ 29 | * Do you reach different conclusions from the two approaches? Is this RD design valid? Why/Why not? 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Mixtape Sessions Banner 2 | 3 | 4 | ## About 5 | 6 | This course covers methods for the analysis and interpretation of the Regression Discontinuity (RD) design, a non-experimental strategy to study treatment effects that can be used when units receive a treatment based on a score and a cutoff. The course covers methods for estimation, inference, and falsification of RD treatment effects using two different approaches: the continuity-based framework, implemented with local polynomials, and the local randomization framework, implemented with standard tools from the analysis of experiments. The focus is on conceptual understanding of the underlying methodological issues and effective empirical implementation. Every topic is illustrated with the analysis of RD examples using real-world data, walking through R and Stata codes that fully implement all the methods discussed. At the end of the course, participants will have acquired the necessary skills to rigorously interpret, visualize, validate, estimate, and characterize the uncertainty of RD treatment effects. 7 | 8 | ## Overview 9 | 10 | The goal of this workshop is to give an introduction to standard and recent methodological developments in the analysis and interpretation of regression discontinuity (RD) designs. The course focuses on methodology and empirical practice, as well as on conceptual issues, but not on the technical details of the statistical and econometric theory underlying the results. A brief 11 | description of the course, along with references to further readings, is given below. 12 | 13 | It is assumed that participants have elementary working knowledge of statistics, econometrics and policy evaluation. 
It would be useful, but not required, if participants were familiar with basic results from the literature on program evaluation and treatment effects at the level of Wooldridge (2010). The course is nonetheless meant to be self-contained and hence most underlying statistics/econometrics concepts and results are introduced and explained in class 14 | 15 | ## Schedule 16 | 17 | ### Day 1 18 | 19 | Introduction to Sharp RD design and graphical illustration. Continuity based framework: estimands, identification assumptions, and point estimation. 20 | 21 | - Sharp RD design: introduction and graphical illustration with RD plots. 22 | - Continuity based RD analysis: estimands and identification. 23 | - Estimation of RD effects with local polynomials. 24 | - Optimal bandwidth selection. 25 | 26 | #### Readings 27 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf), Chapters 1, 2, 3, and Sections 4.1 and 4.2 in Chapter 4. 28 | 29 | ### Day 2 30 | 31 | Continuity based framework, continued: robust inference based on local polynomials. Introduction to local randomization framework: estimands, window selection, estimation and inference. 
32 | 33 | - Continuity based RD analysis, continued: 34 | – Robust confidence intervals based on local polynomials 35 | - Local Randomization RD analysis 36 | – Inferences based on Fisherian methods 37 | – Window selection based on covariates 38 | – Inferences based on large-sample methods 39 | 40 | #### Readings 41 | 42 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf), Chapter 4 43 | 44 | [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf), Chapter 2 45 | 46 | ### Day 3 47 | 48 | RD local randomization analysis, RD falsification methods, and extensions to canonical RD design. 49 | 50 | - Falsification of RD assumptions: density and covariate balance tests 51 | - The RD design under imperfect compliance 52 | 53 | #### Readings 54 | 55 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf), Chapter 5 56 | 57 | [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf), Chapter 3 58 | 59 | 60 | 61 | ## Slides 62 | 63 | [RD Slides](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Slides/RD.pdf) 64 | 65 | 66 | ## Coding Exercises 67 | 68 | [Exercise 1: Meyersson (2014)](https://github.com/Mixtape-Sessions/Regression-Discontinuity/tree/main/Labs/Exercise-1/) 69 | 70 | - Paper: [Meyersson (2014)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/Meyersson2014-ECTA.pdf) 71 | 72 | [Exercise 2: Caughey and Sekhon (2011)](https://github.com/Mixtape-Sessions/Regression-Discontinuity/tree/main/Labs/Exercise-2/) 
73 | 74 | - Paper: [Caughey and Sekhon (2011)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CaugheySekhon2011-PA.pdf) 75 | 76 | [Additional code for all covered material](https://github.com/Mixtape-Sessions/Regression-Discontinuity/tree/main/Code/) 77 | 78 | 79 | ## Readings 80 | 81 | ### Essential 82 | 83 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf) 84 | 85 | [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf) 86 | 87 | ### Background 88 | 89 | [Cattaneo, Frandsen, and Titiunik (2015)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoFrandsenTitiunik2015-JCI.pdf) 90 | 91 | [Cattaneo and Titiunik (2022)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoTitiunik2022-ARE.pdf) 92 | 93 | [Imbens and Lemieux (2008)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/ImbensLemieux2008-JoE.pdf) 94 | 95 | [Lee and Lemieux (2010)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/LeeLemieux2010-JEL.pdf) 96 | 97 | 98 | ## Computing and Software 99 | 100 | To participate in the hands-on exercises, you are strongly encouraged to secure access to a computer with the most recent version of `Stata` or `R` plus `RStudio` installed. 101 | 102 | All functions and packages are available in `Stata`, which is not free, and `R`, a free and open-source statistical software environment. We also have `Python` versions of some of the packages (we are still in the process of creating Python libraries for all the commands). 
103 | 104 | The workshop will employ several empirical illustrations, which will be analyzed using `Stata` and/or `R`. The following `Stata`/`R`/`Python` modules/commands will be used: 105 | 106 | - [rdrobust](https://rdpackages.github.io/rdrobust): RD inference employing local polynomial and partitioning methods. See Calonico, Cattaneo and Titiunik ([2014](https://journals.sagepub.com/doi/pdf/10.1177/1536867X1401400413), [2015](https://journal.r-project.org/archive/2015/RJ-2015-004/RJ-2015-004.pdf)) for introductions. 107 | 108 | - [rddensity](https://rdpackages.github.io/rddensity): Manipulation testing for RD designs. See [Cattaneo, Jansson and Ma (2018)](https://journals.sagepub.com/doi/10.1177/1536867X1801800115) for an introduction. 109 | 110 | - [rdlocrand](https://rdpackages.github.io/rdlocrand): RD inference employing randomization inference methods. See [Cattaneo, Titiunik and Vazquez-Bare (2016)](https://journals.sagepub.com/doi/10.1177/1536867X1601600205) for an introduction. 111 | 112 | - [rdmulti](https://rdpackages.github.io/rdmulti): Local polynomial methods for estimation and inference for RD designs with multiple cutoffs. 113 | 114 | - [rdpower](https://rdpackages.github.io/rdpower): Power and sample size calculations using robust bias-corrected local polynomial inference methods. 
115 | 116 | Further details, including how to install the packages in both R and Stata may be found at: https://rdpackages.github.io 117 | 118 | Replication files in both `R` and `Stata` for all the empirical analyses in [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf) and [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf) may be found at: https://github.com/rdpackages-replication/CIT_2020_CUP 119 | 120 | Please make sure you have `Stata` or `R` and the above modules/commands installed and fully functional on your personal computer before the course begins. Datasets, do-files and `R` files will be provided in advance. 121 | 122 | 
Software installation instructions 123 | 124 | If you will be using R, I suggest also installing RStudio. To install these programs, follow the 125 | instructions below: 126 | 127 | 1. Download the most recent version of [R](https://www.r-project.org/) and install it on your computer. R is free and available for Windows, Mac, and Linux operating systems. Download the version of R compatible with your operating system from [CRAN](https://cloud.r-project.org/). If you are running Windows or MacOS, you should choose one of the precompiled binary distributions (i.e., ready-to-run applications) that are linked at the top of the page, not the source codes. 128 | 129 | 2. Download and install [R Studio](https://www.rstudio.com/products/rstudio/download/). This is an integrated development environment (IDE) that includes a console, a syntax-highlighting editor, and other tools for plotting, history, debugging and workspace management for R use. RStudio is free and available for Windows, Mac, and Linux platforms. 130 | 131 | If you are using Stata, you will have to obtain a license from [here](https://www.stata.com/). Stata is licensed through [StataCorp](http://www.stata.com/) and is frequently offered at a significant discount through academic institutions to their employees and students. Seminar participants who are not yet ready to purchase Stata could take advantage of StataCorp’s 30-day software return policy and obtain the latest version of Stata on a trial basis in the weeks immediately preceding this course. Use this link for 30-day trial copy: http://www.stata.com/customer-service/evaluate-stata/ 132 | 133 | 
134 | 135 | 136 |
Package installation instructions 137 | 138 | You will need to install the following packages: `rdrobust`, `rddensity`, `rdlocrand`, `rdpower` and `rdmulti`. In R, make sure you have an internet connection and then launch RStudio. Copy the following lines of text and paste them into R’s command prompt (located in the window named “Console”): 139 | 140 | ```r 141 | install.packages("rdrobust") 142 | install.packages("rddensity") 143 | install.packages("rdlocrand") 144 | install.packages("rdpower") 145 | install.packages("rdmulti") 146 | ``` 147 | 148 | In Stata, make sure you have an internet connection and then type: 149 | 150 | ```stata 151 | net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace 152 | net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace 153 | net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace 154 | net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace 155 | net install rdmulti, from(https://raw.githubusercontent.com/rdpackages/rdmulti/master/stata) replace 156 | ``` 157 | 158 |
159 | 160 | 161 | 162 | 163 | -------------------------------------------------------------------------------- /Readings/CattaneoFrandsenTitiunik2015-JCI.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoFrandsenTitiunik2015-JCI.pdf -------------------------------------------------------------------------------- /Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf -------------------------------------------------------------------------------- /Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf -------------------------------------------------------------------------------- /Readings/CattaneoTitiunik2022-ARE.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoTitiunik2022-ARE.pdf -------------------------------------------------------------------------------- /Readings/CaugheySekhon2011-PA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CaugheySekhon2011-PA.pdf -------------------------------------------------------------------------------- 
/Readings/ImbensLemieux2008-JoE.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/ImbensLemieux2008-JoE.pdf -------------------------------------------------------------------------------- /Readings/LeeLemieux2010-JEL.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/LeeLemieux2010-JEL.pdf -------------------------------------------------------------------------------- /Readings/Meyersson2014-ECTA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/Meyersson2014-ECTA.pdf -------------------------------------------------------------------------------- /Slides/RD.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Slides/RD.pdf -------------------------------------------------------------------------------- /Syllabus.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Syllabus.pdf -------------------------------------------------------------------------------- /img/banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/img/banner.png -------------------------------------------------------------------------------- /img/logo.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/img/logo.png --------------------------------------------------------------------------------