├── .gitignore
├── Code
    ├── RDcourse-part1-causal-inf.do
    ├── RDcourse-part3-rdplots.R
    ├── RDcourse-part3-rdplots.do
    ├── RDcourse-part4-rdrobust.R
    ├── RDcourse-part4-rdrobust.do
    ├── RDcourse-part5-localrandomization.R
    ├── RDcourse-part5-localrandomization.do
    ├── RDcourse-part6-fuzzyRD.R
    ├── RDcourse-part6-fuzzyRD.do
    ├── RDcourse-part7-falsification.R
    ├── RDcourse-part7-falsification.do
    ├── RDcourse-part8-discrete.R
    ├── RDcourse-part8-discrete.do
    ├── README.md
    ├── education.dta
    ├── headstart.dta
    ├── house.dta
    ├── jtpa.csv
    ├── jtpa.dta
    ├── senate.dta
    ├── spp.dta
    ├── uruguayancct.dta
    └── workflow.do
├── Labs
    ├── Exercise-1
    │   ├── README.md
    │   ├── exercise-Meyersson.do
    │   ├── polecon.csv
    │   └── polecon.dta
    └── Exercise-2
    │   ├── CaugheySekhon2011.csv
    │   ├── CaugheySekhon2011.dta
    │   ├── README.md
    │   └── exercise-CaugheySekhon.do
├── README.md
├── Readings
    ├── CattaneoFrandsenTitiunik2015-JCI.pdf
    ├── CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf
    ├── CattaneoIdroboTitiunik2023-CUP-Extensions.pdf
    ├── CattaneoTitiunik2022-ARE.pdf
    ├── CaugheySekhon2011-PA.pdf
    ├── ImbensLemieux2008-JoE.pdf
    ├── LeeLemieux2010-JEL.pdf
    └── Meyersson2014-ECTA.pdf
├── Slides
    └── RD.pdf
├── Syllabus.pdf
└── img
    ├── banner.png
    └── logo.png


/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | 
 3 | ## Core latex/pdflatex auxiliary files:
 4 | *.aux
 5 | *.lof
 6 | *.log
 7 | *.lot
 8 | *.fls
 9 | *.out
10 | *.toc
11 | *.fmt
12 | *.fot
13 | *.cb
14 | *.cb2
15 | .*.lb
16 | *.nav
17 | *.snm
18 | 
19 | 
20 | 
21 | ## Intermediate documents:
22 | *.dvi
23 | *.xdv
24 | *-converted-to.*
25 | # these rules might exclude image files for figures etc.
26 | # *.ps
27 | # *.eps
28 | # *.pdf
29 | 
30 | ## Generated if empty string is given at "Please type another file name for output:"
31 | .pdf
32 | 
33 | ## Bibliography auxiliary files (bibtex/biblatex/biber):
34 | *.bbl
35 | *.bcf
36 | *.blg
37 | *-blx.aux
38 | *-blx.bib
39 | *.run.xml
40 | 
41 | ## Build tool auxiliary files:
42 | *.fdb_latexmk
43 | *.synctex
44 | *.synctex(busy)
45 | *.synctex.gz
46 | *.synctex.gz(busy)
47 | *.pdfsync
48 | 
49 | 
50 | ## Word/Ppt Temp files
51 | **/~$*
52 | .Rproj.user
53 | 
54 | # lyx
55 | *.lyx~
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part1-causal-inf.do:
--------------------------------------------------------------------------------
  1 | ********************************************************************************
  2 | ** Part 1: Introduction to Causal Inference and Policy Evaluation
  3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico
  4 | ** Last update: May 2023
  5 | ********************************************************************************
  6 | ** RDROBUST:  net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
  7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
  8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
  9 | ** RDPOWER:   net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
 10 | ********************************************************************************
 11 | 
 12 | ********************************************************************************
 13 | clear all
 14 | set more off
 15 | set linesize 200
 16 | 
 17 | ********************************************************************************
 18 | ** Example: JTPA
 19 | ********************************************************************************
 20 | use jtpa.dta, clear
 21 | 
 22 | ********************************************************************************
 23 | ** Conventional approach
 24 | ********************************************************************************
 25 | ** using TTEST
 26 | ttest earnings, by(treatment)
 27 | ttest earnings, by(treatment) unequal
 28 | 
 29 | ** using stata
 30 | sum earnings if treatment==0
 31 | ret list
 32 | local N0 = r(N)
 33 | local mu0 = r(mean)
 34 | local sd0 = r(sd)
 35 | local V0 = r(Var)/r(N)
 36 | 
 37 | sum earnings if treatment==1
 38 | ret list
 39 | local N1 = r(N)
 40 | local mu1 = r(mean)
 41 | local sd1 = r(sd)
 42 | local V1 = r(Var)/r(N)	
 43 | 
 44 | local tau = `mu1'-`mu0'
 45 | local v = sqrt(`V1'+`V0')
 46 | local T = `tau'/`v'
 47 | local pval = 2*normal(-abs(`T'))
 48 | 
 49 | local mu0 = round(`mu0', .01)  // round all displayed means/SDs to the same precision (two decimals)
 50 | local mu1 = round(`mu1', .01)  // was .0001, which made the treatment row inconsistent with the control row
 51 | local sd0 = round(`sd0', .01)
 52 | local sd1 = round(`sd1', .01)
 53 | 
 54 | di "Group Means (Std.Dev.)"
 55 | di "Control: `mu0' (`sd0') -- N=`N0'" 
 56 | di "Treatme: `mu1' (`sd1') -- N=`N1'" 
 57 | di "Test = `T' -- p-val = `pval'" 
 58 | 
 59 | * Compare to:
 60 | ttest earnings, by(treatment) unequal
 61 | ret list
 62 | 
 63 | ** using mata
 64 | mata:
 65 | 	y = st_data(., "earnings")
 66 | 	t = st_data(., "treatment")
 67 | 	N1 = sum(t:==1); N0 = rows(t)-N1
 68 | 	mu1 = mean(select(y,t:==1)); mu1
 69 | 	mu0 = mean(select(y,t:==0)); mu0
 70 | 	v1 = variance(select(y,t:==1))/N1;
 71 | 	v0 = variance(select(y,t:==0))/N0;
 72 | 	
 73 | 	T = (mu1-mu0)/sqrt(v1+v0)
 74 | 	pval = 2*normal(-abs(T))
 75 | 	
 76 | 	st_numscalar("T", T); st_numscalar("pval", pval)
 77 | end
 78 | 
 79 | display "t-test = " T
 80 | display "pval   = " pval
 81 | 
 82 | ********************************************************************************
 83 | ** Randomization Inference approach
 84 | ********************************************************************************
 85 | clear all
 86 | set obs 15
 87 | gen T = (runiform()>=.6)
 88 | mata: st_numscalar("toshiba", comb(15,sum(st_data(.,"T"))))
 89 | 
 90 | gen Y = rnormal() + T*1
 91 | ** using stata, ttest
 92 | ttest Y, by(T)
 93 | 
 94 | ** using stata, permute
 95 | permute T diffmean=(r(mu_2)-r(mu_1)), reps(1000) nowarn: ttest Y, by(T)
 96 | matrix pval = r(p_twosided)
 97 | display "p-val = " pval[1,1]
 98 | 
 99 | ** using mata, by hand
100 | ** careful: cvpermute() not random, it enumerates objects in order
101 | mata:
102 | 	y = st_data(., "Y"); t = st_data(., "T")	// pull outcome and treatment columns from the Stata dataset
103 | 	
104 | 	totperm = comb(rows(t),sum(t)); totperm	// number of assignments keeping the observed count of treated units
105 | 
106 | 	T = abs(mean(select(y,t:==1))-mean(select(y,t:==0)))	// observed absolute difference in means
107 | 
108 | 	S=3000; Tdist = J(min((S,totperm)),1,.)	// store at most S permutation statistics
109 | 	info = cvpermutesetup(t); s=1
110 | 	while ((t=cvpermute(info)) != J(0,1,.) & s<=S){	// t is overwritten by each permuted assignment; stop when exhausted or S reached
111 | 		Tdist[s,] = abs(mean(select(y,t:==1))-mean(select(y,t:==0)))	// statistic under the permuted assignment
112 | 		s++
113 | 	}
114 | 	pval = sum(Tdist:>=T)/min((S,totperm)); pval	// share of stored statistics at least as large as the observed one
115 | end
116 | 
117 | ** Example: JTPA
118 | use jtpa.dta, clear
119 | permute treatment diffmean=(r(mu_2)-r(mu_1)), reps(1000) nowarn: ttest earnings, by(treatment)
120 | 
121 | * Compare to:
122 | ttest earnings, by(treatment) unequal
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part3-rdplots.R:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | #  Part 3: RD plots
 3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik
 4 | # Last update: May 2023
 5 | #------------------------------------------------------------------------------#
 6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
 7 | #------------------------------------------------------------------------------#
 8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS:
 9 | # FOREIGN / READSTATA13: install.packages(c('foreign', 'readstata13'))
10 | # GGPLOT2: install.packages('ggplot2')
11 | # LPDENSITY: install.packages('lpdensity')
12 | # RDDENSITY: install.packages('rddensity')
13 | # RDLOCRAND: install.packages('rdlocrand')
14 | # RDROBUST: install.packages('rdrobust')
15 | #------------------------------------------------------------------------------#
16 | rm(list=ls())
17 | 
18 | library(foreign)
19 | library(lpdensity)
20 | library(rddensity)
21 | library(rdrobust)
22 | library(rdlocrand)
23 | library(readstata13)
24 | 
25 | data = read.dta13("headstart.dta")
26 | Y = data$mort_age59_related_postHS 
27 | X = data$povrate60 - 59.1984
28 | T = (X>=0)
29 | 
30 | dim(data)
31 | summary(data)
32 | length(X)
33 | length(unique(X))
34 | length(Y)
35 | length(unique(Y))
36 | 
37 | # Figure 3.1, Scatter plot
38 | plot(X, Y, xlab = "Running Variable", ylab = "Outcome", col=1, pch=20)
39 | abline(v=0)
40 | 
41 | out = rdplot(Y, X, binselect = 'qsmv', p=3)
42 | 
43 | # Figure 3.2, RD Plot Using 40 Bins of Equal Length
44 | out = rdplot(Y, X, nbins = c(20,20), binselect = 'esmv')
45 | 
46 | # Figure 3.4, 40 Evenly-Spaced Bins
47 | out = rdplot(Y, X, nbins = c(20,20), binselect = 'es')
48 | 
49 | # Figure 3.5, 40 Quantile-Spaced Bins
50 | out = rdplot(Y, X, nbins = c(20,20), binselect = 'qs')
51 | 
52 | # Figure 3.7, IMSE RD PLot with Evenly-Spaced Bins
53 | out = rdplot(Y, X,  binselect = 'es')
54 | 
55 | # Figure 3.8, IMSE RD Plot with Quantile-Spaced Bins
56 | out = rdplot(Y, X,  binselect = 'qs')
57 | 
58 | # Figure 3.10, Mimicking Variance RD Plot with Evenly-Spaced Bins
59 | out = rdplot(Y, X,  binselect = 'esmv')
60 | 
61 | # Figure 3.11, Mimicking Variance RD Plot with Quantile-Spaced Bins
62 | out = rdplot(Y, X,  binselect = 'qsmv')
63 | 
64 | out = rdplot(Y, X,  binselect = 'qsmv', kernel = 'uniform', support = c(-150,150))
65 | 
66 | 
67 | ################################################################################
68 | ## US Senate data
69 | ################################################################################
70 | data = read.dta13("senate.dta")
71 | names(data)
72 | 
73 | X = data$demmv
74 | Y = data$demvoteshfor2
75 | 
76 | # Create "Democratic Win at t"
77 | Z = rep(NA, length(X))
78 | Z[X<0  & !is.na(X)]=0
79 | Z[X>=0 & !is.na(X)]=1
80 | 
81 | plot(X, Y, xlab = "Running Variable", ylab = "Outcome", col=1, pch=20)
82 | abline(v=0)
83 | 
84 | out = rdplot(Y, X, binselect = 'qsmv', p=3)
85 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part3-rdplots.do:
--------------------------------------------------------------------------------
  1 | ********************************************************************************
  2 | **  Part 3: Graphical Illustration of RD designs
  3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico
  4 | ** Last update: May 2023
  5 | ********************************************************************************
  6 | ** RDROBUST:  net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
  7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
  8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
  9 | ** RDPOWER:   net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
 10 | ********************************************************************************
 11 | 
 12 | 
 13 | ********************************************************************************
 14 | clear all
 15 | set more off
 16 | set linesize 200
 17 | 
 18 | ********************************************************************************
 19 | ** Example 1 : Head Start
 20 | ********************************************************************************
 21 | use headstart, clear
 22 | gl y mort_age59_related_postHS
 23 | gl x povrate60
 24 | gl c = 59.1984
 25 | 
 26 | 
 27 | ** Basic Scatter Plot
 28 | scatter $y $x
 29 | scatter $y $x, xline($c, lcolor(red))
 30 | 
 31 | ********************************************************************************
 32 | ** RD Plots
 33 | ********************************************************************************
 34 | 
 35 | ** Quick RD plots (default: mimicking variance)
 36 | rdplot $y $x, c($c)
 37 | 
 38 | ** Add title 
 39 | rdplot $y $x , c($c) graph_options(title(RD Plot - Head Start) ///
 40 |                             ytitle(Child Mortality 5 to 9 years old) ///
 41 |                             xtitle(Poverty Rate))
 42 | 			    
 43 | 
 44 | ** Evenly-spaced
 45 | rdplot $y $x, c($c) binselect(es)
 46 | 
 47 | ** Quantile-spaced
 48 | rdplot $y $x, c($c) binselect(qs)
 49 | 
 50 | ** Global 2nd order polynomial
 51 | rdplot $y $x, c($c) p(2) 
 52 | 
 53 | ** Select manually number of bins
 54 | rdplot $y $x, c($c) nbins(10)  
 55 | 
 56 | ** Add confidence intervals
 57 | rdplot $y $x, c($c) nbins(10) ci(95) 
 58 | 
 59 | ** Add confidence intervals w/shade
 60 | rdplot $y $x, c($c) nbins(10) ci(95) shade
 61 | 
 62 | ** Generate variables
 63 | rdplot $y $x, c($c) p(2) ci(95) shade genvars
 64 | 
 65 | ** Stored output
 66 | ereturn list
 67 | 
 68 | ********************************************************************************
 69 | ** Example 2 : U.S. Senate
 70 | ********************************************************************************
 71 | use senate, clear
 72 | global x demmv
 73 | global y demvoteshfor2
 74 | global c = 0
 75 | ********************************************************************************
 76 | ** Summary Stats & Diff-in-means
 77 | ********************************************************************************
 78 | sum $y $x demvoteshlag1 demvoteshlag2 dopen
 79 | gen T = ($x>=0) if !missing($x)  // Stata ranks missing above every number; leave T missing when the margin is missing
 80 | ttest $y, by(T)
 81 | 
 82 | 
 83 | ********************************************************************************
 84 | ** RD Plots
 85 | ********************************************************************************
 86 | 
 87 | ** Quick RD plots (default: mimicking variance)
 88 | rdplot $y $x
 89 | 
 90 | rdplot $y $x, graph_options(name(Rdplot1))
 91 | 
 92 | rdplot $y $x if abs($x)<=40, graph_options(name(Rdplot2))
 93 | 
 94 | graph drop Rdplot1
 95 | graph drop Rdplot2
 96 | 
 97 | ** Add title 
 98 | rdplot $y $x , graph_options(title(RD Plot - Senate Elections Data) ///
 99 |                             ytitle(Vote Share in Election t+1) ///
100 |                             xtitle(Margin of Victory in Election t))
101 | 
102 | 
103 | ** Evenly-spaced
104 | rdplot $y $x, c($c) binselect(esmv)
105 | 
106 | ** Quantile-spaced
107 | rdplot $y $x, c($c) binselect(qsmv)
108 | 
109 | ** Global 2nd order polynomial
110 | rdplot $y $x, c($c) p(2) 
111 | 
112 | ** Select manually number of bins
113 | rdplot $y $x, c($c) nbins(10)  
114 | 
115 | ** Add confidence intervals
116 | rdplot $y $x, c($c) nbins(10) ci(95) 
117 | 
118 | ** Add confidence intervals w/shade
119 | rdplot $y $x, c($c) nbins(10) ci(95) shade
120 | 
121 | ** Generate variables
122 | rdplot $y $x, c($c) p(2) ci(95) shade genvars
123 | 
124 | ** Use support option
125 | rdplot $y $x, nbins(20) support(-100 100)
126 | 
127 | ** Stored output
128 | ereturn list			    
129 | 			    
130 | ********************************************************************************
131 | ** Example 3 : U.S. House
132 | ********************************************************************************
133 | use house, clear
134 | global x difdemshare
135 | global y demsharenext
136 | 
137 | ** Quick RD plot  (default: mimicking variance)
138 | rdplot $y $x , graph_options(title(RD Plot - House Elections Data) ///
139 |                             ytitle(Vote Share in Election at time t+1) ///
140 |                             xtitle(Margin of Victory at time t))
141 | 
142 | 
143 | 
144 | 
145 | 
146 | 
147 | 
148 | 
149 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part4-rdrobust.R:
--------------------------------------------------------------------------------
  1 | #------------------------------------------------------------------------------#
  2 | #  Part 4: Local Polynomial RD Analysis
  3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik
  4 | # Last update: May 2023
  5 | #------------------------------------------------------------------------------#
  6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
  7 | #------------------------------------------------------------------------------#
  8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS:
  9 | # FOREIGN / READSTATA13: install.packages(c('foreign', 'readstata13'))
 10 | # GGPLOT2: install.packages('ggplot2')
 11 | # LPDENSITY: install.packages('lpdensity')
 12 | # RDDENSITY: install.packages('rddensity')
 13 | # RDLOCRAND: install.packages('rdlocrand')
 14 | # RDROBUST: install.packages('rdrobust')
 15 | #------------------------------------------------------------------------------#
 16 | rm(list=ls())
 17 | 
 18 | library(foreign)
 19 | library(lpdensity)
 20 | library(rddensity)
 21 | library(rdrobust)
 22 | library(rdlocrand)
 23 | library(readstata13)
 24 | 
 25 | #######################################################
 26 | #
 27 | # Head Start
 28 | #
 29 | ########################################################
 30 | data = read.dta13("headstart.dta")
 31 | Y = data$mort_age59_related_postHS 
 32 | X = data$povrate60 - 59.1984
 33 | T = (X>=0)
 34 | 
 35 | 
 36 | #-----------------------------------------#
 37 | # Local polynomial analysis (Continuity-Based RD Approach)
 38 | #-----------------------------------------#
 39 | # Code snippet 1 (two regressions): separate linear fits within 10 units on each side of the cutoff
 40 | out = lm(Y~X,subset=X<0 & X>=-10)
 41 | left_intercept = out$coefficients[1]   # fitted value at X = 0 approached from the left
 42 | print(left_intercept)
 43 | out = lm(Y~X, subset=X>=0 & X<=10)     # closed window, matching abs(X)<=10 in the joint regression below
 44 | right_intercept = out$coefficients[1]  # fitted value at X = 0 approached from the right
 45 | print(right_intercept)
 46 | difference = right_intercept - left_intercept
 47 | print(paste("The RD estimator is", difference, sep = " "))
 48 | 
 49 | # Code snippet 2 (one regression)
 50 | T_X = X*T
 51 | out = lm(Y~X+T+T_X, subset=abs(X)<=10)
 52 | print(out)
 53 | 
 54 | # Code snippet 3 (generation of weights)
 55 | w = NA
 56 | w = 1-abs(X)/10
 57 | 
 58 | # Code snippet 4 (regression with weights)
 59 | summary(rdrobust(Y, X, h = 10, kernel = "uniform"))
 60 | 
 61 | out = lm(Y~X+T+T_X, weight=w, subset=abs(X)<=10)
 62 | print(out)
 63 | 
 64 | # Code snippet 5 (using rdrobust with uniform weights)
 65 | summary(rdrobust(Y, X, kernel = 'uniform',  p = 1, h = 10))
 66 | 
 67 | # Code snippet 6 (using rdrobust with triangular weights)
 68 | rdrobust(Y, X, kernel = 'triangular',  p = 1, h = 10)
 69 | 
 70 | # Code snippet 7 (using rdrobust with triangular weights and p=2)
 71 | rdrobust(Y, X, kernel = 'triangular',  p = 2, h = 10)
 72 | 
 73 | # Code snippet 8 (using rdbwselect, mserd)
 74 | rdbwselect(Y, X, kernel = 'triangular',  p = 1, bwselect = 'mserd')
 75 | 
 76 | # Code snippet 9 (using rdbwselect, msetwo)
 77 | rdbwselect(Y, X, kernel = 'triangular',  p = 1, bwselect = 'msetwo')
 78 | 
 79 | # Code snippet 10 (using rdrobust, mserd)
 80 | rdrobust(Y, X, kernel = 'triangular',  p = 1, bwselect = 'mserd')
 81 | 
 82 | # Code snippet 12 (using rdrobust and showing the associated rdplot)
 83 | bandwidth = rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')$h_l
 84 | out = rdplot(Y, X, p = 1, kernel = 'triangular', h = bandwidth)
 85 | print(out)
 86 | 
 87 | # Code snippet 11 (using rdrobust and showing the objects it returns)
 88 | rdout = rdrobust(Y, X, kernel = 'triangular', p = 1, bwselect = 'mserd')
 89 | print(names(rdout))
 90 | print(rdout$beta_p_r)
 91 | print(rdout$beta_p_l)
 92 | 
 93 | # Code snippet 13 (using rdrobust without regularization term)
 94 | rdrobust(Y, X, kernel = 'triangular', scaleregul = 0,  p = 1, bwselect = 'mserd')
 95 | 
 96 | # Code snippet 14 (using rdrobust default options)
 97 | rdrobust(Y, X, kernel = 'triangular',  p = 1, bwselect = 'mserd')
 98 | 
 99 | # Code snippet 15 (using rdrobust default options and showing all the output)
100 | rdrobust(Y, X, kernel = 'triangular',  p = 1, bwselect = 'mserd', all = TRUE)
101 | 
102 | 
103 | #############################################################
104 | ## Head Start Data: Additional Empirical Analysis
105 | #############################################################
106 | # 1960 Census covariates
107 | X60 = cbind(data$census1960_pop,
108 |             data$census1960_pctsch1417,
109 |             data$census1960_pctsch534, 
110 |             data$census1960_pctsch25plus,
111 |             data$census1960_pop1417,
112 |             data$census1960_pop534,
113 |             data$census1960_pop25plus,
114 |             data$census1960_pcturban,
115 |             data$census1960_pctblack)
116 | 
117 | # 1990 Census covariates
118 | X90 = cbind(data$census1990_pop,
119 |             data$census1990_pop1824,
120 |             data$census1990_pop2534,
121 |             data$census1990_pop3554,
122 |             data$census1990_pop55plus,
123 |             data$census1990_pcturban,
124 |             data$census1990_pctblack,
125 |             data$census1990_percapinc)
126 | 
127 | 
128 | # Placebo outcomes
129 | Plac = cbind(data$mort_age59_injury_postHS,data$mort_age59_related_preHS)
130 | 
131 | tmp = rdrobust(Plac[,1],X)
132 | summary(tmp)
133 | 
134 | tmp = rdrobust(Plac[,2],X)
135 | summary(tmp)
136 | 
137 | 
138 | ###################################################################
139 | ## Table 2: Nonparametric Density Continuity Tests
140 | ###################################################################
141 | 
142 | tmp = rddensity(data$povrate60, c = 59.1984)  # raw running variable with its cutoff (Xraw and c were never defined)
143 | summary(tmp)
144 | 
145 | ## Robust Nonparametric Method with Covariates
146 | rdrobust(Y,X,covs=X60)
147 | 
148 | ##  Robust Nonparametric Method: Different Bandwdiths at Each Side
149 | rdrobust(Y,X,bwselect='msetwo')
150 | 
151 | 
152 | ################################################################################
153 | ## Head Start Data using covariates
154 | ################################################################################
155 | data = read.dta13("headstart.dta")
156 | # Columns are taken from `data` explicitly; attach() would leave a stale copy on the search path once `data` is reassigned
157 | 
158 | y <- data$mort_age59_related_postHS
159 | x <- data$povrate60
160 | z <- with(data, cbind(census1960_pop, census1960_pctsch1417, census1960_pctsch534,
161 |            census1960_pctsch25plus, census1960_pop1417, census1960_pop534,
162 |            census1960_pop25plus, census1960_pcturban, census1960_pctblack))
163 | 
164 | ## rho unrestricted; MSE-optimal bandwidths w/o covs; RD w/o covs
165 | rd <- rdrobust(y, x, c=59.1968)
166 | h  <- rd$h_l
167 | b  <- rd$b_l
168 | IL <- rd$ci[3,2] - rd$ci[3,1]
169 | 
170 | 
171 | ## rho unrestricted; MSE-optimal bandwidths w/o covs; RD w/ covs
172 | rd <- rdrobust(y, x, c=59.1968, covs=z, h=h, b=b)
173 | ILch <- ((rd$ci[3,2] - rd$ci[3,1])/IL - 1)* 100
174 | 
175 | 
176 | ## rho unrestricted; MSE-optimal bandwidths w/ covs; RD w/ covs
177 | rd <- rdrobust(y, x, c=59.1968, covs=z)
178 | ILch <- ((rd$ci[3,2] - rd$ci[3,1])/IL - 1)* 100
179 | 
180 | 
181 | #######################################################
182 | #
183 | # US Senate application
184 | #
185 | ########################################################
186 | data = read.dta13("senate.dta")
187 | names(data)
188 | 
189 | X = data$demmv
190 | Y = data$demvoteshfor2
191 | 
192 | # Create "Democratic Win at t"
193 | Z = rep(NA, length(X))
194 | Z[X<0  & !is.na(X)]=0
195 | Z[X>=0 & !is.na(X)]=1
196 | 
197 | summary(data)
198 | length(X)
199 | length(unique(X))
200 | length(Y)
201 | length(unique(Y))
202 | 
203 | rdplot(Y,X)
204 | 
205 | rdplot(Y,X, binselect="qsmv", p=4)
206 | 
207 | # Using rdrobust with uniform weights
208 | out = rdrobust(Y, X, kernel = 'uniform',  p = 1, h = 17 , all=TRUE)
209 | summary(out)
210 | names(out)
211 | out$coef
212 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part4-rdrobust.do:
--------------------------------------------------------------------------------
  1 | ********************************************************************************
  2 | **  Part 4: Local Polynomial RD Analysis
  3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico
  4 | ** Last update: May 2023
  5 | ********************************************************************************
  6 | ** RDROBUST:  net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
  7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
  8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
  9 | ** RDPOWER:   net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
 10 | *********************************************************************************
 11 | *-------------------------------------------------------------------------------------------------------------*
 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
 13 | *-------------------------------------------------------------------------------------------------------------*
 14 | * cd "<folder containing the course .dta files>"   (machine-specific path removed; run this do-file from the Code folder, like the other parts)
 15 | 
 16 | clear all
 17 | 
 18 | ********************************************************
 19 | **** Head Start ****************************************
 20 | ********************************************************
 21 | 
 22 | use "headstart.dta", clear
 23 | 
 24 | gen Y = mort_age59_related_postHS 
 25 | gen X = povrate60 - 59.1984
 26 | 
 27 | gen T=.
 28 | replace T=0 if X<0 & X!=.
 29 | replace T=1 if X>=0 & X!=.
 30 | 
 31 | order X Y T
 32 | 
 33 | *  Descriptive Statistics
 34 | su
 35 | duplicates report Y 
 36 | tab Y
 37 | duplicates report X
 38 | 
 39 | *-----------------------------------------*
 40 | * Continuity-Based RD Approach *
 41 | *-----------------------------------------*
 42 | *** optimal bw
 43 | rdbwselect Y X,  p(0)
 44 | 
 45 | ** rdrobust: MSE data-driven bandwidth - local constant regression
 46 | rdrobust Y X,  p(0)
 47 | 
 48 | ** rdrobust: parametric bandwidth - local constant regression
 49 | rdrobust Y X, p(0) h(9)
 50 | 
 51 | ** rdrobust: MSE data-driven bandwidth - local linear regression
 52 | rdrobust Y X, p(1)
 53 | 
 54 | ** rdrobust: parametric bandwidth - local linear regression
 55 | rdrobust Y X,  p(1) h(9)
 56 | 
 57 | * Output from rdrobust
 58 | rdrobust Y X, h(10) kernel(uniform)
 59 | 
 60 | * Output from regressions on both sides
 61 | reg Y X if X<0 & X>=-10
 62 | matrix coef_left=e(b)
 63 | local intercept_left=coef_left[1,2]
 64 | reg Y X if X>=0 & X<=10
 65 | matrix coef_right=e(b)
 66 | local intercept_right=coef_right[1,2]
 67 | local difference=`intercept_right'-`intercept_left'
 68 | display "The RD estimator is `difference'"
 69 | 
 70 | * Output from joint regression
 71 | gen T_X=X*T
 72 | reg Y X T T_X if abs(X)<=10
 73 | 
 74 | * Matching standard errors
 75 | reg Y X T T_X if abs(X)<=10, robust
 76 | rdrobust Y X, h(10) kernel(uniform) vce(hc1)
 77 | 
 78 | * For other kernels besides uniform
 79 | gen weights=.
 80 | replace weights=(1-abs(X/10)) if abs(X)<10
 81 | 
 82 | * Code snippet 4 (regression with weights)
 83 | reg Y X [aw=weights] if X<0 & X>=-10
 84 | matrix coef_left=e(b)
 85 | local intercept_left=coef_left[1,2]
 86 | reg Y X [aw=weights] if X>=0 & X<=10
 87 | matrix coef_right=e(b)
 88 | local intercept_right=coef_right[1,2]
 89 | local difference=`intercept_right'-`intercept_left'
 90 | display "The RD estimator is `difference'"
 91 | 
 92 | rdrobust Y X,  h(10) kernel(uniform)
 93 | 
 94 | rdrobust Y X, kernel(triangular) p(1) h(10)  
 95 | 
 96 | rdrobust Y X, kernel(triangular) p(2) h(10)  
 97 | 
 98 | rdbwselect Y X, kernel(triangular) p(1) bwselect(mserd) 
 99 | 
100 | rdbwselect Y X, kernel(triangular) p(1) bwselect(msetwo) 
101 | 
102 | rdrobust Y X
103 | ereturn list
104 | 
105 | * (using rdrobust and showing the associated rdplot)
106 | rdrobust Y X, p(1) kernel(triangular) bwselect(mserd)
107 | local bandwidth=e(h_l)
108 | rdplot Y X if abs(X)<=`bandwidth', p(1) h(`bandwidth') kernel(triangular)
109 | 
110 | * (using rdrobust without regularization term)
111 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) scaleregul(0)
112 | 
113 | * (using rdrobust default options)
114 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd)  
115 | 
116 | * (using rdrobust with the CER-optimal bandwidth, bwselect(cerrd))
117 | rdrobust Y X, kernel(triangular) p(1) bwselect(cerrd)  
118 | 
119 | 
120 | * (using rdrobust default options and showing all the output)
121 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) all
122 | 
123 | ** Placebo Outcome Variables
124 | foreach y of varlist mort_age59_injury_postHS mort_age59_related_preHS {
125 |    rdrobust `y' X, p(0)
126 |    rdrobust `y' X, p(0) h(9)
127 |    rdrobust `y' X, p(1)
128 |    rdrobust `y' X, p(1) h(9)
129 | }
130 | 
131 | 
132 | 
133 | ********************************************************************************
134 | ** Head Start Data
135 | ********************************************************************************
136 | use "headstart.dta", clear
137 | 
138 | gl y mort_age59_related_postHS
139 | gl x povrate60
140 | gl z census1960_pop census1960_pctsch1417 census1960_pctsch534 ///
141 |      census1960_pctsch25plus census1960_pop1417 census1960_pop534 ///
142 | 	 census1960_pop25plus census1960_pcturban census1960_pctblack
143 | 
144 | ** MSE-optimal bandwidths w/o covs; RD w/o covs
145 | rdrobust $y $x, c(59.1968)
146 | local h = e(h_l)
147 | local b = e(b_l)
148 | local IL =  e(ci_r_rb) - e(ci_l_rb)
149 | 
150 | ** MSE-optimal bandwidths w/o covs; RD w/ covs
151 | rdrobust $y $x, c(59.1968) covs($z) h(`h') b(`b')
152 | local ILch = round((((`e(ci_r_rb)' - `e(ci_l_rb)')/`IL') - 1)* 100, 0.001)
153 | 
154 | ** MSE-optimal bandwidths w/ covs; RD w/ covs
155 | rdrobust $y $x, c(59.1968) covs($z)
156 | local ILch = round((((`e(ci_r_rb)' - `e(ci_l_rb)')/`IL') - 1)* 100, 0.001)
157 | 
158 | 
159 | ********************************************************
160 | **** Incumbency US Senate ******************************
161 | ********************************************************
162 | 
163 | use "senate.dta", clear
164 | 
165 | rename demmv X
166 | rename demvoteshfor2 Y
167 | 
168 | gen T=.
169 | replace T=0 if X<0 & X!=.
170 | replace T=1 if X>=0 & X!=.
171 | label var T "Democratic Win at t"
172 | 
173 | order X Y T
174 | 
175 | *  Descriptive Statistics
176 | su
177 | duplicates report Y 
178 | tab Y
179 | duplicates report X
180 | 	
181 | 
182 | * rdrobust with a uniform kernel and a fixed bandwidth of 10
183 | rdrobust Y X, kernel(uniform) h(10)
184 | 
185 | * Same point estimate by hand: one linear fit per side, then difference the intercepts
186 | regress Y X if X>=-10 & X<0
187 | matrix coef_left = e(b)
188 | local intercept_left = coef_left[1,2]
189 | regress Y X if X<=10 & X>=0
190 | matrix coef_right = e(b)
191 | local intercept_right = coef_right[1,2]
192 | local difference = `intercept_right' - `intercept_left'
193 | display "The RD estimator is `difference'"
194 | 
195 | * Joint regression with the score interacted with treatment
196 | generate T_X = T*X
197 | regress Y X T T_X if abs(X)<=10
198 | 
199 | * Matching standard errors
200 | regress Y X T T_X if abs(X)<=10, vce(robust)
201 | rdrobust Y X, kernel(uniform) h(10) vce(hc1)
202 | 
203 | * Kernels other than uniform: build the weights by hand
204 | * weights = 1 - |X/10| strictly inside the bandwidth of 10, missing elsewhere
205 | generate weights = (1-abs(X/10)) if abs(X)<10
206 | 
207 | * Code snippet 4 (regression with weights)
208 | regress Y X [aweight=weights] if X>=-10 & X<0
209 | matrix coef_left = e(b)
210 | local intercept_left = coef_left[1,2]
211 | regress Y X [aweight=weights] if X<=10 & X>=0
212 | matrix coef_right = e(b)
213 | local intercept_right = coef_right[1,2]
214 | local difference = `intercept_right' - `intercept_left'
215 | display "The RD estimator is `difference'"
216 | 
217 | rdrobust Y X,  h(10)  
218 | * Same fixed bandwidth with kernel and polynomial order spelled out (p=1, then p=2)
219 | rdrobust Y X, kernel(triangular) p(1) h(10)  
220 | 
221 | rdrobust Y X, kernel(triangular) p(2) h(10)  
222 | * Bandwidth selection only: common bandwidth (mserd), then one bandwidth per side (msetwo)
223 | rdbwselect Y X, kernel(triangular) p(1) bwselect(mserd) 
224 | 
225 | rdbwselect Y X, kernel(triangular) p(1) bwselect(msetwo) 
226 | * Data-driven bandwidth; ereturn list shows every result stored in e()
227 | rdrobust Y X
228 | ereturn list
229 | 
230 | * (showing the associated rdplot)
231 | rdrobust Y X, p(1) kernel(triangular) bwselect(mserd)
232 | local bandwidth=e(h_l)
233 | rdplot Y X if abs(X)<=`bandwidth', p(1) h(`bandwidth') kernel(triangular)
234 | 
235 | * (using rdrobust without regularization term)
236 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) scaleregul(0)
237 | 
238 | * (using rdrobust default options)
239 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd)  
240 | 
241 | * (using the cerrd bandwidth selector instead of mserd)
242 | rdrobust Y X, kernel(triangular) p(1) bwselect(cerrd)  
243 | 
244 | 
245 | * (using rdrobust default options and showing all the output)
246 | rdrobust Y X, kernel(triangular) p(1) bwselect(mserd) all
247 | 
248 | * (rdbwselect with covariates)
249 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
250 | rdbwselect Y X, covs($covariates) p(1) kernel(triangular) bwselect(mserd) scaleregul(1)
251 | 
252 | * (rdrobust with covariates; the global is redefined so this snippet can be run on its own)
253 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
254 | rdrobust Y X, covs($covariates) p(1) kernel(triangular) bwselect(mserd) scaleregul(1)
255 | 
256 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part5-localrandomization.R:
--------------------------------------------------------------------------------
 1 | #------------------------------------------------------------------------------#
 2 | #  Part 5: Local Randomization RD analysis
 3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik
 4 | # Last update: May 2023
 5 | #------------------------------------------------------------------------------#
 6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
 7 | #------------------------------------------------------------------------------#
 8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS:
 9 | # FOREIGN: install.packages('foreign')
10 | # GGPLOT2: install.packages('ggplot2')
11 | # LPDENSITY: install.packages('lpdensity')
12 | # RDDENSITY: install.packages('rddensity')
13 | # RDLOCRAND: install.packages('rdlocrand')
14 | # RDROBUST: install.packages('rdrobust')
15 | #------------------------------------------------------------------------------#
16 | rm(list=ls())
17 | 
18 | library(foreign)
19 | library(lpdensity)
20 | library(rddensity)
21 | library(rdrobust)
22 | library(rdlocrand)
23 | library(readstata13)
24 | 
25 | ################################################################################
26 | ## US Senate data
27 | ################################################################################
28 | data <- read.dta13("senate.dta")
29 | names(data)
30 | 
31 | X <- data$demmv           # running variable (score)
32 | Y <- data$demvoteshfor2   # outcome
33 | 
34 | # Create "Democratic Win at t": 1 if X>=0, 0 if X<0, NA where X is missing
35 | Z <- as.numeric(X >= 0)
36 | 
37 | 
38 | 
39 | #*--------------------------------------------*
40 | #  * Local Randomization RD Approach *
41 | #  *--------------------------------------------*
42 | rdrandinf(Y, X, wl=-0.7652, wr=0.7652, seed=50, ci=0.05)  # ci takes the alpha level (optionally followed by a grid), not a logical
43 | 
44 | # Code snippet 2, Using rdrandinf in the Ad-hoc Window with the Bernoulli Option
45 | bernpr=rep(NA, length(X))   # assignment probability 1/2 inside the window, NA outside
46 | bernpr[abs(X)<=0.75]=1/2
47 | rdrandinf(Y,  X, wl=-0.75, wr=0.75, seed=50, bernoulli =bernpr)
48 | 
49 | # Code snippet 3, Using rdrandinf in the Ad-hoc Window with the Kolmogorov-Smirnov Statistic
50 | rdrandinf(Y,  X, wl=-0.75, wr=0.75, seed=50, statistic = "ksmirnov")
51 | 
52 | # Code snippet 4, Using rdrandinf in the Ad-hoc Window with the Confidence Interval Option
53 | rdrandinf(Y,  X, wl=-0.75, wr=0.75, seed=50, ci = c(0.05, seq(from=-10, to=10, by=0.25)))  # alpha, then grid of effects
54 | 
55 | # Code snippet 5, Using rdwinselect with the wobs Option
56 | covs = cbind(data$presdemvoteshlag1, data$demvoteshlag1, data$demvoteshlag2, data$demwinprv1, data$demwinprv2, data$dmidterm, data$dpresdem, data$dopen)
57 | rdwinselect(X, covs, cutoff = 0, seed = 50, wobs=2, nwindows=200, approx=TRUE, plot=TRUE)
58 | 
59 | # Code snippet 6, Using rdwinselect with the wobs, nwindows and plot Options
60 | rdwinselect(X, covs, seed = 50, wobs=2, nwindows =50, plot=TRUE) 
61 | 
62 | #Code snippet 7, Using rdwinselect with the wstep and nwindows Options
63 | rdwinselect(X, covs, seed = 50, wstep=0.1, nwindows=25)
64 | 
65 | # Code snippet 8, Using rdrandinf with the Confidence Interval Option
66 | rdrandinf(Y, X, wl=-0.75, wr=0.75, seed=50, ci = c(0.05, seq(from=-10, to=10, by=0.10)))
67 | 
68 | # Code snippet 9, Using rdrandinf with the Alternative Power Option
69 | out = rdrandinf(Y, X, covariates = covs, seed=50, d = 7.416)   # no wl/wr given: window is chosen from covs
70 | names(out)
71 | out$window
72 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part5-localrandomization.do:
--------------------------------------------------------------------------------
 1 | ********************************************************************************
 2 | **  Part 5: Local Randomization RD Analysis
 3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico
 4 | ** Last update: May 2023
 5 | ********************************************************************************
 6 | ** RDROBUST:  net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
 7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
 8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
 9 | ** RDPOWER:   net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
10 | *********************************************************************************
11 | *-------------------------------------------------------------------------------------------------------------*
12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
13 | *-------------------------------------------------------------------------------------------------------------*
14 | * Author's local data folder; capture lets the script continue from the current directory on other machines
15 | cap cd C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes
16 | clear all
17 | * clear all already drops the data and all matrices, so no separate clear / clear matrix is needed
18 | cap log close
19 | set more off
20 | 
21 | use "senate.dta", clear
22 | 
23 | * X is the running variable (score), Y the outcome
24 | rename (demmv demvoteshfor2) (X Y)
25 | 
26 | * Z = 1 when X>=0, 0 when X<0, missing when X is missing
27 | generate Z = (X>=0) if X!=.
28 | 
29 | label variable Z "Democratic Win at t"
30 | 
31 | order X Y Z
32 | 
33 | *--------------------------------------------*
34 | * Local Randomization RD Approach *
35 | *--------------------------------------------*
36 | * Code snippet 1, Using rdrandinf in the Ad-hoc Window
37 | rdrandinf Y X, seed(50) wl(-0.75) wr(0.75)
38 | 
39 | * Code snippet 2, Using rdrandinf in the Ad-hoc Window with the Bernoulli Option
40 | generate bern_prob = 1/2 if abs(X)<=0.75
41 | rdrandinf Y X, seed(50) wl(-0.75) wr(0.75) bernoulli(bern_prob)
42 | 
43 | * Code snippet 3, Using rdrandinf in the Ad-hoc Window with the Kolmogorov-Smirnov Statistic
44 | rdrandinf Y X, seed(50) wl(-0.75) wr(0.75) statistic(ksmirnov)
45 | 
46 | * Code snippet 4, Using rdrandinf in the Ad-hoc Window with the Confidence Interval Option
47 | rdrandinf Y X, seed(50) wl(-0.75) wr(0.75) ci(0.05 -10(0.25)10)
48 | 
49 | * Code snippet 5, Using rdwinselect with the wobs Option
50 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
51 | rdwinselect X $covs, wobs(2) seed(50)
52 | 
53 | * Code snippet 6, Using rdwinselect with the wobs, nwindows and plot Options
54 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
55 | rdwinselect X $covs, wobs(2) nwindows(100) seed(50) approximate plot
56 | 
57 | * Code snippet 7, Using rdwinselect with the wstep and nwindows Options
58 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
59 | rdwinselect X $covs, wstep(0.1) nwindows(25) seed(50)
60 | 
61 | * Code snippet 8, Using rdrandinf with the Confidence Interval Option
62 | rdrandinf Y X, seed(50) wl(-0.75) wr(0.75) ci(0.05 -10(0.1)10)
63 | 
64 | * Code snippet 9, Using rdrandinf with the Alternative Power Option
65 | global covs "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
66 | rdrandinf Y X, seed(50) covariates($covs) d(7.416)
67 | 
68 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part6-fuzzyRD.R:
--------------------------------------------------------------------------------
  1 | #------------------------------------------------------------------------------#
  2 | #  Part 6: Fuzzy RD
  3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik
  4 | # Last update: May 2023
  5 | #------------------------------------------------------------------------------#
  6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
  7 | #------------------------------------------------------------------------------#
  8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS:
  9 | # FOREIGN: install.packages('foreign')
 10 | # GGPLOT2: install.packages('ggplot2')
 11 | # LPDENSITY: install.packages('lpdensity')
 12 | # RDDENSITY: install.packages('rddensity')
 13 | # RDLOCRAND: install.packages('rdlocrand')
 14 | # RDROBUST: install.packages('rdrobust')
 15 | #------------------------------------------------------------------------------#
 16 | rm(list=ls())
 17 | 
 18 | library(foreign)
 19 | library(lpdensity)
 20 | library(rddensity)
 21 | library(rdrobust)
 22 | library(rdlocrand)
 23 | library(readstata13)
 24 | 
 25 | # Application by LONDONO-VELEZ, RODRIGUEZ AND SANCHEZ: Ser Pilo Paga (SPP) program
 26 | # Fuzzy RD analysis (using only one dimension) 
 27 | 
 28 | data <- read.dta13("spp.dta")
 29 | dim(data)
 30 | names(data)
 31 | 
 32 | # This replication file only includes first cohort (icfes_per == 20142), as in original replication files
 33 | table(data$icfes_per)
 34 | dim(data)
 35 | 
 36 | # Running variables
 37 | summary(data$running_saber11)  # discrete running variable: 447 unique values
 38 |   
 39 | summary(data$running_sisben)   # also discrete, but with many mass points: 25,375 unique values
 40 |   
 41 | # Treatment indicator (=1 if the student in fact received the money)
 42 | summary(data$beneficiary_spp)
 43 | 
 44 | # Keep subsample of students with scores above Saber 11 cutoff
 45 | summary(data$running_saber11)
 46 | data <- data[data$running_saber11 >= 0, ]
 47 | dim(data)
 48 | 
 49 | # Students who live in households who don't receive welfare benefits have missing score ==> drop missing scores
 50 | has_sisben <- !is.na(data$running_sisben)
 51 | sum(!has_sisben)
 52 | data <- data[has_sisben, ]
 53 | dim(data)
 54 |     
 55 | # Create variables: score X, treatment received D, outcomes Y1 and Y2
 56 | X  <- data$running_sisben
 57 | D  <- data$beneficiary_spp
 58 | Y1 <- data$spadies_any
 59 | Y2 <- data$spadies_hq
 60 | 
 61 | # First Stage: T on D
 62 | #-------------
 63 | # Rdrobust on D
 64 | out <- rdrobust(y = D, x = X)
 65 | summary(out)
 66 | 
 67 | # Rdplot on D
 68 | rdplot(y = D, x = X, y.label = "Received SPP subsidy", x.label = "SISBEN wealth index")
 69 | 
 70 | # ITT: T on Y
 71 | #-------------
 72 | rdplot(y = Y1, x = X, y.label = "Attended any HEI", x.label = "SISBEN wealth index")
 73 | 
 74 | out <- rdrobust(y = Y1, x = X)
 75 | summary(out)
 76 | 
 77 | # Rddensity
 78 | #----------
 79 | out <- rddensity(X)
 80 | summary(out)
 81 | 
 82 | # Outcome 1: Student Attended any high education institution (HEI) immediately after receiving subsidy
 83 | #--------------------
 84 | 
 85 | # Fuzzy effect; keep its main (h) and bias (b) bandwidths for the by-hand replication
 86 | fout <- rdrobust(y = Y1, x = X, fuzzy = D)
 87 | h <- fout$bws[1,1]
 88 | b <- fout$bws[2,1]
 89 | 
 90 | # equivalent to the two-step estimation below
 91 | # be careful! if there are missing values in D, you may fail to replicate the fuzzy effect by hand
 92 | sum(is.na(D))
 93 | 
 94 | out <- rdrobust(y = Y1, x = X, h = h, b = b)
 95 | itt <- out$Estimate[1]
 96 | 
 97 | out <- rdrobust(y = D, x = X, h = h, b = b)
 98 | fs <- out$Estimate[1]
 99 | 
100 | itt/fs
101 | 
102 | # Rdrobust (sharp) on Y1: cluster by mass point in running variable
103 | out <- rdrobust(y = Y1, x = X, cluster = X, vce = "nn")
104 | summary(out)
105 | 
106 | # Rdrobust (fuzzy) on Y1
107 | out <- rdrobust(y = Y1, x = X, fuzzy = D, cluster = X, vce = "nn")
108 | summary(out)
109 | 
110 | # Rdplot on Y1
111 | rdplot(y = Y1, x = X)
112 | 
113 | 
114 | # Outcome 2: Student Attended high-quality HEI immediately after receiving subsidy
115 | #-------------------------------
116 | # Rdrobust (sharp) on Y2
117 | rdrobust(Y2, X)
118 | 
119 | # Rdrobust (fuzzy) on Y2
120 | rdrobust(Y2, X, fuzzy =D)
121 | 
122 | # Rdplot on Y2 (plots the outcome against the score only, so no fuzzy argument is passed)
123 | rdplot(Y2, X)
124 | 
125 | # Validation on Covariates
126 | #-------------------------
127 | # Sharp rdrobust; icfes_age is used rather than icfes_per, which is constant here (single cohort kept)
128 | covs = cbind(data$icfes_female, data$icfes_age, data$icfes_urm, data$icfes_stratum, data$icfes_privatehs, data$icfes_famsize)
129 | for(i in 1:ncol(covs)){ 
130 | 	summary(rdrobust(covs[,i], X))
131 | }
132 | 
133 | # Fuzzy rdrobust
134 | for(i in 1:ncol(covs)){ 
135 |   summary(rdrobust(covs[,i], X, fuzzy=D))
136 | }
137 | 
138 | 
139 | # Local randomization Fuzzy RD analysis
140 | # Window selection on the covariates, then inference in the fixed window [-0.13, 0.13]
141 | rdwinselect(X, covs, seed = 86543)
142 | wl <- -0.13
143 | wr <- 0.13
144 | rdrandinf(D, X, wr = wr, wl = wl)
145 | rdrandinf(Y1, X, wr = wr, wl = wl)
146 | rdrandinf(Y2, X, wr = wr, wl = wl)
147 | rdrandinf(Y2, X, fuzzy = D, wr = wr, wl = wl)
148 | 
149 |           


--------------------------------------------------------------------------------
/Code/RDcourse-part6-fuzzyRD.do:
--------------------------------------------------------------------------------
  1 | ********************************************************************************
  2 | **  Part 6: Fuzzy RD designs
  3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico
  4 | ** Last update: May 2023
  5 | ********************************************************************************
  6 | ** RDROBUST:  net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
  7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
  8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
  9 | ** RDPOWER:   net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
 10 | *********************************************************************************
 11 | *-------------------------------------------------------------------------------------------------------------*
 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
 13 | *-------------------------------------------------------------------------------------------------------------*
 14 | * Author's local data folder; capture lets the script continue from the current directory on other machines
 15 | cap cd C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes
 16 | clear all
 17 | * clear all already drops the data and all matrices, so the repeated clear / clear matrix calls are not needed
 18 | 
 19 | cap log close
 20 | set more off
 21 | 
 22 | /***********************************
 23 | 
 24 | Fuzzy RD Analysis (one-dimensional)
 25 | 
 26 | ************************************/
 27 | 
 28 | *----------------------------------------------------------------------*
 29 | ** Empirical Illustration: LONDONO-VELEZ, RODRIGUEZ AND SANCHEZ (2020, AEJ Policy)
 30 | *-----------------------------------------------------------------------*
 31 | 
 32 | * Loading the data
 33 | use "spp.dta", clear
 34 | 
 35 | * This replication only includes first cohort (keep icfes_per==20142 as in replication files)
 36 | tabulate icfes_per
 37 | 
 38 | * Running variables
 39 | codebook running_saber11  // discrete running variable: 447 unique values
 40 | 
 41 | codebook running_sisben   // also discrete, but with many mass points: 25,375 unique values
 42 | 
 43 | * Treatment indicator (=1 if the student in fact received the money)
 44 | summarize beneficiary_spp
 45 | 
 46 | * Keep subsample of students with scores above Saber 11 cutoff
 47 | summarize running_saber11
 48 | drop if running_saber11<0
 49 | 
 50 | * Keep only observations with a SISBEN score: students in households without welfare benefits have it missing
 51 | keep if running_sisben!=.
 52 | 
 53 | * Look at repeated values in the final sample for analysis
 54 | duplicates report sisben_score
 55 | return list
 56 | 
 57 | duplicates tag sisben_score, generate(duplic)
 58 | 
 59 | * Rename variables 
 60 | rename running_sisben  X  // score (normalized): distance from Sisben wealth index to cutoff
 61 | rename beneficiary_spp D  // treatment: indicator for whether student received SPP subsidy
 62 | rename spadies_any Y1     // Outcome 1
 63 | rename spadies_hq  Y2     // Outcome 2
 64 | 
 65 | *-------------  
 66 | * First Stage: T on D
 67 | *-------------
 68 | 
 69 | * Rdplot on D
 70 | rdplot D X
 71 | 
 72 | * Rdrobust on D
 73 | /* Note: because of one-sided non-compliance there is no variability in D to the left of the cutoff;
 74 |          with msetwo, the bandwidth on the left of the cutoff is the entire support */
 75 | rdrobust D X
 76 | rdrobust D X, bwselect(msetwo)
 77 | 
 78 | *-------------
 79 | * ITT: T on Y
 80 | *-------------
 81 | rdplot Y1 X
 82 | 
 83 | rdrobust Y1 X  /* Bandwidth is not exactly the one in the published paper (h=9.028) */
 84 |                /* Two reasons for this : (1) Back then, rdrobust did not adjust for masspoints automatically
 85 | 			                             (2) They reported bias-corrected point estimators (!)
 86 |                */
 87 | rdrobust Y1 X, all masspoints(off) /* more similar */
 88 | 
 89 | *------------------
 90 | * Outcome 1:   Dummy ==1 if student attended any certified HEI immediately after receiving subsidy
 91 | *--------------------
 92 | * Fuzzy effect; its main and bias bandwidths are stored for the by-hand replication
 93 | rdrobust Y1 X, fuzzy(D)
 94 | global h = e(h_l)
 95 | global b = e(b_l)
 96 | 
 97 | * equivalent to the two-step estimation below
 98 | keep if D!=. /* be careful! if missing values in D are kept, the steps below do not replicate the effect */
 99 | rdrobust Y1 X, b($b) h($h)
100 | global itt = e(tau_cl)
101 | 
102 | rdrobust D X, b($b) h($h)
103 | global fs = e(tau_cl)
104 | 
105 | display $itt/$fs
106 | 
107 | * Rdrobust (sharp) on Y1, clustering on the score
108 | rdrobust Y1 X, vce(cluster X)
109 | 
110 | * Rdrobust (fuzzy) on Y1
111 | rdrobust Y1 X, vce(cluster X) fuzzy(D)
112 | 
113 | * Rdplot on Y1
114 | rdplot Y1 X
115 | 
116 | *------------------
117 | * Outcome 2:  Dummy ==1 if student attended a high-quality certified HEI immediately after receiving subsidy
118 | *-------------------------------
119 | * Rdrobust (sharp) on Y2
120 | rdrobust Y2 X
121 | 
122 | * Rdrobust (fuzzy) on Y2
123 | rdrobust Y2 X, fuzzy(D)
124 | 
125 | * Rdplot on Y2
126 | rdplot Y2 X
127 | 
128 | *-------------
129 | * Falsification
130 | *-------------
131 | *-------------
132 | * Rddensity
133 | *----------
134 | rddensity X
135 | 
136 | *-------------
137 | * Test on Covariates
138 | *-------------------------
139 | * Sharp rdrobust, one covariate at a time
140 | global covariates "icfes_female icfes_age icfes_urm icfes_stratum icfes_privatehs icfes_famsize"
141 | foreach cov in $covariates {
142 | 	rdrobust `cov' X, all
143 | }
144 | 
145 | * Fuzzy rdrobust
146 | foreach cov in $covariates {
147 | 	rdrobust `cov' X, all fuzzy(D)
148 | }
149 | 
150 | *************************************
151 | * Extra: Local randomization analysis
152 | *-------------------------------
153 | * Window selection
154 | rdwinselect X icfes_female icfes_age icfes_urm icfes_stratum icfes_privatehs icfes_famsize, seed(86543)
155 | 
156 | global wl = -0.13
157 | global wr = 0.13
158 | 
159 | * Rdrandinf on D: first stage in this window
160 | rdrandinf D X, wr($wr) wl($wl)
161 | 
162 | * Rdrandinf (sharp) on Y1
163 | rdrandinf Y1 X, wr($wr) wl($wl)
164 | 
165 | * Rdrandinf (fuzzy, AR) on Y1
166 | rdrandinf Y1 X, wr($wr) wl($wl) fuzzy(D)
167 | 
168 | * Rdrandinf (fuzzy, TSLS) on Y1
169 | rdrandinf Y1 X, wr($wr) wl($wl) fuzzy(D tsls)
170 | 
171 | * Rdrandinf (sharp) on Y2
172 | rdrandinf Y2 X, wr($wr) wl($wl)
173 | 
174 | * Rdrandinf (fuzzy, AR) on Y2
175 | rdrandinf Y2 X, wr($wr) wl($wl) fuzzy(D)
176 | 
177 | * Rdrandinf (fuzzy, TSLS) on Y2
178 | rdrandinf Y2 X, wr($wr) wl($wl) fuzzy(D tsls)
179 | 
180 | * Additional outcomes
181 | rdrandinf spadies_hq_pri X, wr($wr) wl($wl)
182 | rdrandinf spadies_hq_pub X, wr($wr) wl($wl)
183 | 
184 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part7-falsification.R:
--------------------------------------------------------------------------------
  1 | #------------------------------------------------------------------------------#
  2 | #  Part 7: Falsification Analysis for RD designs
  3 | # Authors: Matias Cattaneo, Sebastian Calonico, Rocio Titiunik
  4 | # Last update: May 2023
  5 | #------------------------------------------------------------------------------#
  6 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
  7 | #------------------------------------------------------------------------------#
  8 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS:
  9 | # FOREIGN: install.packages('foreign')
 10 | # GGPLOT2: install.packages('ggplot2')
 11 | # LPDENSITY: install.packages('lpdensity')
 12 | # RDDENSITY: install.packages('rddensity')
 13 | # RDLOCRAND: install.packages('rdlocrand')
 14 | # RDROBUST: install.packages('rdrobust')
 15 | #------------------------------------------------------------------------------#
 16 | rm(list=ls())
 17 | 
 18 | library(foreign)
 19 | library(lpdensity)
 20 | library(rddensity)
 21 | library(rdrobust)
 22 | library(rdlocrand)
 23 | library(readstata13)
 24 | library(ggplot2)
 25 | 
 26 | data <- read.dta13("headstart.dta")
 27 | names(data)
 28 | 
 29 | Y <- data$mort_age59_related_postHS 
 30 | X <- data$povrate60 - 59.1984   # score centered at the cutoff
 31 | 
 32 | # Using rddensity
 33 | out <- rddensity(X)
 34 | plot1 <- rdplotdensity(out, X)
 35 | plot1 <- rdplotdensity(out, X, plotN = 25, plotRange = c(-20, 20))
 36 | 
 37 | # Histogram restricted to the rddensity bandwidths on each side of the cutoff
 38 | out <- rddensity(X)
 39 | bw_left  <- as.numeric(out$h$left) 
 40 | bw_right <- as.numeric(out$h$right)
 41 | tempdata <- data.frame(v1 = X)
 42 | plot2 <- ggplot(data = tempdata, aes(x = v1)) +
 43 |   geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(-bw_left, 0, 1), col = "black", fill = "blue", alpha = 1) +
 44 |   geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(0, bw_right, 1), col = "black", fill = "red", alpha = 1) +
 45 |   geom_vline(xintercept = 0, color = "black") + labs(x = "Score", y = "Number of Observations") +
 46 |   theme_bw()
 47 | plot2
 48 | 
 49 | 	
 50 | #** Placebo Outcome Variables (results inside a for loop are not auto-printed, hence summary())
 51 | placebos =  cbind(data$mort_age59_injury_postHS, data$mort_age59_related_preHS)
 52 | for(i in 1:ncol(placebos)) {
 53 |    rdplot(placebos[,i], X)
 54 |    summary(rdrobust(placebos[,i], X))
 55 | }
 56 | # Pre-treatment covariates: same plot-and-estimate check, one column at a time
 57 | covariates = cbind(data$census1960_pop, data$census1960_pctsch1417, data$census1960_pctsch534, 
 58 |                    data$census1960_pctsch25plus, data$census1960_pop1417, data$census1960_pop534,
 59 | 	                 data$census1960_pop25plus, data$census1960_pcturban, data$census1960_pctblack)
 60 | 	 
 61 | for(i in 1:ncol(covariates)) {
 62 |    rdplot(covariates[,i], X)
 63 |    summary(rdrobust(covariates[,i], X))
 64 | }
 65 | 
 66 | # Estimating the RD effect at alternative cutoffs (true cutoff is zero)
 67 | summary(rdrobust(y = Y, x = X)) # true cutoff
 68 | summary(rdrobust(y = Y[X>0], x = X[X>0], c = 5)) # placebo cutoff at 5, using only observations with X>0
 69 | 	   
 70 | ################################################################################
 71 | ## US Senate data
 72 | ################################################################################
 73 | data <- read.dta13("senate.dta")
 74 | names(data)
 75 | 
 76 | X <- data$demmv           # running variable (score)
 77 | Y <- data$demvoteshfor2   # outcome
 78 | 
 79 | # Create "Democratic Win at t": 1 if X>=0, 0 if X<0, NA where X is missing
 80 | Z <- as.numeric(X >= 0)
 81 | 
 82 | 
 83 | 
 84 | # RDdensity
 85 | # Using rddensity
 86 | out <- rddensity(X)
 87 | summary(out)
 88 | plot1 <- rdplotdensity(out, X)
 89 | plot1 <- rdplotdensity(out, X, plotN = 25, plotRange = c(-20, 20))
 90 | 
 91 | # Histogram restricted to the rddensity bandwidths on each side of the cutoff
 92 | out <- rddensity(X)
 93 | bw_left  <- as.numeric(out$h$left) 
 94 | bw_right <- as.numeric(out$h$right)
 95 | tempdata <- data.frame(v1 = X)
 96 | plot2 <- ggplot(data = tempdata, aes(x = v1)) +
 97 |   geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(-bw_left, 0, 1), col = "black", fill = "blue", alpha = 1) +
 98 |   geom_histogram(data = tempdata, aes(x = v1, y= ..count..), breaks = seq(0, bw_right, 1), col = "black", fill = "red", alpha = 1) +
 99 |   geom_vline(xintercept = 0, color = "black") + labs(x = "Score", y = "Number of Observations") +
100 |   theme_bw()
101 | plot2
102 | 
103 | 
104 | # Binomial Test with rdwinselect
105 | rdwinselect(X, nwindows = 1, wmin = 0.75)
106 | 
107 | # Code snippet 3, Binomial Test by Hand
108 | binom.test(15, 39, p = 0.5)
109 | 
110 | # Code snippet 4, Using rdrobust on demvoteshlag1
111 | summary(rdrobust(y = data$demvoteshlag1, x = X))
112 | rdplot(y = data$demvoteshlag1, x = X)
113 | 
114 | # Table 6.1 (rdrobust on all the Senate covariates in covs; summary() prints each result inside the loop)
115 | covs = cbind(data$presdemvoteshlag1, data$demvoteshlag1, data$demvoteshlag2, data$demwinprv1, data$demwinprv2, data$dmidterm, data$dpresdem, data$dopen)
116 | for(i in 1:ncol(covs)) {
117 |   rdplot(covs[,i], X)
118 |   summary(rdrobust(covs[,i], X))
119 | }
120 | 
121 | # Code snippet 5, Using rdplot to Show the rdrobust Effect for demvoteshlag1
122 | out <- rdrobust(y = data$demvoteshlag1, x = X)
123 | bandwidth <- out$bws[1,1]
124 | ii <- abs(X) <= bandwidth   # observations within the estimated bandwidth
125 | rdplot(y = data$demvoteshlag1[ii], x = X[ii], h = bandwidth, p = 1, kernel = "triangular")
126 | 
127 | # Code snippet 6, Using rdrandinf on dopen
128 | rdrandinf(data$dopen, X, wr = .75, wl = -.75)
129 | 
130 | #* Table 6.2, rdrandinf on All Covariates (t.test is printed explicitly; var.equal=TRUE is the pooled-variance test)
131 | covs = cbind(data$presdemvoteshlag1, data$demvoteshlag1, data$demvoteshlag2, data$demwinprv1, data$demwinprv2, data$dmidterm, data$dpresdem, data$dopen)
132 | window=0.75; inwin = !is.na(X) & abs(X) <= window   # observations inside the closed window [-0.75, 0.75]
133 | for(i in 1:ncol(covs)) {
134 |   print(t.test(covs[inwin & Z==1, i], covs[inwin & Z==0, i], var.equal = TRUE))
135 | 	rdrandinf(covs[,i], X, wl=-window, wr=window)
136 | }
137 | 
138 | #* Code snippet 7, Using rdplot to Show the rdrobust Effect for dopen
139 | ii <- abs(X) <= .75
140 | rdplot(y = data$dopen[ii], x = X[ii], p = 0, h = 0.75, kernel = 'uniform')
141 | 
142 | # Code snippet 8, Using rdrobust with the Cutoff Equal to 1
143 | rdrobust(y = Y[X>=0], x = X[X>=0], c = 1)
144 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part7-falsification.do:
--------------------------------------------------------------------------------
  1 | ********************************************************************************
  2 | **  Part 7: RD Falsification Analysis
  3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico
  4 | ** Last update: May 2023
  5 | ********************************************************************************
  6 | ** RDROBUST:  net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
  7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
  8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
  9 | ** RDPOWER:   net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
 10 | *********************************************************************************
 11 | *-------------------------------------------------------------------------------------------------------------*
 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
 13 | *-------------------------------------------------------------------------------------------------------------*
 14 | * Author's local data folder; capture lets the script continue from the current directory on other machines
 15 | cap cd C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes
 16 | clear all
 17 | * clear all already drops the data and all matrices, so no separate clear / clear matrix is needed
 18 | cap log close
 19 | set more off
 20 | 
 21 | use "headstart.dta", clear
 22 | 
 23 | generate Y = mort_age59_related_postHS 
 24 | generate X = povrate60 - 59.1984
 25 | 
 26 | 
 27 | * Using rddensity
 28 | rddensity X
 29 | 
 30 | * Figure: histogram of the score within the rddensity bandwidths on each side of the cutoff
 31 | rddensity X
 32 | local bandwidth_left = e(h_l)
 33 | local bandwidth_right = e(h_r)
 34 | twoway (histogram X if X<0 & X>=-`bandwidth_left', width(1) freq color(blue)) ///
 35 | 	(histogram X if X<=`bandwidth_right' & X>=0, width(1) freq color(red)), legend(off) ///
 36 | 	xlabel(-20(10)30) xtitle(Score) ytitle(Number of Observations) graphregion(color(white))
 37 | 
 38 | rddensity X, plot
 39 | 	
 40 | 	
 41 | ** Placebo Outcome Variables
 42 | foreach placebo of varlist mort_age59_injury_postHS mort_age59_related_preHS {
 43 |    rdplot `placebo' X
 44 |    rdrobust `placebo' X
 45 | }
 46 | 
 47 | ** Pre-treatment covariates
 48 | foreach cov of varlist census1960_pop census1960_pctsch1417 census1960_pctsch534 ///
 49 |      census1960_pctsch25plus census1960_pop1417 census1960_pop534 ///
 50 | 	 census1960_pop25plus census1960_pcturban census1960_pctblack {
 51 | 	 
 52 | 	rdplot `cov' X
 53 | 	rdrobust `cov' X
 54 | }
 55 | 
 56 | 	 
 57 | 	 
 58 | 	 
 59 | 	 
 60 | ********************************************************************************
 61 | use "senate.dta", clear
 62 | 
 63 | * X is the running variable (score), Y the outcome
 64 | rename (demmv demvoteshfor2) (X Y)
 65 | 
 66 | * Z = 1 when X>=0, 0 when X<0, missing when X is missing
 67 | generate Z = (X>=0) if X!=.
 68 | 
 69 | label variable Z "Democratic Win at t"
 70 | 
 71 | order X Y Z
 72 | 
 73 | *-----------------------------------------*
 74 | * Section 6: Validation and Falsification *
 75 | *-----------------------------------------*
 76 | * Code snippet 1, Using rddensity
 77 | rddensity X
 78 | 
 79 | * Figure 6.2a: histogram of the score within the rddensity bandwidths on each side of the cutoff
 80 | rddensity X
 81 | local bandwidth_left = e(h_l)
 82 | local bandwidth_right = e(h_r)
 83 | twoway (histogram X if X<0 & X>=-`bandwidth_left', width(1) freq color(blue)) ///
 84 | 	(histogram X if X<=`bandwidth_right' & X>=0, width(1) freq color(red)), legend(off) ///
 85 | 	xlabel(-20(10)30) xtitle(Score) ytitle(Number of Observations) graphregion(color(white))
 86 | 
 87 | rddensity X, plot
 88 | 	
 89 | * Code snippet 2, Binomial Test with rdwinselect
 90 | rdwinselect X, nwindows(1) wmin(0.75)
 91 | 
 92 | * Code snippet 3, Binomial Test by Hand
 93 | bitesti 39 15 0.5
 94 | 
 95 | * Code snippet 4, Using rdrobust on demvoteshlag1
 96 | rdrobust demvoteshlag1 X
 97 | 
 98 | * Table 6.1 (rdrobust on all the covariates)
 99 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
100 | matrix define R=J(8,8,.)
101 | local k=1
102 | foreach y of global covariates {
103 | 	rdrobust `y' X
104 | }
105 | 
106 | 
107 | * Code snippet 5, Using rdplot to Show the rdrobust Effect for demvoteshlag1
108 | rdrobust demvoteshlag1 X
109 | local bandwidth=e(h_l)
110 | rdplot demvoteshlag1 X if abs(X)<=`bandwidth', h(`bandwidth') p(1) kernel(triangular)
111 | 
112 | * Code snippet 6, Using rdrandinf on dopen
113 | rdrandinf dopen X, wl(-.75) wr(.75)
114 | 
115 | * Table 6.2, rdrandinf on All Covariates
116 | global covariates "presdemvoteshlag1 demvoteshlag1 demvoteshlag2 demwinprv1 demwinprv2 dmidterm dpresdem dopen"
117 | local window=0.75
118 | matrix define R=J(8,5,.)
119 | local k=1
120 | foreach y of global covariates {
121 | 	ttest `y' if abs(X)<=`window', by(Z)
122 | 
123 | 	rdrandinf `y' X, wl(-`window') wr(`window')
124 | }
125 | 
126 | * Code snippet 7, Using rdplot to Show the rdrobust Effect for dopen
127 | rdplot dopen X if abs(X)<=.75, h(.75) p(0) kernel(uniform)
128 | 
129 | * Code snippet 8, Using rdrobust with the Cutoff Equal to 1
130 | rdrobust Y X if X>=0, c(1)
131 | 
132 | * Table 6.3 (rdrobust all cutoffs from -5 to 5 in increments of 1)
133 | matrix define R=J(11,10,.)
134 | local k=1
135 | forvalues x=-5(1)5 {
136 | 	if `x'>0 {
137 | 		local condition="if X>=0"
138 | 	}
139 | 	else if `x'<0 {
140 | 		local condition="if X<0"
141 | 	}
142 | 	else {
143 | 		local condition=""
144 | 	}
145 | 	
146 | 	rdrobust Y X `condition', c(`x')
147 | 
148 | }
149 | 
150 | 
151 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part8-discrete.R:
--------------------------------------------------------------------------------
  1 | #------------------------------------------------------------------------------#
  2 | #------------------------------------------------------------------------------#
  3 | # RD Designs with Discrete Running Variables
  4 | # Authors: Matias D. Cattaneo, Nicolas Idrobo and Rocio Titiunik
  5 | # Last update: May 2023
  6 | #------------------------------------------------------------------------------#
  7 | # SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
  8 | #------------------------------------------------------------------------------#
  9 | # TO INSTALL/DOWNLOAD R PACKAGES/FUNCTIONS:
 10 | # FOREIGN: install.packages('foreign')
 11 | # GGPLOT2: install.packages('ggplot2')
 12 | # GRID: install.packages('grid')
 13 | # LPDENSITY: install.packages('lpdensity')
 14 | # RDDENSITY: install.packages('rddensity')
 15 | # RDLOCRAND: install.packages('rdlocrand')
 16 | # RDROBUST: install.packages('rdrobust')
 17 | # TEACHINGDEMOS: install.packages('TeachingDemos')
 18 | #------------------------------------------------------------------------------#
 19 | #------------------------------------------------------------------------------#
 20 | rm(list=ls()) 
 21 | 
 22 | library(foreign)
 23 | library(ggplot2)
 24 | library(lpdensity)
 25 | library(rddensity)
 26 | library(rdrobust)
 27 | library(rdlocrand)
 28 | library(readstata13)
 29 | 
 30 | # Load the data (read.dta13 comes from the readstata13 package: install.packages('readstata13'))
 31 | data <- read.dta13("education.dta")
 32 | nextGPA <- data$nextGPA
 33 | left_school <- data$left_school
 34 | nextGPA_nonorm <- data$nextGPA_nonorm
 35 | X <- data$X    # score / running variable passed to every rd* call below
 36 | T <- data$T    # NOTE: from here on this masks R's built-in shorthand T (TRUE)
 37 | T_X <- T * X
 38 | 
 39 | #--------------------------------------------#
 40 | # Example of RD With Discrete Score          #
 41 | #--------------------------------------------#
 42 | 
 43 | # Figure 3.1: histogram of the score (blue bars left of the cutoff at 0, red bars right of it)
 44 | # aes() refers to the column by name; aes(tempdata$v1) would bypass the `data` argument
 45 | tempdata = data.frame(v1 = X)
 46 | p = ggplot(data=tempdata, aes(x = v1))+
 47 |   geom_histogram(breaks=seq(-2.8, 0, by = 0.1), col="black", fill="blue", alpha = 1)+
 48 |   geom_histogram(breaks=seq(0, 1.6, by = 0.1), col="black", fill="red", alpha = 1)+
 49 |   labs(x="Score", y="Number of Observations")+geom_vline(xintercept=0, color="black")+
 50 |   theme_bw()
 51 | p
 52 | 
 53 | # Counting the number of observations with X different from missing (length(X) would also count NAs)
 54 | sum(!is.na(X))
 55 | 
 56 | # Counting the unique non-missing values of X (unique() would otherwise report NA as a value)
 57 | length(unique(X[!is.na(X)]))
 58 | 
 59 | # Using rddensity
 60 | out = rddensity(X)
 61 | summary(out)
 62 | 
 63 | # Using rdrobust on hsgrade_pct
 64 | out = rdrobust(data$hsgrade_pct, X)
 65 | summary(out)
 66 | 
 67 | # Using rdplot on hsgrade_pct
 68 | # (drawn once: repeating the identical call only redraws the same figure)
 69 | rdplot(data$hsgrade_pct, X, x.label = "Running Variable", y.label = "", title="")
 70 | 
 71 | 
 72 | # Table 3.3: rdrobust with each covariate in turn as the outcome
 73 | summary( rdrobust(data$hsgrade_pct,       X) )
 74 | summary( rdrobust(data$totcredits_year1,  X) )
 75 | summary( rdrobust(data$age_at_entry,      X) )
 76 | summary( rdrobust(data$male,              X) )
 77 | summary( rdrobust(data$bpl_north_america, X) )
 78 | summary( rdrobust(data$english,           X) )
 79 | summary( rdrobust(data$loc_campus1,       X) )
 80 | summary( rdrobust(data$loc_campus2,       X) )
 81 | summary( rdrobust(data$loc_campus3,       X) )
 82 | 
 83 | # Figure 3.3: rdplot for six of the covariates, with no title and no y-axis label
 84 | rdplot(data$hsgrade_pct, X, x.label = 'Running Variable', y.label = '', title = '')
 85 | rdplot(data$totcredits_year1, X, x.label = 'Running Variable', y.label = '', title = '')
 86 | rdplot(data$age_at_entry, X, x.label = 'Running Variable', y.label = '', title = '')
 87 | rdplot(data$english, X, x.label = 'Running Variable', y.label = '', title = '')
 88 | rdplot(data$male, X, x.label = 'Running Variable', y.label = '', title = '')
 89 | rdplot(data$bpl_north_america, X, x.label = 'Running Variable', y.label = '', title = '')
 90 | 
 91 | # RD plot of the outcome with binselect = "esmv"
 92 | out <- rdplot(nextGPA_nonorm, X, binselect = "esmv")
 93 | summary(out)
 94 | 
 95 | rdplot(nextGPA_nonorm, X, binselect = "esmv", x.label = "Running Variable", y.label = "Outcome", title = "")
 96 | 
 97 | # RD estimate for the outcome
 98 | out <- rdrobust(nextGPA_nonorm, X, kernel = "triangular", p = 1, bwselect = "mserd")
 99 | summary(out)
100 | 
101 | # Same fit again, then inspect the components of the returned object
102 | rdout <- rdrobust(nextGPA_nonorm, X, kernel = "triangular", p = 1, bwselect = "mserd")
103 | print(names(rdout))
104 | print(rdout$beta_p_r)
105 | print(rdout$beta_p_l)
106 | 
107 | # rdrobust with standard errors clustered on the values of X
108 | clustervar <- X
109 | out <- rdrobust(nextGPA_nonorm, X, kernel = "triangular", p = 1, bwselect = "mserd", vce = "hc0", cluster = clustervar)
110 | summary(out)
111 | 
112 | # rdrobust on the data collapsed to the mean outcome at each value of X
113 | data2 <- data.frame(nextGPA_nonorm, X)
114 | dim(data2)
115 | collapsed <- aggregate(nextGPA_nonorm ~ X, data = data2, FUN = mean)
116 | dim(collapsed)
117 | out <- rdrobust(collapsed$nextGPA_nonorm, collapsed$X)
118 | summary(out)
119 | 
120 | # Binomial test with rdwinselect (single window of half-width 0.01 around the cutoff)
121 | out <- rdwinselect(X, wmin = 0.01, nwindows = 1, cutoff = 5e-06)
122 | 
123 | # Binomial test by hand
124 | binom.test(77, 305, 1/2)
125 | 
126 | # rdrandinf on hsgrade_pct
127 | out <- rdrandinf(data$hsgrade_pct, X, wl = -0.005, wr = 0.01, seed = 50)
128 | 
129 | # rdwinselect with covariates; the cbind() argument names become the column names of Z
130 | Z <- cbind(hsgrade_pct = data$hsgrade_pct, totcredits_year1 = data$totcredits_year1,
131 |            age_at_entry = data$age_at_entry,
132 |            male = data$male,
133 |            bpl_north_america = data$bpl_north_america,
134 |            english = data$english, loc_campus1 = data$loc_campus1,
135 |            loc_campus2 = data$loc_campus2, loc_campus3 = data$loc_campus3)
136 | out <- rdwinselect(X, Z, p = 1, seed = 50, wmin = 0.01, wstep = 0.01, cutoff = 5e-06)
137 | 
138 | # rdrandinf on the outcome
139 | out <- rdrandinf(nextGPA_nonorm, X, wl = -0.005, wr = 0.01, seed = 50)
140 | 
141 | 


--------------------------------------------------------------------------------
/Code/RDcourse-part8-discrete.do:
--------------------------------------------------------------------------------
  1 | ********************************************************************************
  2 | **  Part 8: RD Designs with Discrete Running Variables
  3 | ** Author: Rocio Titiunik, Matias Cattaneo, Sebastian Calonico
  4 | ** Last update: May 2023
  5 | ********************************************************************************
  6 | ** RDROBUST:  net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
  7 | ** RDLOCRAND: net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
  8 | ** RDDENSITY: net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
  9 | ** RDPOWER:   net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
 10 | *********************************************************************************
 11 | *-------------------------------------------------------------------------------------------------------------*
 12 | ** SOFTWARE WEBSITE: https://rdpackages.github.io/rdrobust/
 13 | *-------------------------------------------------------------------------------------------------------------*
 14 | clear
 15 | clear all
 16 | clear matrix
 17 | cap log close
 18 | set more off
 19 | * Instructor's data folder: if it does not exist on this machine, report it and stay in the current directory
 20 | capture noisily cd "C:\Users\titiunik\Dropbox\teaching\workshops\2022-08-RD-StatisticalHorizons\slides-and-codes\data-and-codes"
 21 | 
 22 | * A folder called "outputs" needs to be created in order to store 
 23 | * all of the figures, logs and tables. If the folder already exists,
 24 | * the user will get an error message but the code will not stop.
 25 | capture noisily mkdir "outputs"
 26 | 
 27 | * Loading the data (education.dta is looked up in the working directory)
 28 | use "education.dta", clear
 29 | 
 30 | *----------------------------------------------*
 31 | * Example of RD With Discrete Score *
 32 | *----------------------------------------------*
 33 | 
 34 | * Histogram of the score: blue bars for X<0, red bars for X>=0, vertical line at 0
 35 | twoway (histogram X if X<0, freq width(0.1) color(blue) xline(0)) ///
 36 | 	(histogram X if X>=0, freq width(0.1) color(red)), ///
 37 | 	xtitle(Score) ytitle(Number of Observations) legend(off) graphregion(color(white))
 38 | 
 39 | * Number of observations whose score is not missing
 40 | count if X!=.
 41 | 
 42 | * Distinct values of the score, shown three ways
 43 | codebook X
 44 | 
 45 | duplicates report X
 46 | 
 47 | preserve
 48 | 	generate obs = 1
 49 | 	collapse (sum) obs, by(X)
 50 | 	list X obs
 51 | restore
 52 | 
 53 | * rddensity on the score
 54 | rddensity X
 55 | 
 56 | * rdrobust with the covariate hsgrade_pct as the outcome
 57 | rdrobust hsgrade_pct X
 58 | 
 59 | * rdplot for hsgrade_pct
 60 | rdplot hsgrade_pct X
 62 | * rdrobust on all covariates: R holds one row per covariate (bandwidth, estimates, se, p-value, CI, N)
 63 | global covariates "hsgrade_pct totcredits_year1 age_at_entry male bpl_north_america english loc_campus1 loc_campus2 loc_campus3"
 64 | matrix R = J(9, 8, .)
 65 | tempname tau_bc se_rb
 66 | local k = 1
 67 | foreach y of global covariates {
 68 | 	rdrobust `y' X, all
 69 | 	local label_`k' : variable label `y'
 70 | 	scalar `tau_bc' = e(tau_bc)
 71 | 	scalar `se_rb'  = e(se_tau_rb)
 72 | 	matrix R[`k',1] = e(h_l)
 73 | 	matrix R[`k',2] = e(tau_cl)
 74 | 	matrix R[`k',3] = `tau_bc'
 75 | 	matrix R[`k',4] = `se_rb'
 76 | 	matrix R[`k',5] = 2*normal(-abs(`tau_bc'/`se_rb'))
 77 | 	matrix R[`k',6] = `tau_bc' - invnormal(0.975)*`se_rb'
 78 | 	matrix R[`k',7] = `tau_bc' + invnormal(0.975)*`se_rb'
 79 | 	matrix R[`k',8] = e(N_h_l) + e(N_h_r)
 80 | 	local ++k
 81 | }
 82 | matlist R
 83 | preserve
 84 | 	clear
 85 | 	svmat R
 86 | 	generate R0 = ""
 87 | 	forvalues row = 1/`=`k'-1' {
 88 | 		replace R0 = "`label_`row''" if _n == `row'
 89 | 	}
 90 | 	order R0
 91 | 	save "outputs/section7_rdrobust_allcovariates.dta", replace
 92 | restore
 93 | 
 94 | * RD plot of the outcome with binselect(esmv)
 95 | rdplot nextGPA_nonorm X, binselect(esmv) ///
 96 | 	graph_options(xtitle(Running Variable) ytitle(Outcome) ///
 97 | 	graphregion(color(white)))
 98 | 
 99 | * RD estimate for the outcome
100 | rdrobust nextGPA_nonorm X, p(1) kernel(triangular) bwselect(mserd)
101 | 
102 | * rdrobust with default options, followed by the list of stored e() results
103 | rdrobust nextGPA_nonorm X
104 | ereturn list
105 | 
106 | * rdrobust with standard errors clustered on the values of X
107 | capture drop clustervar
108 | generate clustervar = X
109 | rdrobust nextGPA_nonorm X, p(1) kernel(triangular) bwselect(mserd) vce(cluster clustervar)
110 | 
111 | * rdrobust on the data collapsed to the mean outcome at each value of X
112 | preserve
113 | 	collapse (mean) nextGPA_nonorm, by(X)
114 | 	rdrobust nextGPA_nonorm X
115 | restore
116 | 
117 | * Binomial test with rdwinselect
118 | rdwinselect X, cutoff(0.000005) wmin(0.01) nwindows(1)
119 | 
120 | * Binomial test by hand
121 | bitesti 305 77 1/2
122 | 
123 | * rdrandinf on hsgrade_pct
124 | rdrandinf hsgrade_pct X, wl(-.005) wr(.01) seed(50)
125 | 
126 | * rdrandinf on All Covariates: R holds one row per covariate (two group means, statistic, p-value, N)
127 | global covariates "hsgrade_pct totcredits_year1 age_at_entry male bpl_north_america english loc_campus1 loc_campus2 loc_campus3"
128 | matrix define R=J(9,5,.)
129 | local k=1
130 | foreach y of global covariates {
131 | 	* Group means inside the window, split by the treatment indicator T (no variable Z is created in this file)
132 | 	ttest `y' if X>=-0.005 & X<=0.01, by(T)
133 | 	matrix R[`k',1]=r(mu_1)
134 | 	matrix R[`k',2]=r(mu_2)
135 | 	rdrandinf `y' X, wl(-0.005) wr(0.01) seed(50)
136 | 	local label_`k': variable label `y'
137 | 	matrix R[`k',3]=r(obs_stat)
138 | 	matrix R[`k',4]=r(randpval)
139 | 	matrix R[`k',5]=r(N)
140 | 	
141 | 	local k=`k'+1
142 | }
143 | 
144 | mat list R
145 | preserve
146 | 	clear
147 | 	local t=`k'-1
148 | 	svmat R
149 | 	gen R0=""
150 | 	forvalues k=1/`t' {
151 | 		replace R0="`label_`k''" if _n==`k'
152 | 	}
153 | 	order R0
154 | 	save "outputs/section7_rdrandinf_allcovariates.dta", replace
155 | restore
156 | 
157 | * Using rdwinselect with Covariates and Windows with Equal Number of Mass Points
158 | * to Each Side of the Cutoff (same cutoff, 0.000005, as the binomial-test call earlier in this file)
159 | global covariates "hsgrade_pct totcredits_year1 age_at_entry male bpl_north_america english loc_campus1 loc_campus2 loc_campus3"
160 | rdwinselect X $covariates, cutoff(0.000005) wmin(0.01) wstep(0.01)
161 | 
162 | rdwinselect X $covariates if loc_campus3==1, cutoff(0.000005) wmin(0.01) wstep(0.01)
163 | 
164 | * Using rdrandinf on the Outcome
165 | rdrandinf nextGPA_nonorm X, seed(50) wl(-0.005) wr(0.01) 
166 | 
167 | *-------------------------------------------------------------------------------------------------------------*
168 | clear all
169 | 


--------------------------------------------------------------------------------
/Code/README.md:
--------------------------------------------------------------------------------
1 | # Example Code
2 | 
3 | This folder contains example implementations of all the methods we discuss in this course in both `R` and `Stata`. The scripts are numbered in order of the material covered. The necessary datasets are included in this folder as well.
4 | 
5 | 


--------------------------------------------------------------------------------
/Code/education.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/education.dta


--------------------------------------------------------------------------------
/Code/headstart.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/headstart.dta


--------------------------------------------------------------------------------
/Code/house.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/house.dta


--------------------------------------------------------------------------------
/Code/jtpa.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/jtpa.dta


--------------------------------------------------------------------------------
/Code/senate.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/senate.dta


--------------------------------------------------------------------------------
/Code/spp.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/spp.dta


--------------------------------------------------------------------------------
/Code/uruguayancct.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Code/uruguayancct.dta


--------------------------------------------------------------------------------
/Code/workflow.do:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | 
 4 | 
 5 | * (1) Establish whether the RD assumptions are plausible
 6 | * [TOMORROW] -- placeholder kept as a comment so that the do-file runs
 7 | 
 8 | * (2) rdPLOT
 9 | rdplot Y X
10 | 
11 | * (3) If using local polynomial
12 | rdrobust Y X, p(1)
13 | 
14 | * (4) Explore robustness of the result: fixed bandwidth, uniform kernel, CER bandwidth selector
15 | rdrobust Y X, p(1) h(5)
16 | rdrobust Y X, p(1) h(5) kernel(uniform)
17 | rdrobust Y X, p(1) bwselect(cerrd)


--------------------------------------------------------------------------------
/Labs/Exercise-1/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Exercise 1: Continuity-based analysis of the Meyersson application
 3 | 
 4 | In this exercise, you will re-analyze the data in [Meyersson (2014)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/Meyersson2014-ECTA.pdf). The Meyersson application is described and replicated in Section 1 of [Cattaneo, Idrobo and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf). The data is in the file `polecon.dta` (Stata format) and `polecon.csv` (comma separated values format). 
 5 | 
 6 | Meyersson is broadly interested in the effect of Islamic parties’ control of local governments on women’s rights, in particular on the educational attainment of young women. To study this question, he uses an RD design focused exclusively on the 1994 Turkish mayoral elections. Specifically, the unit of analysis is the municipality, and the score (variable `X` in the dataset) is the Islamic margin of victory—defined as the difference between the vote share obtained by the largest Islamic party, and the vote share obtained by the largest secular party opponent. The main outcome of interest (variable `Y` in the dataset) is the school attainment for women who were (potentially) in high school during the period 1994-2000, measured with variables extracted from the 2000 census. The particular outcome we re-analyze is the share of the cohort of women ages 15 to 20 in 2000 who had completed high school by 2000.
 7 | 
 8 | ## Part 1: Continuity-Based Analysis
 9 | 
10 | 1. Explore the range and density of the running variable. Is this a continuous random variable?
11 | 
12 | 2. Always plot the data first!
13 |   - Produce RD plots using rdplot for the outcome variable.
14 |   - Do not forget to vary the number of bins used, and look at the difference between quantile bins and evenly-spaced bins.
15 |   - Re-explore the effects of varying the polynomial order for the global fit.
16 | 
17 | 3. Compute optimal bandwidth choices using rdbwselect for the outcome variable.
18 |   - Consider the same and different bandwidths for each side of the cutoff, and MSE versus CER optimal bandwidths.
19 |   - Is the estimated bandwidth sensitive to these choices?
20 | 
21 | 4. Explore the influence of tuning parameters for bandwidth selection.
22 |   - How does the estimated bandwidth change when you restrict the support of the running variable?
23 |   - How does the estimated bandwidth change when you change the order of the local polynomials?
24 | 
25 | 5. Use rdrobust to estimate the RD treatment effects using local polynomials with optimal bandwidth selection.
26 | 
27 | 6. Produce RD estimates and confidence intervals using rdrobust for each outcome variable.
28 |   - Obtain the same results for all three approaches: conventional, bias-corrected & robust.
29 |   - What happens if you change the kernel shape?
30 |   - What happens if you change the standard error estimation method?
31 |   - What happens if you change the bandwidth selection method?
32 |   - What happens if you change the order of polynomials used?
33 | 
34 | 


--------------------------------------------------------------------------------
/Labs/Exercise-1/exercise-Meyersson.do:
--------------------------------------------------------------------------------
 1 | /********************************************
 2 | 
 3 | In-class exercise: the Meyersson application
 4 | 
 5 | Question: impact of having a mayor from an Islamic party on the educational attainment of women
 6 | Unit of observation: municipalities (in Turkey)
 7 | Period: 1994-2000
 8 | 
 9 | X (running variable/score)	: vote margin obtained by the Islamic party in the 1994 Turkish mayoral elections
10 | Y (outcome of interest)		: percentage of women aged 15 to 20 in 2000 who had completed high school by 2000
11 | T (treatment)			: electoral victory of the Islamic party in 1994
12 | 
13 | *********************************************/
14 | 
15 | use "polecon.dta", clear
16 | 
17 | 


--------------------------------------------------------------------------------
/Labs/Exercise-1/polecon.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Labs/Exercise-1/polecon.dta


--------------------------------------------------------------------------------
/Labs/Exercise-2/CaugheySekhon2011.dta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Labs/Exercise-2/CaugheySekhon2011.dta


--------------------------------------------------------------------------------
/Labs/Exercise-2/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | # Exercise 2: Caughey and Sekhon Application
 3 | 
 4 | In this exercise, you will re-analyze the data in [Caughey and Sekhon (2011)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CaugheySekhon2011-PA.pdf). The data is in the file `CaugheySekhon2011.dta` (Stata format) and `CaugheySekhon2011.csv` (comma separated values format). 
 5 | 
 6 | The authors are interested in estimating the incumbency advantage at the party level. To study this question, they use an RD design focused exclusively on U.S. House elections. Specifically, the unit of analysis is the U.S. congressional district, and the score (variable `DifDPct` in the dataset) is the margin of victory of the Democratic party—defined as the difference between the vote share obtained by the Democratic party and the vote share obtained by the Democratic party’s strongest opponent. The main outcome of interest (variable `DWinNxt` in the dataset) is the Democratic party’s victory in the following election.
 7 | 
 8 | ## Part 1: Continuity-based Falsification
 9 | 
10 | 1. Try to validate the RD design.
11 |   - Plot the histogram of the running variable.
12 |   - Conduct a binomial test for counts and a density test.
13 |   - Look at covariate balance, both graphically and formally.
14 |   - Do not forget to plot with the rdplot command (called rdbinselect in older versions of the rdrobust package)!
15 | 
16 | 2. Do you think this design is credible under a continuity assumption?
17 | 
18 | ## Part 2: Local randomization Falsification
19 | 
20 | 1. Select a window based on the two covariates given
21 | 
22 | 2. Try to provide empirical evidence supporting the local-randomization assumption in the window you chose.
23 | 
24 | 3. Do you think this design is credible under a local randomization assumption?
25 | 
26 | ## Part 3: Compare both approaches
27 | 
28 | 1. Do you reach different conclusions from the two approaches? Is this RD design valid? Why/Why not?
29 | 


--------------------------------------------------------------------------------
/Labs/Exercise-2/exercise-CaugheySekhon.do:
--------------------------------------------------------------------------------
 1 | /********************************************
 2 | 
 3 | In-class exercise: the Caughey and Sekhon application
 4 | 
 5 | Party incumbency advantage in the U.S. House
 6 | Unit of observation: house district
 7 | Period: 1942-2010
 8 | 
 9 | DifDPct (running variable/score)	: Democratic Party's Margin of Victory at Election t
10 | DWinNxt (outcome of interest)		: Democratic Party's Victory at Election t+1
11 | DWinPrv (pre-determined covariate 1)	: Democratic Party's Victory at Election t-1
12 | DPctPrv (pre-determined covariate 2)	: Democratic Party's Vote Share at Election t-1
13 | 
14 | *********************************************/
15 | 
16 | use "CaugheySekhon2011.dta", clear
17 | global x "DifDPct"  /* score */
18 | global y "DWinNxt"  /* outcome */
19 | global z1 "DWinPrv" /* covariate 1 */
20 | global z2 "DPctPrv" /* covariate 2 */
21 | 	
22 | /* Step 1: Analyze this RD using a local polynomial approach */
23 | * Do RD plot, falsification on density and covariates, and local polynomial estimation with optimal bandwidth selection	
24 | 
25 | /* Step 2: Analyze this RD using a local randomization approach */
26 | * Select a window based on the two covariates and perform inference inside the chosen window
27 | 
28 | /* Step 3: Compare results from both approaches */
29 | * Do you reach different conclusions from the two approaches? Is this RD design valid? Why/Why not?
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | <img src="https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/main/img/banner.png" alt="Mixtape Sessions Banner" width="100%"> 
  2 | 
  3 | 
  4 | ## About
  5 | 
  6 | This course covers methods for the analysis and interpretation of the Regression Discontinuity (RD) design, a non-experimental strategy to study treatment effects that can be used when units receive a treatment based on a score and a cutoff. The course covers methods for estimation, inference, and falsification of RD treatment effects using two different approaches: the continuity-based framework, implemented with local polynomials, and the local randomization framework, implemented with standard tools from the analysis of experiments. The focus is on conceptual understanding of the underlying methodological issues and effective empirical implementation. Every topic is illustrated with the analysis of RD examples using real-world data, walking through R and Stata codes that fully implement all the methods discussed. At the end of the course, participants will have acquired the necessary skills to rigorously interpret, visualize, validate, estimate, and characterize the uncertainty of RD treatment effects.
  7 | 
  8 | ## Overview 
  9 | 
 10 | The goal of this workshop is to give an introduction to standard and recent methodological developments in the analysis and interpretation of regression discontinuity (RD) designs. The course focuses on methodology and empirical practice, as well as on conceptual issues, but not on the technical details of the statistical and econometric theory underlying the results. A brief
 11 | description of the course, along with references to further readings, is given below.
 12 | 
 13 | It is assumed that participants have elementary working knowledge of statistics, econometrics and policy evaluation. It would be useful, but not required, if participants were familiar with basic results from the literature on program evaluation and treatment effects at the level of Wooldridge (2010). The course is nonetheless meant to be self-contained and hence most underlying statistics/econometrics concepts and results are introduced and explained in class.
 14 | 
 15 | ## Schedule
 16 | 
 17 | ### Day 1 
 18 | 
 19 | Introduction to Sharp RD design and graphical illustration. Continuity based framework: estimands, identification assumptions, and point estimation.
 20 | 
 21 | - Sharp RD design: introduction and graphical illustration with RD plots.
 22 | - Continuity based RD analysis: estimands and identification.
 23 | - Estimation of RD effects with local polynomials.
 24 | - Optimal bandwidth selection.
 25 | 
 26 | #### Readings
 27 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf), Chapters 1, 2, 3, and Sections 4.1 and 4.2 in Chapter 4.
 28 | 
 29 | ### Day 2
 30 | 
 31 | Continuity based framework, continued: robust inference based on local polynomials. Introduction to local randomization framework: estimands, window selection, estimation and inference.
 32 | 
 33 | - Continuity based RD analysis, continued:
 34 |   - Robust confidence intervals based on local polynomials
 35 | - Local Randomization RD analysis
 36 |   - Inferences based on Fisherian methods
 37 |   - Window selection based on covariates
 38 |   - Inferences based on large-sample methods
 39 | 
 40 | #### Readings
 41 | 
 42 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf), Chapter 4
 43 | 
 44 | [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf), Chapter 2
 45 | 
 46 | ### Day 3
 47 | 
 48 | RD local randomization analysis, RD falsification methods, and extensions to canonical RD design.
 49 | 
 50 | - Falsification of RD assumptions: density and covariate balance tests
 51 | - The RD design under imperfect compliance
 52 | 
 53 | #### Readings
 54 | 
 55 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf), Chapter 5
 56 | 
 57 | [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf), Chapter 3
 58 | 
 59 | 
 60 | 
 61 | ## Slides
 62 | 
 63 | [RD Slides](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Slides/RD.pdf)
 64 | 
 65 | 
 66 | ## Coding Exercises
 67 | 
 68 | [Exercise 1: Meyersson (2014)](https://github.com/Mixtape-Sessions/Regression-Discontinuity/tree/main/Labs/Exercise-1/) 
 69 | 
 70 | - Paper: [Meyersson (2014)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/Meyersson2014-ECTA.pdf)
 71 | 
 72 | [Exercise 2: Caughey and Sekhon (2011)](https://github.com/Mixtape-Sessions/Regression-Discontinuity/tree/main/Labs/Exercise-2/) 
 73 | 
 74 | - Paper: [Caughey and Sekhon (2011)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CaugheySekhon2011-PA.pdf)
 75 | 
 76 | [Additional code for all covered material](https://github.com/Mixtape-Sessions/Regression-Discontinuity/tree/main/Code/)
 77 | 
 78 | 
 79 | ## Readings
 80 | 
 81 | ### Essential
 82 | 
 83 | [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf)
 84 | 
 85 | [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf)
 86 | 
 87 | ### Background
 88 | 
 89 | [Cattaneo, Frandsen, and Titiunik (2015)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoFrandsenTitiunik2015-JCI.pdf)
 90 | 
 91 | [Cattaneo and Titiunik (2022)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoTitiunik2022-ARE.pdf)
 92 | 
 93 | [Imbens and Lemieux (2008)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/ImbensLemieux2008-JoE.pdf)
 94 | 
 95 | [Lee and Lemieux (2010)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/LeeLemieux2010-JEL.pdf)
 96 | 
 97 | 
 98 | ## Computing and Software 
 99 | 
100 | To participate in the hands-on exercises, you are strongly encouraged to secure access to a computer with the most recent version of `Stata` or `R` plus `RStudio` installed.
101 | 
102 | All functions and packages are available in `Stata`, which is not free, and `R`, a free and open-source statistical software environment. We also have `Python` versions of some of the packages (we are still in the process of creating Python libraries for all the commands).
103 | 
104 | The workshop will employ several empirical illustrations, which will be analyzed using `Stata` and/or `R`. The following `Stata`/`R`/`Python` modules/commands will be used:
105 | 
106 | - [rdrobust](https://rdpackages.github.io/rdrobust): RD inference employing local polynomial and partitioning methods. See Calonico, Cattaneo and Titiunik ([2014](https://journals.sagepub.com/doi/pdf/10.1177/1536867X1401400413), [2015](https://journal.r-project.org/archive/2015/RJ-2015-004/RJ-2015-004.pdf)) for introductions.
107 | 
108 | - [rddensity](https://rdpackages.github.io/rddensity): Manipulation testing for RD designs. See [Cattaneo, Jansson and Ma (2018)](https://journals.sagepub.com/doi/10.1177/1536867X1801800115) for an introduction.
109 | 
110 | - [rdlocrand](https://rdpackages.github.io/rdlocrand): RD inference employing randomization inference methods. See [Cattaneo, Titiunik and Vazquez-Bare (2016)](https://journals.sagepub.com/doi/10.1177/1536867X1601600205) for an introduction.
111 | 
112 | - [rdmulti](https://rdpackages.github.io/rdmulti): Local polynomial methods for estimation and inference for RD designs with multiple cutoffs.
113 | 
114 | - [rdpower](https://rdpackages.github.io/rdpower): Power and sample size calculations using robust bias-corrected local polynomial inference methods.
115 | 
116 | Further details, including how to install the packages in both R and Stata, may be found at: https://rdpackages.github.io
117 | 
118 | Replication files in both `R` and `Stata` for all the empirical analyses in [Cattaneo, Idrobo, and Titiunik (2020)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf) and [Cattaneo, Idrobo, and Titiunik (2023)](https://nbviewer.org/github/Mixtape-Sessions/Regression-Discontinuity/raw/main/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf) may be found at: https://github.com/rdpackages-replication/CIT_2020_CUP
119 | 
120 | Please make sure you have `Stata` or `R` and the above modules/commands installed and fully functional on your personal computer before the course begins. Datasets, do-files and `R` files will be provided in advance.
121 | 
122 | <details closed><summary>Software installation instructions</summary>
123 | 
124 | If you will be using R, I suggest also installing RStudio. To install these programs, follow the
125 | instructions below:
126 | 
127 | 1. Download the most recent version of [R](https://www.r-project.org/) and install it on your computer. R is free and available for Windows, Mac, and Linux operating systems. Download the version of R compatible with your operating system from [CRAN](https://cloud.r-project.org/). If you are running Windows or MacOS, you should choose one of the precompiled binary distributions (i.e., ready-to-run applications) that are linked at the top of the page, not the source code.
128 | 
129 | 2. Download and install [RStudio](https://www.rstudio.com/products/rstudio/download/). This is an integrated development environment (IDE) that includes a console, a syntax-highlighting editor, and other tools for plotting, history, debugging and workspace management for R use. RStudio is free and available for Windows, Mac, and Linux platforms.
130 | 
131 | If you are using Stata, you will have to obtain a license from [here](https://www.stata.com/). Stata is licensed through [StataCorp](http://www.stata.com/) and is frequently offered at a significant discount through academic institutions to their employees and students. Seminar participants who are not yet ready to purchase Stata could take advantage of StataCorp’s 30-day software return policy and obtain the latest version of Stata on a trial basis in the weeks immediately preceding this course. Use this link for a 30-day trial copy: http://www.stata.com/customer-service/evaluate-stata/
132 | 
133 | </details>
134 | 
135 | 
136 | <details closed><summary>Package installation instructions</summary>
137 | 
138 | You will need to install the following packages: `rdrobust`, `rddensity`, `rdlocrand`, `rdpower` and `rdmulti`. In R, make sure you have an internet connection and then launch RStudio. Copy the following lines of text and paste them into R’s command prompt (located in the window named “Console”):
139 | 
140 | ```r
141 | install.packages("rdrobust")
142 | install.packages("rddensity")
143 | install.packages("rdlocrand")
144 | install.packages("rdpower")
145 | install.packages("rdmulti")
146 | ```
147 | 
148 | In Stata, make sure you have an internet connection and then type:
149 | 
150 | ```stata
151 | net install rdrobust, from(https://raw.githubusercontent.com/rdpackages/rdrobust/master/stata) replace
152 | net install rddensity, from(https://raw.githubusercontent.com/rdpackages/rddensity/master/stata) replace
153 | net install rdlocrand, from(https://raw.githubusercontent.com/rdpackages/rdlocrand/master/stata) replace
154 | net install rdpower, from(https://raw.githubusercontent.com/rdpackages/rdpower/master/stata) replace
155 | net install rdmulti, from(https://raw.githubusercontent.com/rdpackages/rdmulti/master/stata) replace
156 | ```
157 | 
158 | </details>
159 | 
160 | 
161 | 
162 | 
163 | 


--------------------------------------------------------------------------------
/Readings/CattaneoFrandsenTitiunik2015-JCI.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoFrandsenTitiunik2015-JCI.pdf


--------------------------------------------------------------------------------
/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoIdroboTitiunik2020-CUP-Foundations-preprint.pdf


--------------------------------------------------------------------------------
/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoIdroboTitiunik2023-CUP-Extensions.pdf


--------------------------------------------------------------------------------
/Readings/CattaneoTitiunik2022-ARE.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CattaneoTitiunik2022-ARE.pdf


--------------------------------------------------------------------------------
/Readings/CaugheySekhon2011-PA.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/CaugheySekhon2011-PA.pdf


--------------------------------------------------------------------------------
/Readings/ImbensLemieux2008-JoE.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/ImbensLemieux2008-JoE.pdf


--------------------------------------------------------------------------------
/Readings/LeeLemieux2010-JEL.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/LeeLemieux2010-JEL.pdf


--------------------------------------------------------------------------------
/Readings/Meyersson2014-ECTA.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Readings/Meyersson2014-ECTA.pdf


--------------------------------------------------------------------------------
/Slides/RD.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Slides/RD.pdf


--------------------------------------------------------------------------------
/Syllabus.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/Syllabus.pdf


--------------------------------------------------------------------------------
/img/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/img/banner.png


--------------------------------------------------------------------------------
/img/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Mixtape-Sessions/Regression-Discontinuity/58a6148b6a1d89fe82e3fd44796dd56b33b9ee08/img/logo.png


--------------------------------------------------------------------------------