├── COPYRIGHT.md ├── ChasalowCourseNotes.pdf ├── README.md └── plotscripts ├── Module3-IllusStratification.R ├── Module3-nonidentifiability_ex.R ├── Module4-TwoStructuralCases.R └── Module8-BiasCorrection.R /COPYRIGHT.md: -------------------------------------------------------------------------------- 1 | **Copyright © 2023 Kyla Chasalow** 2 | 3 | You may use this material for educational purposes but must not edit, transform, or publish this material without prior written permission. -------------------------------------------------------------------------------- /ChasalowCourseNotes.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kchaz/CausalCourseNotes/c6c237b48603b388ded926163806e572d202336f/ChasalowCourseNotes.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Causal Course Notes 2 | 3 | This repo contains the course notes I created in Fall 2023 for **Stat 286 / Gov 2003 Causal Inference with Applications** at Harvard University, taught by [Kosuke Imai](https://imai.fas.harvard.edu/) and TAed by myself and another graduate student. Other materials for the course are available [here](https://imai.fas.harvard.edu/teaching/cause.html). My hope is that these notes will be useful to students in future iterations of the course and perhaps beyond. 4 | 5 | You are free to download and reference these notes under the following conditions: 6 | 7 | 1. You contact me at kyla_chasalow@g.harvard.edu if you find mistakes, typos, or ambiguities so that I can fix them! I am happy to add an acknowledgement for any substantial corrections. 8 | 2. You give proper attribution if referencing or quoting from these notes 9 | 10 | **Disclaimer:** these notes have not been peer reviewed and may contain errors. 
# (README.md, continued) They also do not include every sub-topic covered in Stat 286 / Gov 2003 and hence are not a full replacement for the videos, slides, and lecture.
#
# --------------------------------------------------------------------------------
# /plotscripts/Module3-IllusStratification.R:
# --------------------------------------------------------------------------------
#'
#' Four-panel figure: one stratified assignment, two extreme completely
#' randomized assignments, and a legend panel. The three data panels share
#' a single helper, draw_assignment_panel(), defined below.
#'

n <- 16

# Potential outcomes for the n units, laid out as four groups of four units.
Y1 <- c(rep(-2, 4), rep(.5, 4),
        rep(8, 4), rep(10, 4)) # rnorm(n,0,.25)

Y0 <- c(rep(-1, 4), rep(0, 4),
        rep(0, 4), rep(5, 4)) #+ rnorm(n,0,.25)

# Difference in mean potential outcomes; only used by the optional
# (commented-out) reference line at the bottom of this script.
tau <- mean(Y1) - mean(Y0)

#' Draw one assignment panel
#'
#' Plots Y(0) as open circles and Y(1) as filled dots for every unit, joined
#' by a dashed grey segment. Observed outcomes are coloured (blue for the
#' Y(0) of control units, red for the Y(1) of treated units); unobserved ones
#' are grey. Dashed horizontal lines mark the mean observed Y(1) and Y(0).
#'
#' @param Y0 Numeric vector of potential outcomes under control.
#' @param Y1 Numeric vector of potential outcomes under treatment,
#'   same length as `Y0`.
#' @param treated Integer indices of the treated units; all others are
#'   treated as controls.
#' @param title Panel title.
#' @param label_x x position of the two mean labels (default 1, left edge).
#' @return Invisibly, `c(Y1bar = ..., Y0bar = ...)`: the mean of `Y1` over the
#'   treated units and the mean of `Y0` over the control units.
draw_assignment_panel <- function(Y0, Y1, treated, title, label_x = 1) {
  stopifnot(length(Y0) == length(Y1))
  idx <- seq_len(length(Y0))
  is_treated <- idx %in% treated

  # Empty canvas; the y-range covers every potential outcome plus a margin.
  plot(idx, idx,
       type = "n",
       ylim = c(min(Y1, Y0) - 1, max(Y1, Y0) + 1),
       ylab = "",
       xlab = "",
       xaxt = "n",
       yaxt = "n",
       cex.main = 1.2,
       cex.lab = 1.5,
       main = title)

  segments(idx, Y0, idx, Y1, col = "grey", lty = 2, lwd = 2)
  points(idx, Y0, cex = 1.5, lwd = 2,
         col = ifelse(is_treated, "grey", "blue"))
  points(idx, Y1, pch = 20, cex = 2,
         col = ifelse(is_treated, "red", "grey"))

  # Tick positions are hard-coded for four groups of four units.
  axis(1, at = c(2, 6, 10, 14) + .5,
       labels = paste("Group", 1:4),
       cex.axis = 1)

  Y1bar <- mean(Y1[is_treated])
  Y0bar <- mean(Y0[!is_treated])
  abline(h = Y1bar, col = "red", lty = 2)
  abline(h = Y0bar, col = "blue", lty = 2)
  text(label_x, Y1bar + 1, expression(bar(Y)[1]), col = "red")
  text(label_x, Y0bar + 1, expression(bar(Y)[0]), col = "blue")

  invisible(c(Y1bar = Y1bar, Y0bar = Y0bar))
}

par(mfrow = c(2, 2))
par(mar = c(3, 2, 2, 1))

# version with stratification: units 3 and 4 of every group are treated
draw_assignment_panel(
  Y0, Y1,
  treated = which(rep(c(FALSE, FALSE, TRUE, TRUE), 4)),
  title = "Stratified Assignment"
)

# problem without stratification: only the last two groups are treated
draw_assignment_panel(
  Y0, Y1,
  treated = 9:16,
  title = "Completely Randomized (extreme 1)"
)

# problem without stratification: only the first two groups are treated
# (mean labels are placed at the right edge in this panel)
draw_assignment_panel(
  Y0, Y1,
  treated = 1:8,
  title = "Completely Randomized (extreme 2)",
  label_x = 16
)

# Optional reference line for tau on a panel (left disabled, as before):
#abline(h=tau, lty = 2)
#text(1,tau+.5, expression(tau), cex = 2)

# Fourth panel: empty plot that only hosts the legend.
plot(seq_len(n), seq_len(n), type = "n", axes = FALSE, xlab = "", ylab = "")

legend("center",
       c("unobserved Y(0)",
         "observed Y(0)",
         "unobserved Y(1)",
         "observed Y(1)"),
       col = c("grey", "blue", "grey", "red"),
       pch = c(1, 1, 20, 20),
       bty = "n", cex = 1.8)

# --------------------------------------------------------------------------------
# /plotscripts/Module3-nonidentifiability_ex.R:
# --------------------------------------------------------------------------------
library(mnormt)

# covariance matrices
Sigma1 <- diag(2)
Sigma2 <- matrix(c(1, .85, .85, 1), nrow = 2)

# generate random draws
# NOTE(review): no seed is set in this script, so the simulated points
# differ from run to run.
n <- 5000
Y <- rnorm(n)
bvn1 <- rmnorm(n, mean = c(0, 0), varcov = Sigma1)
bvn2 <- rmnorm(n, mean = c(0, 0), varcov = Sigma2)
bvn3 <- cbind(Y, Y) # both columns are the same draw
sims <- list(bvn1, bvn2, bvn3)

# plotting
pdf("Module3-nonidentifiability.pdf", width = 15, height = 5) # Adjust size as needed
par(mfrow = c(1, 3))
par(mar = rep(6, 4))
r <- c(0, .85, 1) # panel titles, one per element of sims
for (i in seq_along(sims)) {
  mat <- sims[[i]]
  plot(mat[, 1], mat[, 2],
       main = paste("r =", r[i]),
       cex.main = 3,
       cex.axis = 2,
       xlim = c(-5, 5),
       ylim = c(-5, 5),
       cex.lab = 3,
       xlab = "Y(0)",
       ylab = "Y(1)",
       axes = FALSE)
  # draw both axes with tick marks but no tick labels
  Axis(side = 1, labels = FALSE)
  Axis(side = 2, labels = FALSE)
}

# Finish plotting
dev.off()


# --------------------------------------------------------------------------------
# /plotscripts/Module4-TwoStructuralCases.R:
# --------------------------------------------------------------------------------

#' Plot paired potential outcomes at T = 0 and T = 1
#'
#' Draws `Y0` as blue points near x = 0 and `Y1` as orange points near x = 1
#' (both horizontally jittered), and joins each unit's pair with a gray
#' segment.
#'
#' @param Y0 Numeric vector of outcomes plotted at T = 0.
#' @param Y1 Numeric vector of outcomes plotted at T = 1; must have the same
#'   length as `Y0`.
#' @param title Main title of the plot.
#' @return `NULL`, invisibly; called for its plotting side effect.
plotter <- function(Y0, Y1, title) {
  # The number of units comes from the arguments, not from a global `n`.
  stopifnot(length(Y0) > 0, length(Y0) == length(Y1))
  n_units <- length(Y0)

  jitter_amount <- 0.05 # Adjust this value to control the amount of jitter
  jitter_x0 <- jitter(rep(0, n_units), amount = jitter_amount)
  jitter_x1 <- jitter(rep(1, n_units), amount = jitter_amount)

  lim <- c(min(Y0, Y1), max(Y0, Y1))
  plot(jitter_x0, Y0,
       ylim = lim,
       xlim = c(-.5, 1.5),
       xaxt = "n", # removes the x-axis ticks and labels
       xlab = "",
       ylab = "Y(T)",
       col = "blue",
       pch = 20,
       main = title,
       cex.lab = 1.5,
       cex.axis = 1.5,
       cex = 1.5)
  points(jitter_x1, Y1, col = "orange", pch = 20, cex = 1.5)

  # Adding custom axis labels
  axis(1, at = c(0, 1), labels = c("T = 0", "T = 1"), cex.axis = 1.5)

  # Adding lines connecting elements of Y0 and Y1 (one segment per unit)
  segments(jitter_x0, Y0, jitter_x1, Y1, col = "gray")

  invisible(NULL)
}


set.seed(60)

par(mfrow = c(1, 2))
n <- 22
alpha <- 2
beta <- 4

# model 1: the same noise term enters both potential outcomes
epsilons <- rnorm(n, 0, 1)
Y0 <- alpha + epsilons
Y1 <- alpha + beta + epsilons
plotter(Y0, Y1, title = "Constant Additive Treatment Effect")

# model 2: separate noise draws for the two potential outcomes
epsilon0 <- rnorm(n, 0, 1)
epsilon1 <- rnorm(n, 0, 1) #homoskedasticity
Y0 <- alpha + epsilon0
Y1 <- alpha + beta + epsilon1
plotter(Y0, Y1, title = "Heterogeneous Treatment Effect")

# --------------------------------------------------------------------------------
# /plotscripts/Module8-BiasCorrection.R:
# --------------------------------------------------------------------------------
#'
#' Comment: the spacing of this plot is a little finicky.
#' Very annoying to create.
#'

#general plot set-ups
par(mfrow = c(1, 1),
    mar = c(4, 4, 2, 2),
    cex.axis = 1.25)

#setup plot
plot(1:10, 1:10, type = "n",
     axes = FALSE,
     xlab = "",
     ylab = "",
     xlim = c(0, 14),
     cex.lab = 1)
Axis(side = 1, labels = FALSE)
Axis(side = 2, labels = FALSE)

#draw some line to represent a line of best fit
alpha <- 2
beta <- .8
abline(a = alpha, b = beta, lwd = 3)


#treated unit
tx <- 5
ty <- 8
text(tx - .5, ty + .5, "Treated Unit i", col = "red")
segments(tx, 0, tx, ty, lty = 2, col = "grey")
segments(0, ty, tx, ty, lty = 2, col = "grey")
points(tx, ty, col = "red", cex = 3, pch = 18)


#matched control
cx <- 6
cy <- 5
text(cx + 2.5, cy, "Matched Control Unit k", col = "blue")
segments(cx, 0, cx, cy, lty = 2, col = "grey")
segments(0, cy, cx, cy, lty = 2, col = "grey")
points(cx, cy, col = "blue", cex = 3, pch = 20)

#other controls scattered around line
# scales::alpha() is namespaced, so it is unaffected by the numeric `alpha` above
lblue <- scales::alpha("blue", .45)
x <- c(1, 2, 2.5, 3, 7, 6.7, 7.8, 9)
y <- c(3.2, 4.8, 2.5, 4.5, 9.5, 8, 6.9, 8.7)
points(x, y, col = lblue, cex = 3, pch = 20)
#text(7.5, 10, "Other Controls", col = lblue)
text(11, 9, "Regression line \n fit on all controls", col = lblue)


#fitted values and correction
# Heights of the drawn line at the treated unit's x (tx) and the matched
# control's x (cx). Named fit_i / fit_k rather than v / c so that base::c
# is not shadowed.
fit_i <- alpha + beta * tx
segments(0, fit_i, tx, fit_i, lty = 2, col = "grey")
fit_k <- alpha + beta * cx
segments(0, fit_k, cx, fit_k, lty = 2, col = "grey")
#segments(cx,fit_i,tx,fit_i, col = "grey", lty = 2, lwd = 1)
segments(cx, fit_k, cx, fit_i, col = "purple", lty = 3, lwd = 3)
points(cx, fit_k, col = "black", cex = 2, pch = 17)
points(tx, fit_i, col = "black", cex = 2, pch = 17)
text(8.5, fit_i + .43,
     expression("Correction " * hat(Y)[i](0) - hat(Y)[k](0)),
     col = "black")


#corrected estimate: matched control's outcome shifted by the correction
z <- cy + (fit_i - fit_k)
points(tx, z, cex = 3, pch = 18, col = "purple")
text(tx + 3, z,
     expression("Corrected Estimate of " * Y[i](0)),
     col = "purple")
segments(tx, z, tx, cy, col = "purple", lty = 3, lwd = 3)


#labels
Axis(side = 1,
     at = c(tx, cx),
     labels = c(expression(X[i]), expression(X[k])),
     tick = TRUE)

Axis(side = 2,
     at = c(cy, ty, fit_i, fit_k),
     labels = c(expression(Y[k](0)),
                expression(Y[i](1)),
                expression(hat(Y)[i](0)),
                expression(hat(Y)[k](0))),
     tick = TRUE,
     las = 2)


# --------------------------------------------------------------------------------