├── COPYRIGHT.md
├── ChasalowCourseNotes.pdf
├── README.md
└── plotscripts
    ├── Module3-IllusStratification.R
    ├── Module3-nonidentifiability_ex.R
    ├── Module4-TwoStructuralCases.R
    └── Module8-BiasCorrection.R


/COPYRIGHT.md:
--------------------------------------------------------------------------------
1 | **Copyright © 2023 Kyla Chasalow**
2 | 
3 | You may use this material for educational purposes but must not edit, transform, or publish this material without prior written permission. 


--------------------------------------------------------------------------------
/ChasalowCourseNotes.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kchaz/CausalCourseNotes/c6c237b48603b388ded926163806e572d202336f/ChasalowCourseNotes.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Causal Course Notes
 2 | 
 3 | This repo contains the course notes I created in Fall 2023 for **Stat 286 / Gov 2003 Causal Inference with Applications** at Harvard University, taught by [Kosuke Imai](https://imai.fas.harvard.edu/) and TAed by me and another graduate student. Other materials for the course are available [here](https://imai.fas.harvard.edu/teaching/cause.html). My hope is that these notes will be useful to students in future iterations of the course and perhaps beyond. 
 4 | 
 5 | You are free to download and reference these notes under the following conditions: 
 6 | 
 7 |  1. You contact me at kyla_chasalow@g.harvard.edu if you find mistakes, typos, or ambiguities so that I can fix them! I am happy to add an acknowledgement for any substantial corrections.
 8 |  2. You give proper attribution if referencing or quoting from these notes.
 9 | 
10 | **Disclaimer:** these notes have not been peer reviewed and may contain errors. They also do not include every sub-topic covered in Stat 286 / Gov 2003 and hence are not a full replacement for the videos, slides, and lecture.
11 |  
12 | 


--------------------------------------------------------------------------------
/plotscripts/Module3-IllusStratification.R:
--------------------------------------------------------------------------------
  1 | #' 
  2 | #' this code is clunky - would be nice to clean up sometime
  3 | #' 
  4 | 
  5 | n = 16
  6 | 
  7 | Y1 <- c(rep(-2,4), rep(.5,4),
  8 |         rep(8,4), rep(10,4)) # rnorm(n,0,.25)
  9 | 
 10 | Y0 <- c(rep(-1,4), rep(0,4),
 11 |         rep(0,4), rep(5,4)) #+ rnorm(n,0,.25)
 12 | 
 13 | tau = mean(Y1)-mean(Y0)
 14 | 
 15 | par(mfrow = c(2,2))
 16 | par(mar = c(3, 2, 2, 1))
 17 | 
 18 | # version with stratification
 19 | l = min(Y1,Y0)
 20 | u = max(Y1,Y0)
 21 | plot(1:n,1:n, "n",
 22 |      ylim = c(l-1, u+1),
 23 |      ylab = "",
 24 |      xlab = "",
 25 |      xaxt = "n",
 26 |      yaxt = "n",
 27 |      cex.main = 1.2,
 28 |      cex.lab = 1.5,
 29 |      main = "Stratified Assignment")
 30 | segments(1:n, Y0, 1:n, Y1, col = "grey", lty = 2, lwd = 2)
 31 | points(1:n, Y0, cex = 1.5, lwd = 2,
 32 |        col = rep(c("blue","blue","grey","grey"),4))
 33 | points(1:n,Y1, pch = 20, cex = 2,
 34 |        col = rep(c("grey","grey","red","red"),4))
 35 | axis(1, at = c(2,6,10,14)+.5,
 36 |      labels = paste("Group", 1:4),
 37 |      cex.axis = 1)
 38 | 
 39 | 
 40 | Y1bar = mean(Y1[c(3,4,7,8,11,12,15,16)])
 41 | Y0bar = mean(Y0[c(1,2,5,6,9,10,13,14)])
 42 | abline(h = Y1bar, col = "red", lty = 2)
 43 | abline(h = Y0bar, col = "blue", lty = 2)
 44 | text(1,Y1bar+1,expression(bar(Y)[1]), col = "red")
 45 | text(1,Y0bar+1,expression(bar(Y)[0]), col = "blue")
 46 | #abline(h=tau, lty = 2)
 47 | #text(1,tau+.5, expression(tau), cex = 2)
 48 | 
 49 | 
 50 | # problem without stratification
 51 | l = min(Y1,Y0)
 52 | u = max(Y1,Y0)
 53 | plot(1:n,1:n, "n",
 54 |      ylim = c(l-1, u+1),
 55 |      ylab = "",
 56 |      xlab = "",
 57 |      cex.main = 1.2,
 58 |      cex.lab = 1.5,
 59 |      xaxt = "n",
 60 |      yaxt = "n",
 61 |      main = "Completely Randomized (extreme 1)")
 62 | segments(1:n, Y0, 1:n, Y1, col = "grey", lty = 2, lwd = 2)
 63 | points(1:n, Y0, cex = 1.5, lwd = 2,
 64 |        col = c(rep("blue",8), rep("grey",8)))
 65 | points(1:n,Y1, pch = 20, cex = 2,
 66 |        col = c(rep("grey",8), rep("red",8)))
 67 | axis(1, at = c(2,6,10,14)+.5,
 68 |      labels = paste("Group", 1:4),
 69 |      cex.axis = 1)
 70 | Y1bar = mean(Y1[9:16])
 71 | Y0bar = mean(Y0[1:8])
 72 | abline(h = Y1bar, col = "red", lty = 2)
 73 | abline(h = Y0bar, col = "blue", lty = 2)
 74 | text(1,Y1bar+1,expression(bar(Y)[1]), col = "red")
 75 | text(1,Y0bar+1,expression(bar(Y)[0]), col = "blue")
 76 | #abline(h=tau, lty = 2)
 77 | #text(1,tau+.5, expression(tau), cex = 2)
 78 | 
 79 | 
 80 | # problem without stratification
 81 | l = min(Y1,Y0)
 82 | u = max(Y1,Y0)
 83 | plot(1:n,1:n, "n",
 84 |      ylim = c(l-1, u+1),
 85 |      ylab = "",
 86 |      xlab = "",
 87 |      cex.main = 1.2,
 88 |      cex.lab = 1.5,
 89 |      xaxt = "n",
 90 |      yaxt = "n",
 91 |      main = "Completely Randomized (extreme 2)")
 92 | segments(1:n, Y0, 1:n, Y1, col = "grey", lty = 2, lwd = 2)
 93 | points(1:n, Y0, cex = 1.5, lwd = 2,
 94 |        col = c(rep("grey",8), rep("blue",8)))
 95 | points(1:n,Y1, pch = 20, cex = 2,
 96 |        col = c(rep("red",8), rep("grey",8)))
 97 | axis(1, at = c(2,6,10,14)+.5,
 98 |      labels = paste("Group", 1:4),
 99 |      cex.axis = 1)
100 | 
101 | Y1bar = mean(Y1[1:8])
102 | Y0bar = mean(Y0[9:16])
103 | abline(h = Y1bar, col = "red", lty = 2)
104 | abline(h = Y0bar, col = "blue", lty = 2)
105 | text(16,Y1bar+1,expression(bar(Y)[1]), col = "red")
106 | text(16,Y0bar+1,expression(bar(Y)[0]), col = "blue")
107 | 
108 | 
109 | 
110 | plot(1:n,1:n, "n", axes = F, xlab = "", ylab = "")
111 | 
112 | legend("center", c("unobserved Y(0)",
113 |                     "observed Y(0)",
114 |                     "unobserved Y(1)",
115 |                     "observed Y(1)"),
116 |        col= c("grey","blue","grey","red"),
117 |        pch = c(1,1,20,20),
118 |        bty = "n", cex = 1.8)
119 | 


--------------------------------------------------------------------------------
/plotscripts/Module3-nonidentifiability_ex.R:
--------------------------------------------------------------------------------
 1 | library(mnormt)
 2 | 
 3 | # Draws (Y(0), Y(1)) pairs with mean 0 and unit variances at three different
 4 | # correlations and plots them side by side in one PDF.
 5 | 
 6 | # fixed seed so that re-running the script reproduces the same figure
 7 | set.seed(286)
 8 | 
 9 | # correlation shown in each panel (also used for the panel titles)
10 | r <- c(0, .85, 1)
11 | 
12 | # covariance matrices
13 | Sigma1 <- diag(2)
14 | Sigma2 <- matrix(c(1, r[2], r[2], 1), nrow = 2)
15 | 
16 | # generate random draws
17 | n <- 5000
18 | Y <- rnorm(n)
19 | bvn1 <- rmnorm(n, mean = c(0, 0), varcov = Sigma1)
20 | bvn2 <- rmnorm(n, mean = c(0, 0), varcov = Sigma2)
21 | bvn3 <- cbind(Y, Y) # r = 1: both columns are the same draw
22 | sims <- list(bvn1, bvn2, bvn3)
23 | 
24 | # plotting
25 | pdf("Module3-nonidentifiability.pdf", width = 15, height = 5)  # Adjust size as needed
26 | par(mfrow = c(1, 3))
27 | par(mar = rep(6, 4))
28 | for (i in seq_along(sims)) {
29 |   mat <- sims[[i]]
30 |   plot(mat[, 1], mat[, 2],
31 |        main = paste("r =", r[i]),
32 |        cex.main = 3,
33 |        cex.axis = 2,
34 |        xlim = c(-5, 5),
35 |        ylim = c(-5, 5),
36 |        cex.lab = 3,
37 |        xlab = "Y(0)",
38 |        ylab = "Y(1)",
39 |        axes = FALSE)
40 |   # draw tick marks without numeric labels
41 |   Axis(side = 1, labels = FALSE)
42 |   Axis(side = 2, labels = FALSE)
43 | }
44 | 
45 | # Finish plotting
46 | dev.off()


--------------------------------------------------------------------------------
/plotscripts/Module4-TwoStructuralCases.R:
--------------------------------------------------------------------------------
 1 | 
 2 | # Plot both potential outcomes per unit (blue Y0 near x = 0, orange Y1 near
 3 | # x = 1), joined by a gray line. Y0, Y1: equal-length numeric vectors;
 4 | # title: plot title. Uses length(Y0) as the unit count, not a global `n`.
 5 | plotter <- function(Y0,Y1, title){
 6 |   stopifnot(length(Y0) == length(Y1))
 7 |   n_units <- length(Y0)
 8 |   jitter_amount <- 0.05  # Adjust this value to control the amount of jitter
 9 |   jitter_x0 <- jitter(rep(0, n_units), amount = jitter_amount)
10 |   jitter_x1 <- jitter(rep(1, n_units), amount = jitter_amount)
11 |   plot(jitter_x0, Y0,
12 |        ylim = range(Y0, Y1),
13 |        xlim = c(-.5, 1.5),
14 |        xaxt = "n", # removes the x-axis ticks and labels
15 |        xlab = "",
16 |        ylab = "Y(T)",
17 |        col = "blue",
18 |        pch = 20,
19 |        main = title,
20 |        cex.lab = 1.5,
21 |        cex.axis = 1.5,
22 |        cex = 1.5
23 |   )
24 |   points(jitter_x1, Y1, col = "orange", pch = 20, cex = 1.5)
25 | 
26 |   # Adding custom axis labels
27 |   axis(1, at = c(0, 1), labels = c("T = 0", "T = 1"), cex.axis = 1.5)
28 | 
29 |   # segments() is vectorised: one gray line per unit from Y0 to Y1
30 |   segments(jitter_x0, Y0, jitter_x1, Y1, col = "gray")
31 | }
32 | 
33 | 
34 | # fixed seed so that both panels come out the same on every run
35 | set.seed(60)
36 | par(mfrow = c(1, 2)) # two panels side by side
37 | n <- 22
38 | alpha <- 2
39 | beta <- 4
40 | 
41 | # model 1: Y(0) and Y(1) share one noise draw per unit
42 | shared_noise <- rnorm(n, mean = 0, sd = 1)
43 | Y0 <- alpha + shared_noise
44 | Y1 <- alpha + beta + shared_noise
45 | plotter(Y0, Y1, title = "Constant Additive Treatment Effect")
46 | 
47 | # model 2: separate noise draws for Y(0) and Y(1), same sd (homoskedasticity)
48 | noise_control <- rnorm(n, mean = 0, sd = 1)
49 | noise_treated <- rnorm(n, mean = 0, sd = 1)
50 | Y0 <- alpha + noise_control
51 | Y1 <- alpha + beta + noise_treated
52 | plotter(Y0, Y1, title = "Heterogeneous Treatment Effect")
53 | 


--------------------------------------------------------------------------------
/plotscripts/Module8-BiasCorrection.R:
--------------------------------------------------------------------------------
 1 | #'
 2 | #' Schematic of bias correction for one matched pair: the difference between
 3 | #' the regression line's height at X_i and at X_k is added to the matched
 4 | #' control's outcome to give the corrected estimate of Y_i(0).
 5 | #'
 6 | #' Comment: the spacing of this plot is a little finicky.
 7 | #' Very annoying to create.
 8 | #'
 9 | 
10 | #general plot set-ups (previous settings are restored at the end)
11 | old_par <- par(mfrow = c(1, 1),
12 |                mar = c(4, 4, 2, 2),
13 |                cex.axis = 1.25)
14 | 
15 | #setup plot
16 | plot(1:10, 1:10, type = "n",
17 |      axes = FALSE,
18 |      xlab = "",
19 |      ylab = "",
20 |      xlim = c(0, 14),
21 |      cex.lab = 1)
22 | Axis(side = 1, labels = FALSE)
23 | Axis(side = 2, labels = FALSE)
24 | 
25 | #draw some line to represent a line of best fit
26 | alpha <- 2
27 | beta <- .8
28 | abline(a = alpha, b = beta, lwd = 3)
29 | 
30 | #height of that line at covariate value x
31 | fitted_at <- function(x) alpha + beta * x
32 | 
33 | #treated unit
34 | tx <- 5
35 | ty <- 8
36 | text(tx - .5, ty + .5, "Treated Unit i", col = "red")
37 | segments(tx, 0, tx, ty, lty = 2, col = "grey")
38 | segments(0, ty, tx, ty, lty = 2, col = "grey")
39 | points(tx, ty, col = "red", cex = 3, pch = 18)
40 | 
41 | #matched control
42 | cx <- 6
43 | cy <- 5
44 | text(cx + 2.5, cy, "Matched Control Unit k", col = "blue")
45 | segments(cx, 0, cx, cy, lty = 2, col = "grey")
46 | segments(0, cy, cx, cy, lty = 2, col = "grey")
47 | points(cx, cy, col = "blue", cex = 3, pch = 20)
48 | 
49 | #other controls scattered around line
50 | #(base adjustcolor() gives the translucent blue, so no extra package is needed)
51 | lblue <- adjustcolor("blue", alpha.f = .45)
52 | x <- c(1, 2, 2.5, 3, 7, 6.7, 7.8, 9)
53 | y <- c(3.2, 4.8, 2.5, 4.5, 9.5, 8, 6.9, 8.7)
54 | points(x, y, col = lblue, cex = 3, pch = 20)
55 | #text(7.5, 10, "Other Controls", col = lblue)
56 | text(11, 9, "Regression line \n fit on all controls", col = lblue)
57 | 
58 | #fitted values and correction
59 | #(evaluated at tx and cx so they follow the two units if those are moved;
60 | # named fit_i / fit_k because a variable called `c` shadows the name of base::c)
61 | fit_i <- fitted_at(tx)
62 | fit_k <- fitted_at(cx)
63 | segments(0, fit_i, tx, fit_i, lty = 2, col = "grey")
64 | segments(0, fit_k, cx, fit_k, lty = 2, col = "grey")
65 | #segments(cx, fit_i, tx, fit_i, col = "grey", lty = 2, lwd = 1)
66 | segments(cx, fit_k, cx, fit_i, col = "purple", lty = 3, lwd = 3)
67 | points(cx, fit_k, col = "black", cex = 2, pch = 17)
68 | points(tx, fit_i, col = "black", cex = 2, pch = 17)
69 | text(8.5, fit_i + .43,
70 |      expression("Correction " * hat(Y)[i](0) - hat(Y)[k](0)),
71 |      col = "black")
72 | 
73 | #corrected estimate: matched control's outcome shifted by the correction
74 | corrected <- cy + (fit_i - fit_k)
75 | points(tx, corrected, cex = 3, pch = 18, col = "purple")
76 | text(tx + 3, corrected,
77 |      expression("Corrected Estimate of " * Y[i](0)),
78 |      col = "purple")
79 | segments(tx, corrected, tx, cy, col = "purple", lty = 3, lwd = 3)
80 | 
81 | #labels
82 | Axis(side = 1,
83 |      at = c(tx, cx),
84 |      labels = c(expression(X[i]), expression(X[k])),
85 |      tick = TRUE)
86 | 
87 | Axis(side = 2,
88 |      at = c(cy, ty, fit_i, fit_k),
89 |      labels = c(expression(Y[k](0)),
90 |                 expression(Y[i](1)),
91 |                 expression(hat(Y)[i](0)),
92 |                 expression(hat(Y)[k](0))),
93 |      tick = TRUE,
94 |      las = 2)
95 | 
96 | par(old_par)


--------------------------------------------------------------------------------