├── 1_causal_salad.r
├── README.md
├── Slides_Part_1.pdf
├── Slides_Part_2.pdf
└── Slides_Part_3.pdf


/1_causal_salad.r:
--------------------------------------------------------------------------------
  1 | # Causal Inference Workshop - 9 September 2021
  2 | 
  3 | library(rethinking)
  4 | 
  5 | ####################################################
  6 | # TWO MOMS
  7 | 
  8 | # first without confound
  9 | set.seed(1908)
 10 | N <- 200 # number of pairs
 11 | # birth order and family sizes
 12 | B1 <- rbinom(N,size=1,prob=0.5) # 50% first borns
 13 | M <- rnorm( N , 2*B1 )
 14 | B2 <- rbinom(N,size=1,prob=0.5)
 15 | D <- rnorm( N , 2*B2 + 0*M ) # change the 0 to turn on causal influence of mom
 16 | 
 17 | # model with B1 has worse precision, model with B2 has better precision
 18 | # what is going on here?
 19 | summary( lm( D ~ M ) )
 20 | summary( lm( D ~ M + B1 ) )
 21 | summary( lm( D ~ M + B2 ) )
 22 | 
 23 | plot( coeftab( lm( D ~ M ) , lm( D ~ M + B1 ) ,  lm( D ~ M + B2 ) ) , pars="M" )
 24 | 
 25 | # now with confound, it gets worse for B1
 26 | set.seed(1908)
 27 | N <- 200 # number of pairs
 28 | U <- rnorm(N) # simulate confounds
 29 | # birth order and family sizes
 30 | B1 <- rbinom(N,size=1,prob=0.5) # 50% first borns
 31 | M <- rnorm( N , 2*B1 + U )
 32 | B2 <- rbinom(N,size=1,prob=0.5)
 33 | D <- rnorm( N , 2*B2 + U + 0*M ) # change the 0 to turn on causal influence of mom
 34 | 
 35 | # fit the two regression models
 36 | summary( lm( D ~ M ) )
 37 | summary( lm( D ~ M + B1 + B2 ) )
 38 | 
 39 | plot( coeftab( lm( D ~ M ) , lm( D ~ M + B1 ) ,  lm( D ~ M + B2 ) ) , pars="M" )
 40 | 
 41 | # compare the models with AIC
 42 | AIC( lm( D ~ M ) )
 43 | AIC( lm( D ~ M + B1 ) )
 44 | 
 45 | # full-luxury bayesian inference
 46 | 
 47 | # best case scenario, if we observed confound
 48 | precis( lm( D ~ M + B2 + U ) )
 49 | 
 50 | library(rethinking)
 51 | library(cmdstanr)
 52 | dat <- list(N=N,M=M,D=D,B1=B1,B2=B2)
 53 | set.seed(1908)
 54 | flbi <- ulam(
 55 |     alist(
 56 |         # mom model
 57 |             M ~ normal( mu , sigma ),
 58 |             mu <- a1 + b*B1 + k*U[i],
 59 |         # daughter model
 60 |             D ~ normal( nu , tau ),
 61 |             nu <- a2 + b*B2 + m*M + k*U[i],
 62 |         # B1 and B2
 63 |             B1 ~ bernoulli(p),
 64 |             B2 ~ bernoulli(p),
 65 |         # unmeasured confound
 66 |             vector[N]:U ~ normal(0,1),
 67 |         # priors
 68 |             c(a1,a2,b,m) ~ normal( 0 , 0.5 ),
 69 |             c(k,sigma,tau) ~ exponential( 1 ),
 70 |             p ~ beta(2,2)
 71 |     ), data=dat , chains=4 , cores=4 , iter=2000 , cmdstan=TRUE )
 72 | 
 73 | precis(flbi)
 74 | 
 75 | m <- M
 76 | plot( coeftab( lm( D ~ m ) , lm( D ~ m + B1 ) ,  lm( D ~ m + B2 ) , flbi ) , pars="m" )
 77 | 
 78 | post <- extract.samples(flbi)
 79 | Uest <- apply(post$U,2,mean)
 80 | blank()
 81 | plot(U,Uest,xlab="U (simulated)",ylab="U (estimated)", col=2 , lwd=2 )
 82 | abline(a=0,b=1,lty=2)
 83 | 
 84 | # version that marginalizes out the missing data
 85 | flbi_plus <- ulam(
 86 |     alist(
 87 |         c(M,D) ~ multi_normal( c(mu,nu) , Rho , Sigma ),
 88 |         mu <- a1 + b*B1,
 89 |         nu <- a2 + b*B2 + m*M,
 90 |         c(a1,a2,b,m) ~ normal( 0 , 0.5 ),
 91 |         Rho ~ lkj_corr( 2 ),
 92 |         Sigma ~ exponential( 1 )
 93 |     ), data=dat , chains=4 , cores=4 , cmdstan=TRUE )
 94 | 
 95 | precis(flbi_plus,3)
 96 | 
 97 | # more exotic example - no instrument (B1 -> D), but have two measures of U
 98 | 
 99 | set.seed(1908)
100 | N <- 200 # number of pairs
101 | U <- rnorm(N,0,1) # simulate confound
102 | V <- rnorm(N,U,1)
103 | W <- rnorm(N,U,1)
104 | # birth order and family sizes
105 | B1 <- rbinom(N,size=1,prob=0.5) # 50% first borns
106 | M <- rnorm( N , 2*B1 + U )
107 | B2 <- rbinom(N,size=1,prob=0.5)
108 | D <- rnorm( N , 2*B2 + 0.5*B1 + U + 0*M )
109 | 
110 | # confounded regression
111 | precis( lm( D ~ M + B1 + B2 + V + W ) )
112 | 
113 | # full-luxury bayesian inference
114 | dat2 <- list(N=N,M=M,D=D,B1=B1,B2=B2,V=V,W=W)
115 | flbi2 <- ulam(
116 |     alist(
117 |         M ~ normal( muM , sigmaM ),
118 |         muM <- a1 + b*B1 + k*U[i],
119 |         D ~ normal( muD , sigmaD ),
120 |         muD <- a2 + b*B2 + d*B1 + m*M + k*U[i],
121 |         W ~ normal( muW , sigmaW ),
122 |         muW <- a3 + w*U[i],
123 |         V ~ normal( muV , sigmaV ),
124 |         muV <- a4 + v*U[i],
125 |         vector[N]:U ~ normal(0,1),
126 |         c(a1,a2,a3,a4,b,d,m) ~ normal( 0 , 0.5 ),
127 |         c(k,w,v) ~ exponential( 1 ),
128 |         c(sigmaM,sigmaD,sigmaW,sigmaV) ~ exponential( 1 )
129 |     ), data=dat2 , chains=4 , cores=4 , iter=2000 , cmdstan=TRUE )
130 | 
131 | precis(flbi2)
132 | 
133 | ####################################################
134 | # PEER BIAS
135 | 
136 | library(rethinking)
137 | 
138 | # simulation in which there is no discrimination
139 | # conditioning on E reveals the truth
140 | set.seed(1914)
141 | N <- 500
142 | X <- rbern(N,prob=0.5)
143 | pY <- c( 0.25 , 0.05 )
144 | pE <- X*inv_logit(-2) + (1-X)*inv_logit(+1)
145 | E <- sapply( 1:N , function(n) sample( 1:2 , size=1 , prob=c(pE[n],1-pE[n]) ) )
146 | 
147 | p <- pY[E]
148 | Y <- rbern(N,prob=p)
149 | 
150 | precis( glm( Y ~ X , family=binomial ) )
151 | 
152 | precis( glm( Y ~ X + E , family=binomial ) )
153 | 
154 | mg0 <- glm( Y ~ X , family=binomial )
155 | mg1 <- glm( Y ~ X + E , family=binomial )
156 | plot( coeftab( mg0 , mg1 ) , pars="X" )
157 | 
158 | 
159 | # simulation in which there really is discrimination
160 | # condition on E hides the truth
161 | set.seed(1914)
162 | set.seed(1964)
163 | N <- 500
164 | Q <- rnorm(N)
165 | X <- rbern(N,prob=0.5)
166 | pY <- c( 0.25 , 0.1 )
167 | pE <- X*inv_logit(Q-2) + (1-X)*inv_logit(Q+1)
168 | E <- sapply( 1:N , function(n) sample( 1:2 , size=1 , prob=c(pE[n],1-pE[n]) ) )
169 | 
170 | p <- inv_logit( logit(pY[E]) + Q - X )
171 | Y <- rbern(N,prob=p)
172 | 
173 | precis( glm( Y ~ X , family=binomial ) )
174 | 
175 | precis( glm( Y ~ X + E , family=binomial ) )
176 | 
177 | precis( glm( Y ~ X + E + Q , family=binomial ) )
178 | 
179 | mg0 <- glm( Y ~ X , family=binomial )
180 | mg1 <- glm( Y ~ X + E , family=binomial )
181 | mg2 <- glm( Y ~ X + E + Q , family=binomial )
182 | plot( coeftab( mg0 , mg1 ) , pars="X" )
183 | 
184 | # descendants of Q
185 | R1 <- rnorm(N,0.5*Q)
186 | R2 <- rnorm(N,0.5*Q)
187 | 
188 | mg3 <- glm( Y ~ X + E + R1 + R2 , family=binomial )
189 | precis( mg3 )
190 | plot( coeftab( mg0 , mg1 , mg3 ) , pars="X" )
191 | 
192 | # bayes model
193 | dat <- list( Y=Y , E=E , XX=X , id=1:N , R1=R1 , R2=R2 )
194 | mR <- ulam(
195 |     alist(
196 |         # Y model
197 |         Y ~ bernoulli(p),
198 |         logit(p) <- a[E] + X*XX + h*Q[id],
199 |         a[E] ~ normal(0,1),
200 |         X ~ normal(0,1),
201 |         h ~ half_normal(0,1),
202 |         # Q model
203 |         vector[id]:Q ~ normal(0,1),
204 |         R1 ~ normal(Q,1),
205 |         R2 ~ normal(Q,1)
206 |     ) , data=dat , chains=4 , cores=4 , cmdstan=TRUE )
207 | 
208 | precis(mR,2,omit="Q")
209 | 
210 | plot( coeftab( mg0 , mg1 , mg3 , mR ) , pars="X" )
211 | 
212 | post <- extract.samples(mR)
213 | Qest <- apply(post$Q,2,mean)
214 | blank()
215 | plot(Q,Qest)
216 | abline(a=0,b=1,lty=2)
217 | 
218 | # confounded but we do a partial identification analysis
219 | # we use an informative prior for h (effect of Q)
220 | 
221 | dat2 <- list( Y=Y , E=E , I=I , id=1:N )
222 | 
223 | m2 <- ulam(
224 |     alist(
225 |         # Y model
226 |         Y ~ bernoulli(p),
227 |         logit(p) <- a[E] + g*X + h*Q[id],
228 |         a[E] ~ normal(0,1),
229 |         g ~ normal(0,1),
230 |         h ~ uniform(0,2),
231 |         # Q model
232 |         vector[id]:Q ~ normal(0,1)
233 |     ) , data=dat2 , chains=4 , cores=4 )
234 | 
235 | precis(m2,2,omit="Q")
236 | 
237 | post <- extract.samples(m2)
238 | 
239 | plot( post$h , post$g , pch=16 , col=grau(0.2) , cex=2 , ylab="effect of I" , xlab="effect of Q" )
240 | abline(h=0,lty=2)
241 | 
242 | quantile(post$h)
243 | 
244 | ############################
245 | # d-separation plots
246 | a <- 0.7
247 | cols <- c( col.alpha(1,a) , col.alpha(2,a) )
248 | 
249 | # pipe
250 | 
251 | N <- 1000
252 | X <- rnorm(N)
253 | Z <- rbern(N,inv_logit(X))
254 | Y <- rnorm(N,(2*Z-1))
255 | 
256 | plot( X , Y , col=cols[Z+1] , pch=16 )
257 | abline(lm(Y[Z==1]~X[Z==1]),col=2,lwd=3)
258 | abline(lm(Y[Z==0]~X[Z==0]),col=1,lwd=3)
259 | abline(lm(Y~X),lwd=3,lty=3)
260 | 
261 | # fork
262 | 
263 | N <- 1000
264 | Z <- rbern(N)
265 | X <- rnorm(N,2*Z-1)
266 | Y <- rnorm(N,(2*Z-1))
267 | 
268 | plot( X , Y , col=cols[Z+1] , pch=16 )
269 | abline(lm(Y[Z==1]~X[Z==1]),col=2,lwd=3)
270 | abline(lm(Y[Z==0]~X[Z==0]),col=1,lwd=3)
271 | abline(lm(Y~X),lwd=3,lty=3)
272 | 
273 | # collider
274 | 
275 | N <- 1000
276 | X <- rnorm(N)
277 | Y <- rnorm(N)
278 | Z <- rbern(N,inv_logit(2*X+2*Y-2))
279 | 
280 | plot( X , Y , col=cols[Z+1] , pch=16 )
281 | abline(lm(Y[Z==1]~X[Z==1]),col=2,lwd=3)
282 | abline(lm(Y[Z==0]~X[Z==0]),col=1,lwd=3)
283 | abline(lm(Y~X),lwd=3,lty=3)
284 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # causal_salad_2021
 2 | One day course on causal inference, MPI-EVA 9 September 2021
 3 | 
 4 | ## Outline
 5 | 
 6 | Intro: Foundations of causal inference
 7 | Part 1: Inadequacy of ordinary statistical procedures
 8 | Part 2: Causal design with structural causal models
 9 | Part 3: Bayesian causal inference
10 | 
11 | There is nothing special to prepare ahead of time. Paper and pencil will be useful for the in-class exercises.
12 | 
13 | ## Slides
14 | The slides are divided into three parts, due to the size of the files. You should download the files, rather than try to view them in your browser.
15 | 
16 | ## Code examples
17 | The code for the examples in the lecture are found in the R script.
18 | 
19 | ## Recording
20 | The presentation was recorded and is available here https://www.youtube.com/watch?v=KNPYUVmY3NM
21 | 


--------------------------------------------------------------------------------
/Slides_Part_1.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/causal_salad_2021/9fc04446966d54b5026749af2f3c991f1d897107/Slides_Part_1.pdf


--------------------------------------------------------------------------------
/Slides_Part_2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/causal_salad_2021/9fc04446966d54b5026749af2f3c991f1d897107/Slides_Part_2.pdf


--------------------------------------------------------------------------------
/Slides_Part_3.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/causal_salad_2021/9fc04446966d54b5026749af2f3c991f1d897107/Slides_Part_3.pdf


--------------------------------------------------------------------------------