├── 1_causal_salad.r ├── README.md ├── Slides_Part_1.pdf ├── Slides_Part_2.pdf └── Slides_Part_3.pdf /1_causal_salad.r: -------------------------------------------------------------------------------- 1 | # Causal Inference Workshop - 9 September 2021 2 | 3 | library(rethinking) 4 | 5 | #################################################### 6 | # TWO MOMS 7 | 8 | # first without confound 9 | set.seed(1908) 10 | N <- 200 # number of pairs 11 | # birth order and family sizes 12 | B1 <- rbinom(N,size=1,prob=0.5) # 50% first borns 13 | M <- rnorm( N , 2*B1 ) 14 | B2 <- rbinom(N,size=1,prob=0.5) 15 | D <- rnorm( N , 2*B2 + 0*M ) # change the 0 to turn on causal influence of mom 16 | 17 | # model with B1 has worse precision, model with B2 has better precision 18 | # what is going on here? 19 | summary( lm( D ~ M ) ) 20 | summary( lm( D ~ M + B1 ) ) 21 | summary( lm( D ~ M + B2 ) ) 22 | 23 | plot( coeftab( lm( D ~ M ) , lm( D ~ M + B1 ) , lm( D ~ M + B2 ) ) , pars="M" ) 24 | 25 | # now with confound, it gets worse for B1 26 | set.seed(1908) 27 | N <- 200 # number of pairs 28 | U <- rnorm(N) # simulate confounds 29 | # birth order and family sizes 30 | B1 <- rbinom(N,size=1,prob=0.5) # 50% first borns 31 | M <- rnorm( N , 2*B1 + U ) 32 | B2 <- rbinom(N,size=1,prob=0.5) 33 | D <- rnorm( N , 2*B2 + U + 0*M ) # change the 0 to turn on causal influence of mom 34 | 35 | # fit the two regression models 36 | summary( lm( D ~ M ) ) 37 | summary( lm( D ~ M + B1 + B2 ) ) 38 | 39 | plot( coeftab( lm( D ~ M ) , lm( D ~ M + B1 ) , lm( D ~ M + B2 ) ) , pars="M" ) 40 | 41 | # compare the models with AIC 42 | AIC( lm( D ~ M ) ) 43 | AIC( lm( D ~ M + B1 ) ) 44 | 45 | # full-luxury bayesian inference 46 | 47 | # best case scenario, if we observed confound 48 | precis( lm( D ~ M + B2 + U ) ) 49 | 50 | library(rethinking) 51 | library(cmdstanr) 52 | dat <- list(N=N,M=M,D=D,B1=B1,B2=B2) 53 | set.seed(1908) 54 | flbi <- ulam( 55 | alist( 56 | # mom model 57 | M ~ normal( mu , sigma ), 58 | mu <- a1 + b*B1 + k*U[i], 59 | # daughter model 60 | D ~ normal( nu , tau ), 61 | nu <- a2 + b*B2 + m*M + k*U[i], 62 | # B1 and B2 63 | B1 ~ bernoulli(p), 64 | B2 ~ bernoulli(p), 65 | # unmeasured confound 66 | vector[N]:U ~ normal(0,1), 67 | # priors 68 | c(a1,a2,b,m) ~ normal( 0 , 0.5 ), 69 | c(k,sigma,tau) ~ exponential( 1 ), 70 | p ~ beta(2,2) 71 | ), data=dat , chains=4 , cores=4 , iter=2000 , cmdstan=TRUE ) 72 | 73 | precis(flbi) 74 | 75 | m <- M 76 | plot( coeftab( lm( D ~ m ) , lm( D ~ m + B1 ) , lm( D ~ m + B2 ) , flbi ) , pars="m" ) 77 | 78 | post <- extract.samples(flbi) 79 | Uest <- apply(post$U,2,mean) 80 | blank() 81 | plot(U,Uest,xlab="U (simulated)",ylab="U (estimated)", col=2 , lwd=2 ) 82 | abline(a=0,b=1,lty=2) 83 | 84 | # version that marginalizes out the missing data 85 | flbi_plus <- ulam( 86 | alist( 87 | c(M,D) ~ multi_normal( c(mu,nu) , Rho , Sigma ), 88 | mu <- a1 + b*B1, 89 | nu <- a2 + b*B2 + m*M, 90 | c(a1,a2,b,m) ~ normal( 0 , 0.5 ), 91 | Rho ~ lkj_corr( 2 ), 92 | Sigma ~ exponential( 1 ) 93 | ), data=dat , chains=4 , cores=4 , cmdstan=TRUE ) 94 | 95 | precis(flbi_plus,3) 96 | 97 | # more exotic example - no instrument (B1 -> D), but have two measures of U 98 | 99 | set.seed(1908) 100 | N <- 200 # number of pairs 101 | U <- rnorm(N,0,1) # simulate confound 102 | V <- rnorm(N,U,1) 103 | W <- rnorm(N,U,1) 104 | # birth order and family sizes 105 | B1 <- rbinom(N,size=1,prob=0.5) # 50% first borns 106 | M <- rnorm( N , 2*B1 + U ) 107 | B2 <- rbinom(N,size=1,prob=0.5) 108 | D <- rnorm( N , 2*B2 + 0.5*B1 + U + 0*M ) 109 | 110 | # confounded regression 111 | precis( lm( D ~ M + B1 + B2 + V + W ) ) 112 | 113 | # full-luxury bayesian inference 114 | dat2 <- list(N=N,M=M,D=D,B1=B1,B2=B2,V=V,W=W) 115 | flbi2 <- ulam( 116 | alist( 117 | M ~ normal( muM , sigmaM ), 118 | muM <- a1 + b*B1 + k*U[i], 119 | D ~ normal( muD , sigmaD ), 120 | muD <- a2 + b*B2 + d*B1 + m*M + k*U[i], 121 | W ~ normal( muW , sigmaW ), 122 | muW <- a3 + w*U[i], 123 | V ~ normal( muV , sigmaV ), 124 | muV <- a4 + v*U[i], 125 | vector[N]:U ~ normal(0,1), 126 | c(a1,a2,a3,a4,b,d,m) ~ normal( 0 , 0.5 ), 127 | c(k,w,v) ~ exponential( 1 ), 128 | c(sigmaM,sigmaD,sigmaW,sigmaV) ~ exponential( 1 ) 129 | ), data=dat2 , chains=4 , cores=4 , iter=2000 , cmdstan=TRUE ) 130 | 131 | precis(flbi2) 132 | 133 | #################################################### 134 | # PEER BIAS 135 | 136 | library(rethinking) 137 | 138 | # simulation in which there is no discrimination 139 | # conditioning on E reveals the truth 140 | set.seed(1914) 141 | N <- 500 142 | X <- rbern(N,prob=0.5) 143 | pY <- c( 0.25 , 0.05 ) 144 | pE <- X*inv_logit(-2) + (1-X)*inv_logit(+1) 145 | E <- sapply( 1:N , function(n) sample( 1:2 , size=1 , prob=c(pE[n],1-pE[n]) ) ) 146 | 147 | p <- pY[E] 148 | Y <- rbern(N,prob=p) 149 | 150 | precis( glm( Y ~ X , family=binomial ) ) 151 | 152 | precis( glm( Y ~ X + E , family=binomial ) ) 153 | 154 | mg0 <- glm( Y ~ X , family=binomial ) 155 | mg1 <- glm( Y ~ X + E , family=binomial ) 156 | plot( coeftab( mg0 , mg1 ) , pars="X" ) 157 | 158 | 159 | # simulation in which there really is discrimination 160 | # condition on E hides the truth 161 | set.seed(1914) 162 | set.seed(1964) 163 | N <- 500 164 | Q <- rnorm(N) 165 | X <- rbern(N,prob=0.5) 166 | pY <- c( 0.25 , 0.1 ) 167 | pE <- X*inv_logit(Q-2) + (1-X)*inv_logit(Q+1) 168 | E <- sapply( 1:N , function(n) sample( 1:2 , size=1 , prob=c(pE[n],1-pE[n]) ) ) 169 | 170 | p <- inv_logit( logit(pY[E]) + Q - X ) 171 | Y <- rbern(N,prob=p) 172 | 173 | precis( glm( Y ~ X , family=binomial ) ) 174 | 175 | precis( glm( Y ~ X + E , family=binomial ) ) 176 | 177 | precis( glm( Y ~ X + E + Q , family=binomial ) ) 178 | 179 | mg0 <- glm( Y ~ X , family=binomial ) 180 | mg1 <- glm( Y ~ X + E , family=binomial ) 181 | mg2 <- glm( Y ~ X + E + Q , family=binomial ) 182 | plot( coeftab( mg0 , mg1 ) , pars="X" ) 183 | 184 | # descendants of Q 185 | R1 <- rnorm(N,0.5*Q) 186 | R2 <- rnorm(N,0.5*Q) 187 | 188 | mg3 <- glm( Y ~ X + E + R1 + R2 , family=binomial ) 189 | precis( mg3 ) 190 | plot( coeftab( mg0 , mg1 , mg3 ) , pars="X" ) 191 | 192 | # bayes model 193 | dat <- list( Y=Y , E=E , XX=X , id=1:N , R1=R1 , R2=R2 ) 194 | mR <- ulam( 195 | alist( 196 | # Y model 197 | Y ~ bernoulli(p), 198 | logit(p) <- a[E] + X*XX + h*Q[id], 199 | a[E] ~ normal(0,1), 200 | X ~ normal(0,1), 201 | h ~ half_normal(0,1), 202 | # Q model 203 | vector[id]:Q ~ normal(0,1), 204 | R1 ~ normal(Q,1), 205 | R2 ~ normal(Q,1) 206 | ) , data=dat , chains=4 , cores=4 , cmdstan=TRUE ) 207 | 208 | precis(mR,2,omit="Q") 209 | 210 | plot( coeftab( mg0 , mg1 , mg3 , mR ) , pars="X" ) 211 | 212 | post <- extract.samples(mR) 213 | Qest <- apply(post$Q,2,mean) 214 | blank() 215 | plot(Q,Qest) 216 | abline(a=0,b=1,lty=2) 217 | 218 | # confounded but we do a partial identification analysis 219 | # we use an informative prior for h (effect of Q) 220 | 221 | dat2 <- list( Y=Y , E=E , I=I , id=1:N ) 222 | 223 | m2 <- ulam( 224 | alist( 225 | # Y model 226 | Y ~ bernoulli(p), 227 | logit(p) <- a[E] + g*X + h*Q[id], 228 | a[E] ~ normal(0,1), 229 | g ~ normal(0,1), 230 | h ~ uniform(0,2), 231 | # Q model 232 | vector[id]:Q ~ normal(0,1) 233 | ) , data=dat2 , chains=4 , cores=4 ) 234 | 235 | precis(m2,2,omit="Q") 236 | 237 | post <- extract.samples(m2) 238 | 239 | plot( post$h , post$g , pch=16 , col=grau(0.2) , cex=2 , ylab="effect of I" , xlab="effect of Q" ) 240 | abline(h=0,lty=2) 241 | 242 | quantile(post$h) 243 | 244 | ############################ 245 | # d-separation plots 246 | a <- 0.7 247 | cols <- c( col.alpha(1,a) , col.alpha(2,a) ) 248 | 249 | # pipe 250 | 251 | N <- 1000 252 | X <- rnorm(N) 253 | Z <- rbern(N,inv_logit(X)) 254 | Y <- rnorm(N,(2*Z-1)) 255 | 256 | plot( X , Y , col=cols[Z+1] , pch=16 ) 257 | abline(lm(Y[Z==1]~X[Z==1]),col=2,lwd=3) 258 | abline(lm(Y[Z==0]~X[Z==0]),col=1,lwd=3) 259 | abline(lm(Y~X),lwd=3,lty=3) 260 | 261 | # fork 262 | 263 | N <- 1000 264 | Z <- rbern(N) 265 | X <- rnorm(N,2*Z-1) 266 | Y <- rnorm(N,(2*Z-1)) 267 | 268 | plot( X , Y , col=cols[Z+1] , pch=16 ) 269 | abline(lm(Y[Z==1]~X[Z==1]),col=2,lwd=3) 270 | abline(lm(Y[Z==0]~X[Z==0]),col=1,lwd=3) 271 | abline(lm(Y~X),lwd=3,lty=3) 272 | 273 | # collider 274 | 275 | N <- 1000 276 | X <- rnorm(N) 277 | Y <- rnorm(N) 278 | Z <- rbern(N,inv_logit(2*X+2*Y-2)) 279 | 280 | plot( X , Y , col=cols[Z+1] , pch=16 ) 281 | abline(lm(Y[Z==1]~X[Z==1]),col=2,lwd=3) 282 | abline(lm(Y[Z==0]~X[Z==0]),col=1,lwd=3) 283 | abline(lm(Y~X),lwd=3,lty=3) 284 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # causal_salad_2021 2 | One day course on causal inference, MPI-EVA 9 September 2021 3 | 4 | ## Outline 5 | 6 | Intro: Foundations of causal inference 7 | Part 1: Inadequacy of ordinary statistical procedures 8 | Part 2: Causal design with structural causal models 9 | Part 3: Bayesian causal inference 10 | 11 | There is nothing special to prepare ahead of time. Paper and pencil will be useful for the in-class exercises. 12 | 13 | ## Slides 14 | The slides are divided into three parts, due to the size of the files. You should download the files, rather than try to view them in your browser. 15 | 16 | ## Code examples 17 | The code for the examples in the lecture are found in the R script. 18 | 19 | ## Recording 20 | The presentation was recorded and is available here https://www.youtube.com/watch?v=KNPYUVmY3NM 21 | -------------------------------------------------------------------------------- /Slides_Part_1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmcelreath/causal_salad_2021/9fc04446966d54b5026749af2f3c991f1d897107/Slides_Part_1.pdf -------------------------------------------------------------------------------- /Slides_Part_2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmcelreath/causal_salad_2021/9fc04446966d54b5026749af2f3c991f1d897107/Slides_Part_2.pdf -------------------------------------------------------------------------------- /Slides_Part_3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmcelreath/causal_salad_2021/9fc04446966d54b5026749af2f3c991f1d897107/Slides_Part_3.pdf --------------------------------------------------------------------------------