├── homework
│   ├── week01
│   │   ├── week01.pdf
│   │   ├── week01_solutions.pdf
│   │   └── seminar_week01.R
│   ├── week02
│   │   ├── week02.pdf
│   │   ├── week02_solutions.pdf
│   │   └── week02_script.R
│   ├── week03
│   │   ├── week03.pdf
│   │   ├── week03_solutions.pdf
│   │   └── week03_solutions.R
│   ├── week04
│   │   ├── week04.pdf
│   │   ├── week04_solutions.pdf
│   │   └── week04_script.R
│   ├── week05
│   │   ├── week05.pdf
│   │   ├── week05_solutions.pdf
│   │   └── week05_script.R
│   ├── week06
│   │   ├── week06.pdf
│   │   ├── week06_solutions.pdf
│   │   └── week06_script.R
│   ├── week07
│   │   ├── week07.pdf
│   │   ├── week07_solutions.pdf
│   │   └── week07_script.R
│   ├── week08
│   │   ├── week08.pdf
│   │   ├── week08_solutions.pdf
│   │   └── week08_script.R
│   ├── week09
│   │   ├── week09.pdf
│   │   ├── week09_solutions.pdf
│   │   └── week09_script.R
│   └── week10
│       ├── week10.pdf
│       ├── week10_solutions.pdf
│       └── week10_script.R
├── related-reading.md
└── README.md

/homework/week01/week01.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week01/week01.pdf
--------------------------------------------------------------------------------
/homework/week02/week02.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week02/week02.pdf
--------------------------------------------------------------------------------
/homework/week03/week03.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week03/week03.pdf
--------------------------------------------------------------------------------
/homework/week04/week04.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week04/week04.pdf
--------------------------------------------------------------------------------
/homework/week05/week05.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week05/week05.pdf
--------------------------------------------------------------------------------
/homework/week06/week06.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week06/week06.pdf
--------------------------------------------------------------------------------
/homework/week07/week07.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week07/week07.pdf
--------------------------------------------------------------------------------
/homework/week08/week08.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week08/week08.pdf
--------------------------------------------------------------------------------
/homework/week09/week09.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week09/week09.pdf
--------------------------------------------------------------------------------
/homework/week10/week10.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week10/week10.pdf
--------------------------------------------------------------------------------
/homework/week01/week01_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week01/week01_solutions.pdf
--------------------------------------------------------------------------------
/homework/week02/week02_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week02/week02_solutions.pdf
--------------------------------------------------------------------------------
/homework/week03/week03_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week03/week03_solutions.pdf
--------------------------------------------------------------------------------
/homework/week04/week04_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week04/week04_solutions.pdf
--------------------------------------------------------------------------------
/homework/week05/week05_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week05/week05_solutions.pdf
--------------------------------------------------------------------------------
/homework/week06/week06_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week06/week06_solutions.pdf
--------------------------------------------------------------------------------
/homework/week07/week07_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week07/week07_solutions.pdf
--------------------------------------------------------------------------------
/homework/week08/week08_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week08/week08_solutions.pdf
--------------------------------------------------------------------------------
/homework/week09/week09_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week09/week09_solutions.pdf
--------------------------------------------------------------------------------
/homework/week10/week10_solutions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmcelreath/stat_rethinking_2020/HEAD/homework/week10/week10_solutions.pdf
--------------------------------------------------------------------------------
/related-reading.md:
--------------------------------------------------------------------------------
# Related reading arising during discussion

## Selection-bias

https://doi.org/10.1093/aje/kwx077

https://causalai.net/r29.pdf

https://doi.org/10.1097/01.ede.0000135174.63482.43

## good and bad controls

https://papers.ssrn.com/sol3/papers.cfm?abstract_id=3689437

## ways to estimate population size

Mark-recapture, distance sampling, etc. Hard to find approachable papers on these topics.

https://distancesampling.org/whatisds.html

https://doi.org/10.2307/1939816

https://doi.org/10.1016/j.gecco.2018.e00411

https://doi.org/10.3389/fevo.2020.563477

--------------------------------------------------------------------------------
/homework/week01/seminar_week01.R:
--------------------------------------------------------------------------------
# Statistical Rethinking Winter 2020/2021
# Discussion Seminar 1
# Points
# (1) Procedural questions
# (2) Software setup problems
# (3) Homework review - solutions and broader concepts
# (4) Prepare for next week - Chapter 4!

library(rethinking)   # needed for PI() and HPDI() in Problem 3

## Problem 1

# define grid
p_grid <- seq( from=0 , to=1 , length.out=1000 )
# define prior
prior <- rep( 1 , 1000 )
# compute likelihood at each value in grid
likelihood <- dbinom( 4 , size=15 , prob=p_grid )
# compute product of likelihood and prior
unstd.posterior <- likelihood * prior
# standardize the posterior, so it sums to 1
posterior <- unstd.posterior / sum(unstd.posterior)

plot( p_grid , posterior , type="l" , xlab="proportion water" , ylab="posterior probability" )

## Problem 2

# define grid
p_grid <- seq( from=0 , to=1 , length.out=1000 )
# define prior
prior <- c( rep( 0 , 500 ) , rep( 2 , 500 ) )
# compute likelihood at each value in grid
likelihood <- dbinom( 4 , size=15 , prob=p_grid )
# compute product of likelihood and prior
unstd.posterior <- likelihood * prior
# standardize the posterior, so it sums to 1
posterior <- unstd.posterior / sum(unstd.posterior)

plot( p_grid , posterior , type="l" , xlab="proportion water" , ylab="posterior probability" )

## Problem 3

samples <- sample( p_grid , prob=posterior , size=1e4 , replace=TRUE )
plot( samples , ylim=c(0,1) , xlab="samples" , ylab="proportion water" )
PI( samples , 0.89 )
HPDI( samples , 0.89 )
--------------------------------------------------------------------------------
/homework/week02/week02_script.R:
--------------------------------------------------------------------------------
# WEEK 2 - SOLUTIONS

# Main concepts:
# Structure and assumptions of simple linear models
# Bayesian inference with more than one parameter
# Prior simulation
# Posterior predictions
# NONE OF THIS IS CAUSAL INFERENCE (yet)

# Problem 1

library(rethinking)
data(Howell1)
d <- Howell1
d2 <- d[ d$age >= 18 , ]
xbar <- mean(d2$weight)

m4.3 <- quap(
    alist(
        height ~ dnorm( mu , sigma ) ,
        mu <- a + b*( weight - xbar ) ,
        a ~ dnorm( 178 , 20 ) ,
        b ~ dlnorm( 0 , 1 ) ,
        sigma ~ dunif( 0 , 50 )
    ), data=d2 )

# 45 kg prediction

# step 1 : extract posterior samples
post <- extract.samples(m4.3)

# step 2 : compute 'mu' for each set of samples
mu <- post$a + post$b*( 45 - xbar )

# step 3 : compute distribution of observable heights
sim_h <- rnorm( 1e4 , mu , post$sigma )

# compare to prior
prior <- extract.prior(m4.3)
mu_prior <- prior$a + prior$b*( 45 - xbar )
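
# editor's note (not in the original script): steps 1-3 above can also be done
# with the rethinking helpers link() and sim(); shown here only as an
# equivalent shortcut for comparison
mu_alt <- link( m4.3 , data=list(weight=45) )
h_alt <- sim( m4.3 , data=list(weight=45) )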

dens( mu , xlim=c(140,175) , xlab="predicted height" )
dens( sim_h , add=TRUE , col="red" )
dens( mu_prior , add=TRUE , col="blue" )

mean(sim_h)
PI(sim_h)

# problem 2

d$log_weight <- log( d$weight )

plot( d$log_weight , d$height )

xbar2 <- mean(d$log_weight)

m2 <- quap(
    alist(
        height ~ dnorm( mu , sigma ) ,
        mu <- a + b*( log_weight - xbar2 ) ,
        a ~ dnorm( 178 , 20 ) ,
        b ~ dlnorm( 0 , 1 ) ,
        sigma ~ dunif( 0 , 50 )
    ), data=d )

# plot posterior predictions

lw_seq <- seq( from=1 , to=5 , length.out=30 )
pred_h <- sim( m2 , data=list(log_weight=lw_seq) )

plot( d$log_weight , d$height )
mu <- apply( pred_h , 2 , mean )
lines( lw_seq , mu , lwd=3 )
CI <- apply( pred_h , 2 , PI )
shade( CI , lw_seq )

# now plot model predictions on kg scale

plot( d$weight , d$height )
lines( exp(lw_seq) , mu , lwd=3 )
shade( CI , exp(lw_seq) )

# problem 3

# see solutions PDF
--------------------------------------------------------------------------------
/homework/week07/week07_script.R:
--------------------------------------------------------------------------------
# Week 7 - Ordered categories

# Key concepts
# GLM strategy extended by:
# (1) embedding distributions to create mixtures
# (2) using odd link functions to handle odd variables
# Understand beta-binomial and gamma-Poisson as *overdispersed* binomial and Poisson models - simple "robust" GLMs, like Student-t regression instead of Normal
# Ordered categories as a common (in social sciences) problem with common solution
# Get used to building more complex models, doing it in pieces to manage complexity

# 1

library(rethinking)
data(Trolley)
d <- Trolley

# recode these in order
edu_levels <- c( 6 , 1 , 8 , 4 , 7 , 2 , 5 , 3 )
d$edu_new <- edu_levels[ d$edu ]

idx <- 1:nrow(d)
dat <- list(
    y = d$response[idx] ,
    A = d$action[idx],
    I = d$intention[idx],
    C = d$contact[idx],
    E = as.integer( d$edu_new[idx] ),
    edu_norm = normalize( d$edu_new[idx] ),
    age = standardize( d$age[idx] ),
    alpha = rep(2,7) )

m1 <- ulam(
    alist(
        y ~ ordered_logistic( phi , cutpoints ),
        phi <- bE*sum( delta_shell[1:E] ) + bA*A + bC*C + BI*I + bAge*age,
        BI <- bI + bIA*A + bIC*C ,
        c(bA,bI,bC,bIA,bIC,bE,bAge) ~ normal( 0 , 0.5 ),
        cutpoints ~ normal( 0 , 1.5 ),
        vector[8]: delta_shell <<- append_row( 0 , delta ),
        simplex[7]: delta ~ dirichlet( alpha )
    ), data=dat , chains=4 , cores=4 , cmdstan=TRUE )

# X -> Y <- Z
# Z -> X -> Y

# blavaan

# 2

library(dagitty)
dag2 <- dagitty("dag{
    E -> R <- A
    A -> E
    G -> E
    G -> R
}")
drawdag(dag2)
adjustmentSets( dag2 , exposure="E" , outcome="R" , effect="total" )

dat$female <- ifelse( d$male==1 , 0L , 1L )
m2 <- ulam(
    alist(
        y ~ ordered_logistic( phi , cutpoints ),
        phi <- bE*sum( delta_shell[1:E] ) + bA*A + bC*C + BI*I +
               bAge*age + bF*female,
        BI <- bI + bIA*A + bIC*C ,
        c(bA,bI,bC,bIA,bIC,bE,bAge,bF) ~ normal( 0 , 0.5 ),
        cutpoints ~ normal( 0 , 1.5 ),
        vector[8]: delta_shell <<- append_row( 0 , delta ),
        simplex[7]: delta ~ dirichlet( alpha )
    ), data=dat , chains=4 , cores=4 , cmdstan=TRUE )
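
# editor's sketch (not in the original script): put the education and age
# coefficients from the two models side by side to see how the estimates
# shift once gender is added to the adjustment set
plot( coeftab( m1 , m2 ) , pars=c("bE","bAge") )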

--------------------------------------------------------------------------------
/homework/week06/week06_script.R:
--------------------------------------------------------------------------------
# week 6 - maxent and GLMs
# key concepts
# outcome distributions determined by CONSTRAINTS on variable + maxent
# this is a form of prior knowledge
# it is often OKAY to ignore some prior knowledge, but do better with it
# GLMs and link functions: parameters not on outcome scale anymore
# binomial and Poisson models

# 1

library(rethinking)
data(NWOGrants)
d <- NWOGrants
dat_list <- list(
    awards = as.integer(d$awards),
    apps = as.integer(d$applications),
    gid = ifelse( d$gender=="m" , 1L , 2L ) )

dat_list$disc <- as.integer(d$discipline)

m1 <- ulam(
    alist(
        awards ~ binomial( apps , p ),
        logit(p) <- a[gid],
        a[gid] ~ normal(0,1.5)
    ),
    data=dat_list , chains=4 , cores=4 , cmdstan=TRUE )

precis(m1,2)

post <- extract.samples(m1)

p_diff <- sapply( 1:2000 , function(i)
    inv_logit( post$a[i,1] ) - inv_logit( post$a[i,2] ) )
precis(p_diff)

# now with discipline

m2 <- ulam(
    alist(
        awards ~ binomial( apps , p ),
        logit(p) <- a[gid] + d[disc],
        a[gid] ~ normal(0,1.5),
        d[disc] ~ normal(0,1)
    ),
    data=dat_list , chains=4 , cores=4 , cmdstan=TRUE )


# 2

set.seed(1913)
N <- 1000
G <- rbern(N)
S <- rbern(N)
D <- rbern( N , p=inv_logit( G + S ) )
A <- rbern( N , p=inv_logit( 0.25*G + D + 2*S - 2 ) )
dat_sim <- list( G=G , D=D , A=A )

# 3

library(rethinking)
library(MASS)
data(eagles)
d <- eagles
d$pirateL <- ifelse( d$P=="L" , 1 , 0 )
d$victimL <- ifelse( d$V=="L" , 1 , 0 )
d$pirateA <- ifelse( d$A=="A" , 1 , 0 )

dat <- list( y=d$y , n=d$n , pirateL=d$pirateL , pirateA=d$pirateA , victimL=d$victimL )

# no interactions
m3 <- ulam(
    alist(
        y ~ binomial( n , p ),
        logit(p) <- a + bP*pirateL + bA*pirateA + bV*victimL,
        a ~ normal(0,1.5),
        c(bV,bP,bA) ~ normal(0,1)
    ),
    data=dat , chains=4 , cmdstan=TRUE )

precis(m3,2)

# interactions
m3b <- ulam(
    alist(
        y ~ binomial( n , p ),
        logit(p) <- a + bP*pirateL + bA*pirateA + bV*victimL +
                    bAV*pirateA*victimL,
        a ~ normal(0,1.5),
        c(bV,bP,bA,bAV) ~ normal(0,1)
    ),
    data=dat , chains=4 , cmdstan=TRUE )

precis(m3b,2)
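
# editor's sketch (not in the original script): posterior predicted success
# counts from the interaction model next to the observed counts, as a quick
# posterior predictive check
p_pred <- link( m3b , data=dat )
y_pred <- apply( p_pred , 2 , mean ) * d$n
cbind( observed=d$y , predicted=round( y_pred , 1 ) )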

--------------------------------------------------------------------------------
/homework/week03/week03_solutions.R:
--------------------------------------------------------------------------------
# WEEK 3 SOLUTIONS
# Key concepts:
# Prediction and inference are different tasks
# Estimates are not automatically causal estimates
# Adding variables (controls) can help, but also hurt
# Causal inference only possible with explicit causal model
# Recognize confounds and colliders in DAGs
# Learn how to detect and close back door paths
# There are more causal beasts to come (instruments, missing data, measurement error, bias amplification, selection bias, yadda yadda)

library(rethinking)
data(foxes)
d <- foxes
d$W <- standardize(d$weight)
d$A <- standardize(d$area)
d$F <- standardize(d$avgfood)
d$G <- standardize(d$groupsize)

# 1

m1 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bA*A,
        a ~ dnorm(0,0.2),
        bA ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

# prior predictive simulation

N <- 1000
a_prior <- rnorm(N,0,0.2)
bA_prior <- rnorm(N,0,0.5)
sigma_prior <- rexp(N,1)

seq_A <- seq( from=-2 , to=2 , length.out=30 )
prior <- extract.prior(m1)
mu <- link( m1 , data=list(A=seq_A) , post=prior )
mu_mean <- apply( mu , 2 , mean )

plot( NULL , xlim=c(-2,2) , ylim=c(-2.5,2.5) , xlab="Area (std)" , ylab="Weight (std)" )

for ( i in 1:100 ) lines( seq_A , mu[i,] )

# 2

m2 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bF*F,
        a ~ dnorm(0,0.2),
        bF ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

# 3

m3 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bF*F + bG*G,
        a ~ dnorm(0,0.2),
        c(bF,bG) ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

m4 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bF*F + bG*G + bA*A,
        a ~ dnorm(0,0.2),
        c(bF,bG,bA) ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

# implied conditional independencies

library(dagitty)
fox_dag <- dagitty( "dag {
    A -> F
    G <- F -> W
    G -> W
}")
impliedConditionalIndependencies(fox_dag)

# (1) A _||_ W | F

m5 <- quap(
    alist(
        A ~ dnorm( mu , sigma ),
        mu <- a + bW*W + bF*F,
        a ~ dnorm(0,0.2),
        c(bF,bW) ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

# (2) A _||_ G | F

m6 <- quap(
    alist(
        A ~ dnorm( mu , sigma ),
        mu <- a + bG*G + bF*F,
        a ~ dnorm(0,0.2),
        c(bF,bG) ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

--------------------------------------------------------------------------------
/homework/week09/week09_script.R:
--------------------------------------------------------------------------------
# Week 9 homework
# Key concepts:
# How to build and interpret varying slopes
# Using covariance to regularize across parameter types
# See varying effects as latent variables

# 1

library(rethinking)
data(bangladesh)
d <- bangladesh
dat_list <- list(
    C = d$use.contraception,
    did = as.integer( as.factor(d$district) ),
    urban = d$urban
)

m1.1 <- ulam(
    alist(
        C ~ bernoulli( p ),
        logit(p) <- a[did] + b[did]*urban,
        # a[did] ~ normal( abar , sigma_a ),
        # b[did] ~ normal( bbar , sigma_b ),
        c(a,b)[did] ~ multi_normal( c(abar,bbar) , Rho , Sigma ),
        abar ~ normal(0,1),
        bbar ~ normal(0,0.5),
        Rho ~ lkj_corr(2),
        Sigma ~ exponential(1)
        # c(sigma_a,sigma_b) ~ exponential(1)
    ) , data=dat_list , chains=4 , cores=4 , cmdstan=TRUE )

post <- extract.samples(m1.1)
a <- apply( post$a , 2 , mean )
b <- apply( post$b , 2 , mean )
plot( a , b , xlab="a (intercept)" , ylab="b (urban slope)" )
abline( h=0 , lty=2 )
abline( v=0 , lty=2 )
library(ellipse)
R <- apply( post$Rho , 2:3 , mean )
s <- apply( post$Sigma , 2 , mean )
S <- diag(s) %*% R %*% diag(s)
ll <- c( 0.5 , 0.67 , 0.89 , 0.97 )
for ( l in ll ) {
    el <- ellipse( S , centre=c( mean(post$abar) , mean(post$bbar) ) , level=l )
    lines( el , col="black" , lwd=0.5 )
}
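
# editor's addition (not in the original script): numerical summary of the
# correlation between the district intercepts and the urban slopes
precis( m1.1 , depth=3 , pars=c("Rho","Sigma") )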

# 2

dat_list$children <- standardize( d$living.children )
dat_list$age <- standardize( d$age.centered )

m2.1 <- ulam(
    alist(
        C ~ bernoulli( p ),
        logit(p) <- a[did] + b[did]*urban + bA*age,
        c(a,b)[did] ~ multi_normal( c(abar,bbar) , Rho , Sigma ),
        abar ~ normal(0,1),
        c(bbar,bA) ~ normal(0,0.5),
        Rho ~ lkj_corr(2),
        Sigma ~ exponential(1)
    ) , data=dat_list , chains=4 , cores=4 , cmdstan=TRUE )

m2.2 <- ulam(
    alist(
        C ~ bernoulli( p ),
        logit(p) <- a[did] + b[did]*urban + bK*children + bA*age,
        c(a,b)[did] ~ multi_normal( c(abar,bbar) , Rho , Sigma ),
        abar ~ normal(0,1),
        c(bbar,bK,bA) ~ normal(0,0.5),
        Rho ~ lkj_corr(2),
        Sigma ~ exponential(1)
    ) , data=dat_list , chains=4 , cores=4 , cmdstan=TRUE )

precis(m2.1)
precis(m2.2)

# 3

dat_list$K <- d$living.children
dat_list$alpha <- rep(2,3)

m3.1 <- ulam(
    alist(
        C ~ bernoulli( p ),
        logit(p) <- a[did] + b[did]*urban + bK*sum( delta_shell[1:K] ) + bA*age,
        c(a,b)[did] ~ multi_normal( c(abar,bbar) , Rho , Sigma ),
        abar ~ normal(0,1),
        c(bbar,bK,bA) ~ normal(0,0.5),
        Rho ~ lkj_corr(2),
        Sigma ~ exponential(1),
        vector[4]: delta_shell <<- append_row( 0 , delta ),
        simplex[3]: delta ~ dirichlet( alpha )
    ) , data=dat_list , chains=4 , cores=4 , cmdstan=TRUE )

precis(m3.1)

precis(m3.1,pars="delta",2)
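
# editor's sketch (not in the original script): the implied cumulative effect
# of each additional living child, as a fraction of the maximum effect bK
post <- extract.samples(m3.1)
delta_mean <- apply( post$delta , 2 , mean )
plot( 1:4 , c( 0 , cumsum(delta_mean) ) , type="b" ,
    xlab="living children" , ylab="cumulative fraction of bK" )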

--------------------------------------------------------------------------------
/homework/week04/week04_script.R:
--------------------------------------------------------------------------------
# WEEK 4 PROBLEMS
# Key concepts:
# Problem of overfitting
# Information and entropy as measures of evenness of distributions
# Divergence as natural measure of distance between distributions
# Cross-validation/WAIC as estimates of Divergence of predictions from truth
# Purpose of regularizing priors

# log(q) - log(p) - ( log(r) - log(p) ) = log(q) - log(r)

# 1

H <- function(p) -sum(p*log(p))

IB <- list()
IB[[1]] <- c( 0.2 , 0.2 , 0.2 , 0.2 , 0.2 )
IB[[2]] <- c( 0.8 , 0.1 , 0.05 , 0.025 , 0.025 )
IB[[3]] <- c( 0.05 , 0.15 , 0.7 , 0.05 , 0.05 )
sapply( IB , H )

DKL <- function(p,q) sum( p*(log(p)-log(q)) )

Dm <- matrix( NA , nrow=3 , ncol=3 )
for ( i in 1:3 ) for ( j in 1:3 ) Dm[i,j] <- DKL( IB[[j]] , IB[[i]] )
round( Dm , 2 )

# 2

## R code 6.21
library(rethinking)
d <- sim_happiness( seed=1977 , N_years=1000 )
precis(d)

## R code 6.22
d2 <- d[ d$age>17 , ] # only adults
d2$A <- ( d2$age - 18 ) / ( 65 - 18 )

## R code 6.23
d2$mid <- d2$married + 1
m6.9 <- quap(
    alist(
        happiness ~ dnorm( mu , sigma ),
        mu <- a[mid] + bA*A,
        a[mid] ~ dnorm( 0 , 1 ),
        bA ~ dnorm( 0 , 2 ),
        sigma ~ dexp(1)
    ) , data=d2 )

## R code 6.24
m6.10 <- quap(
    alist(
        happiness ~ dnorm( mu , sigma ),
        mu <- a + bA*A,
        a ~ dnorm( 0 , 1 ),
        bA ~ dnorm( 0 , 2 ),
        sigma ~ dexp(1)
    ) , data=d2 )

compare( m6.9 , m6.10 )
plot( compare( m6.9 , m6.10 ) )

compare( m6.9 , m6.10 , func=LOO )

# 3

library(rethinking)
data(foxes)
d <- foxes
d$W <- standardize(d$weight)
d$A <- standardize(d$area)
d$F <- standardize(d$avgfood)
d$G <- standardize(d$groupsize)

m1 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bF*F + bG*G + bA*A,
        a ~ dnorm(0,0.2),
        c(bF,bG,bA) ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

m2 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bF*F + bG*G,
        a ~ dnorm(0,0.2),
        c(bF,bG) ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

m3 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bG*G + bA*A,
        a ~ dnorm(0,0.2),
        c(bG,bA) ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

m4 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bF*F,
        a ~ dnorm(0,0.2),
        bF ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

m5 <- quap(
    alist(
        W ~ dnorm( mu , sigma ),
        mu <- a + bA*A,
        a ~ dnorm(0,0.2),
        bA ~ dnorm(0,0.5),
        sigma ~ dexp(1)
    ), data=d )

compare( m1 , m2 , m3 , m4 , m5 )
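
# editor's sketch (not in the original script): repeat the comparison with
# leave-one-out cross-validation as a check on the WAIC ranking
compare( m1 , m2 , m3 , m4 , m5 , func=LOO )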
plot(precis(m3,omit="sigma")) 99 | 100 | pred_dat <- data.frame( 101 | W = rep( 0:1 , times=4 ), 102 | J = rep( 0:1 , each=4 ), 103 | R = rep( c(0,0,1,1) , times=2 ) ) 104 | 105 | mu <- link( m3 , data=pred_dat ) 106 | 107 | row_labels <- paste( ifelse(pred_dat$W==1,"A","F") , 108 | ifelse(pred_dat$J==1,"A","F") , 109 | ifelse(pred_dat$R==1,"R","W") , sep="" ) 110 | 111 | plot( precis( list(mu=mu) , 2 ) , labels=row_labels ) 112 | 113 | # what about 3-way interactions? 114 | 115 | dat_list2b <- list( 116 | S = standardize(d$score), 117 | wid = d$wine.amer + 1L, 118 | jid = d$judge.amer + 1L, 119 | fid = ifelse(d$flight=="red",1L,2L) ) 120 | 121 | m3b <- ulam( 122 | alist( 123 | S ~ dnorm( mu , sigma ), 124 | mu <- w[wid,jid,fid], 125 | real['2,2,2']:w ~ normal(0,0.5), 126 | sigma ~ dexp(1) 127 | ), data=dat_list2b , chains=4 , cores=4 , cmdstan=TRUE ) 128 | 129 | row_labels = c("FFR","FFW","FAR","FAW","AFR","AFW","AAR","AAW" ) 130 | plot( precis( m3b , 3 , pars="w" ) , labels=row_labels ) 131 | 132 | -------------------------------------------------------------------------------- /homework/week08/week08_script.R: -------------------------------------------------------------------------------- 1 | # week 8 - multilevel models 1 2 | # key concepts 3 | # partial pooling as automatic regularization 4 | # how to code partial pooling 5 | # divergent transitions and centered/non-centered parameterization 6 | 7 | # 1 8 | 9 | library(rethinking) 10 | data(reedfrogs) 11 | d <- reedfrogs 12 | 13 | dat <- list( 14 | S = d$surv, 15 | n = d$density, 16 | tank = 1:nrow(d), 17 | pred = ifelse( d$pred=="no" , 0L , 1L ), 18 | size_ = ifelse( d$size=="small" , 1L , 2L ) 19 | ) 20 | 21 | # basic model, no predictors 22 | 23 | m1.1 <- ulam( 24 | alist( 25 | S ~ binomial( n , p ), 26 | logit(p) <- a[tank], 27 | a[tank] ~ normal( a_bar , sigma ), 28 | a_bar ~ normal( 0 , 1.5 ), 29 | sigma ~ exponential( 1 ) 30 | ), data=dat , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 31 | 32 | # models with predictors 33 | 34 | m1.2 <- ulam( 35 | alist( 36 | S ~ binomial( n , p ), 37 | logit(p) <- a[tank] + bp*pred, 38 | a[tank] ~ normal( a_bar , sigma ), 39 | bp ~ normal( -0.5 , 1 ), 40 | a_bar ~ normal( 0 , 1.5 ), 41 | sigma ~ exponential( 1 ) 42 | ), data=dat , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 43 | 44 | # size 45 | m1.3 <- ulam( 46 | alist( 47 | S ~ binomial( n , p ), 48 | logit(p) <- a[tank] + s[size_], 49 | a[tank] ~ normal( a_bar , sigma ), 50 | s[size_] ~ normal( 0 , 0.5 ), 51 | a_bar ~ normal( 0 , 1.5 ), 52 | sigma ~ exponential( 1 ) 53 | ), data=dat , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 54 | 55 | # pred + size 56 | m1.4 <- ulam( 57 | alist( 58 | S ~ binomial( n , p ), 59 | logit(p) <- a[tank] + bp*pred + s[size_], 60 | a[tank] ~ normal( a_bar , sigma ), 61 | bp ~ normal( -0.5 , 1 ), 62 | s[size_] ~ normal( 0 , 0.5 ), 63 | a_bar ~ normal( 0 , 1.5 ), 64 | sigma ~ exponential( 1 ) 65 | ), data=dat , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 66 | 67 | # pred + size + interaction 68 | m1.5 <- ulam( 69 | alist( 70 | S ~ binomial( n , p ), 71 | logit(p) <- a_bar + z[tank]*sigma + bp[size_]*pred + s[size_], 72 | z[tank] ~ normal( 0 , 1 ), 73 | bp[size_] ~ normal( -0.5 , 1 ), 74 | s[size_] ~ normal( 0 , 0.5 ), 75 | a_bar ~ normal( 0 , 1.5 ), 76 | sigma ~ exponential( 1 ) 77 | ), data=dat , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 78 | 79 | # compare 80 | 81 | compare( m1.1 , m1.2 , m1.3 , m1.4 , m1.5 ) 82 | 83 | plot( coeftab( m1.1 , m1.2 , m1.3 , m1.4 , m1.5 ), 
pars="sigma" ) 84 | 85 | # 2 86 | 87 | library(rethinking) 88 | data(bangladesh) 89 | d <- bangladesh 90 | d$district_id <- as.integer(as.factor(d$district)) 91 | 92 | dat_list <- list( 93 | C = d$use.contraception, 94 | did = d$district_id 95 | ) 96 | 97 | # fixed model 98 | 99 | m2.1 <- ulam( 100 | alist( 101 | C ~ bernoulli( p ), 102 | logit(p) <- a[did], 103 | a[did] ~ normal( 0 , 1.5 ) 104 | ) , data=dat_list , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 105 | 106 | # pooling model 107 | 108 | m2.2 <- ulam( 109 | alist( 110 | C ~ bernoulli( p ), 111 | logit(p) <- a[did], 112 | a[did] ~ normal( a_bar , sigma ), 113 | a_bar ~ normal( 0 , 1.5 ), 114 | sigma ~ exponential( 1 ) 115 | ) , data=dat_list , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 116 | 117 | post1 <- extract.samples( m2.1 ) 118 | post2 <- extract.samples( m2.2 ) 119 | 120 | p1 <- apply( inv_logit(post1$a) , 2 , mean ) 121 | p2 <- apply( inv_logit(post2$a) , 2 , mean ) 122 | 123 | nd <- max(dat_list$did) 124 | plot( NULL , xlim=c(1,nd) , ylim=c(0,1) , ylab="prob use contraception" , 125 | xlab="district" ) 126 | points( 1:nd , p1 , pch=16 , col=rangi2 ) 127 | points( 1:nd , p2 ) 128 | abline( h=mean(inv_logit(post2$a_bar)) , lty=2 ) 129 | 130 | # 3 131 | 132 | data(Trolley) 133 | d <- Trolley 134 | 135 | dat <- list( 136 | R = d$response, 137 | A = d$action, 138 | I = d$intention, 139 | C = d$contact ) 140 | 141 | # model without individual intercepts 142 | m3.1 <- ulam( 143 | alist( 144 | R ~ dordlogit( phi , cutpoints ), 145 | phi <- bA*A + bC*C + BI*I , 146 | BI <- bI + bIA*A + bIC*C , 147 | c(bA,bI,bC,bIA,bIC) ~ dnorm( 0 , 0.5 ), 148 | cutpoints ~ dnorm( 0 , 1.5 ) 149 | ) , data=dat , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 150 | 151 | # model with individual intercepts (and pooling) 152 | 153 | m3.2 <- ulam( 154 | alist( 155 | R ~ dordlogit( phi , cutpoints ), 156 | phi <- a[id] + bA*A + bC*C + BI*I , 157 | BI <- bI + bIA*A + bIC*C , 158 | a[id] ~ normal( 0 , sigma ), 159 | c(bA,bI,bC,bIA,bIC) ~ dnorm( 0 , 0.5 ), 160 | cutpoints ~ dnorm( 0 , 1.5 ), 161 | sigma ~ exponential(1) 162 | ) , data=dat , chains=4 , cores=4 , log_lik=TRUE , cmdstan=TRUE ) 163 | 164 | -------------------------------------------------------------------------------- /homework/week10/week10_script.R: -------------------------------------------------------------------------------- 1 | # week 10 2 | # key concepts 3 | # Gaussian processes as smoothing with continuous categories 4 | # How to include measurement error & missing data in DAGs 5 | # How to include measurement error & missing data in models 6 | 7 | # 1 8 | 9 | # X -> Y -> Z 10 | 11 | N <- 1000 12 | X <- rnorm(N,0,1) # X has no parents, so just normal(0,1) 13 | Y <- rnorm(N,X,1) # X -> Y 14 | Z <- rnorm(N,Y,1) # Y -> Z 15 | dat <- list(Y=Y,X=X,Z=Z) 16 | 17 | library(rethinking) 18 | m1.1 <- quap( 19 | alist( 20 | Y ~ dnorm(mu,sigma), 21 | mu <- a + bX*X, 22 | c(a,bX) ~ dnorm(0,1), 23 | sigma ~ dexp(1) 24 | ), data=dat ) 25 | 26 | m1.2 <- quap( 27 | alist( 28 | Y ~ dnorm(mu,sigma), 29 | mu <- a + bX*X + bZ*Z, 30 | c(a,bX,bZ) ~ dnorm(0,1), 31 | sigma ~ dexp(1) 32 | ), data=dat ) 33 | 34 | # case-control bias (selection bias) 35 | # cloudy -> rain -> ground wet 36 | 37 | # 2 38 | 39 | library(rethinking) 40 | set.seed(73) 41 | N <- 500 42 | U_sim <- rnorm( N ) 43 | Q_sim <- sample( 1:4 , size=N , replace=TRUE ) 44 | E_sim <- rnorm( N , U_sim + Q_sim ) 45 | W_sim <- rnorm( N , U_sim + 0*E_sim ) 46 | dat_sim <- list( 47 | W=standardize(W_sim) , 48 | E=standardize(E_sim) 

--------------------------------------------------------------------------------
/homework/week10/week10_script.R:
--------------------------------------------------------------------------------
# week 10
# key concepts
# Gaussian processes as smoothing with continuous categories
# How to include measurement error & missing data in DAGs
# How to include measurement error & missing data in models

# 1

# X -> Y -> Z

N <- 1000
X <- rnorm(N,0,1) # X has no parents, so just normal(0,1)
Y <- rnorm(N,X,1) # X -> Y
Z <- rnorm(N,Y,1) # Y -> Z
dat <- list(Y=Y,X=X,Z=Z)

library(rethinking)
m1.1 <- quap(
    alist(
        Y ~ dnorm(mu,sigma),
        mu <- a + bX*X,
        c(a,bX) ~ dnorm(0,1),
        sigma ~ dexp(1)
    ), data=dat )

m1.2 <- quap(
    alist(
        Y ~ dnorm(mu,sigma),
        mu <- a + bX*X + bZ*Z,
        c(a,bX,bZ) ~ dnorm(0,1),
        sigma ~ dexp(1)
    ), data=dat )

# case-control bias (selection bias)
# cloudy -> rain -> ground wet

# 2

library(rethinking)
set.seed(73)
N <- 500
U_sim <- rnorm( N )
Q_sim <- sample( 1:4 , size=N , replace=TRUE )
E_sim <- rnorm( N , U_sim + Q_sim )
W_sim <- rnorm( N , U_sim + 0*E_sim )
dat_sim <- list(
    W=standardize(W_sim) ,
    E=standardize(E_sim) ,
    Q=standardize(Q_sim) )

m14.6b <- ulam(
    alist(
        W ~ normal( muW , sigmaW ),
        E ~ normal( muE , sigmaE ),
        muW <- aW + bEW*E + bUW*U[i],
        muE <- aE + bQE*Q + bUE*U[i],

        vector[500]:U ~ normal( 0 , 1 ),

        c(aW,aE) ~ normal( 0 , 0.2 ),
        c(bEW,bQE) ~ normal( 0 , 0.5 ),
        c(bUE,bUW) ~ normal( 0 , 0.5 ),
        c(sigmaW,sigmaE) ~ exponential( 1 )
    ), data=dat_sim , chains=4 , cores=4 , cmdstan=TRUE )

precis(m14.6b)

traceplot(m14.6b,pars=c("bUE","bUW"))

post <- extract.samples(m14.6b)
dens(post$bUE)

# 3

y <- c( 18 , 19 , 22 , NA , NA , 19 , 20 , 22 )

# required assumptions:
# (1) what was total number of spins?
# S > 120

# (2) what prior for the probabilities of each value?

library(gtools)
p <- rdirichlet( 1e3 , alpha=rep(4,8) )
plot( NULL , xlim=c(1,8) , ylim=c(0,0.3) , xlab="outcome" , ylab="probability" )
for ( i in 1:10 ) lines( 1:8 , p[i,] , type="b" , col=grau() , lwd=2 )

twicer <- function( p ) {
    o <- order( p )
    if ( p[o][8]/p[o][1] > 2 ) return( TRUE ) else return( FALSE )
}

p <- rdirichlet( 1e3 , alpha=rep(4,8) )
sum( apply( p , 1 , twicer ) )

# then marginalize over each possible combination of counts for 4 and 5, weighted by probability computed from (1) and (2)

code15H7 <- '
data{
    int N;
    int y[N];
    int y_max; // consider at most this many spins for y4 and y5
    int S_mean;
}
parameters{
    simplex[N] p; // probabilities of each outcome
}
model{
    vector[(1+y_max)*(1+y_max)] terms;
    int k = 1;

    p ~ dirichlet( rep_vector(50,N) );

    // loop over possible values for unknown cells 4 and 5
    // this code updates posterior of p
    for ( y4 in 0:y_max ) {
        for ( y5 in 0:y_max ) {
            int Y[N] = y;
            Y[4] = y4;
            Y[5] = y5;
            terms[k] = poisson_lpmf(y4+y5|S_mean-120)
                       + multinomial_lpmf(Y|p);
            k = k + 1;
        }//y5
    }//y4
    target += log_sum_exp(terms);
}
generated quantities{
    matrix[y_max+1,y_max+1] P45; // prob y4,y5 takes joint values
    // now compute Prob(y4,y5|p)
    {
        matrix[(1+y_max),(1+y_max)] terms;
        int k = 1;
        real Z;
        for ( y4 in 0:y_max ) {
            for ( y5 in 0:y_max ) {
                int Y[N] = y;
                Y[4] = y4;
                Y[5] = y5;
                terms[y4+1,y5+1] = poisson_lpmf(y4+y5|S_mean-120)
                                   + multinomial_lpmf(Y|p);
            }//y5
        }//y4
        Z = log_sum_exp( to_vector(terms) );
        for ( y4 in 0:y_max )
            for ( y5 in 0:y_max )
                P45[y4+1,y5+1] = exp( terms[y4+1,y5+1] - Z );
    }
}
'

y <- c(18,19,22,-1,-1,19,20,22)
dat <- list(
    N = length(y),
    y = y,
    S_mean = 160,
    y_max = 40 )

m15H7 <- cstan( model_code=code15H7 , data=dat , chains=4 , cores=4 )

post <- extract.samples(m15H7)

y_max <- dat$y_max
plot( NULL , xlim=c(10,y_max-10) , ylim=c(10,y_max-10) ,
    xlab="number of 4s" , ylab="number of 5s" )
mtext( "posterior distribution of 4s and 5s" )
for ( y4 in 0:y_max ) for ( y5 in 0:y_max ) {
    k <- grau( mean( post$P45[,y4+1,y5+1] )/0.01 )
    points( y4 , y5 , col=k , pch=16 , cex=1.5 )
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
Statistical Rethinking: A Bayesian Course (with Code Examples in R/Stan/Python/Julia)
===============

Winter 2020/2021

Instructor: Richard McElreath

Format: Online, flipped instruction. The lectures are pre-recorded. We'll meet online once a week for an hour to work through the solutions to the assigned problems.

When: Wednesdays 3-4PM CET, starting 2 December 2020 (see full calendar at bottom). A Zoom link will be given to enrolled students.

Registration: Please sign up via <[Eventbrite](https://www.eventbrite.co.uk/e/statistical-rethinking-course-winter-20202021-tickets-126062047979)>. I've also set aside 100 audit tickets at the same link, for people who want to participate, but who don't need graded work and course credit. Apologies for using an external service, but it will make distributing the access information and course materials easier for all of us.

# Book

We'll use the 2nd edition of my book, Statistical Rethinking. I'll provide a PDF of the book to enrolled students.

# Lectures

The full lecture video playlist is here: <https://www.youtube.com/playlist?list=PLDcUM9US4XdNM4Edgs7weiyIguLSToZRI>. Links to individual lectures, slides and videos are in the calendar at the very bottom.

# Code examples

Students can engage with the material using either the original R code examples or one of several conversions to other computing environments. The conversions are not always exact, but they are rather complete. Each option is listed below.

## Original R Flavor

For those who want to use the original R code examples in the print book, you need to first install `rstan`. Go to <https://mc-stan.org> and find the instructions for your platform. Then you can install the `rethinking` package:
```
install.packages(c("devtools","mvtnorm","loo","coda"),dependencies=TRUE)
library(devtools)
install_github("rmcelreath/rethinking")
```
The code is all on github at <https://github.com/rmcelreath/rethinking> and there are additional details about the package there, including information about using the more-up-to-date `cmdstanr` instead of `rstan` as the underlying MCMC engine.
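
If you want `ulam()` models to run on the `cmdstanr` backend (the homework scripts in this repository pass `cmdstan=TRUE`), a minimal setup sketch is below. The install commands follow the `cmdstanr` documentation and assume a working C++ toolchain; check that documentation if anything fails.
```
# install the cmdstanr package and then CmdStan itself
install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
cmdstanr::install_cmdstan()

# then fit ulam() models with the cmdstan backend, as in the homework scripts
library(rethinking)
# m <- ulam( ... , chains=4 , cores=4 , cmdstan=TRUE )
```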

## R + Tidyverse + ggplot2 + brms

The <[Tidyverse/brms](https://bookdown.org/content/4857/)> conversion is very high quality and complete through Chapter 14.

## Python and PyMC3

The <[Python/PyMC3](https://github.com/pymc-devs/resources/tree/master/Rethinking_2)> conversion is quite complete.

## Julia and Turing

The <[Julia/Turing](https://github.com/StatisticalRethinkingJulia)> conversion is not as complete, but is growing fast and presents the Rethinking examples in multiple Julia engines, including the great <[TuringLang](https://github.com/StatisticalRethinkingJulia/TuringModels.jl)>.

## Other

There are several other conversions. See the full list at .

# Homework and solutions

I will also post problem sets and solutions here. Check the folders at the top.

# Calendar & Topical Outline

There are 10 weeks of instruction.

| Week ## | Meeting date | Reading | Lectures |
| ------- | -------------- | ------------- | ---------------------- |
| Week 01 | 02 December | Chapters 1, 2 and 3 | The Golem of Prague <[slides](https://speakerdeck.com/rmcelreath/l01-statistical-rethinking-winter-2019)> <[video](https://www.youtube.com/watch?v=4WVelCswXo4)> <br> Garden of Forking Data <[slides](https://speakerdeck.com/rmcelreath/l02-statistical-rethinking-winter-2019)> <[video](https://www.youtube.com/watch?v=XoVtOAN0htU&list=PLDcUM9US4XdNM4Edgs7weiyIguLSToZRI&index=2)> |
| Week 02 | 09 December | Chapter 4 | Geocentric Models <[slides](https://speakerdeck.com/rmcelreath/l03-statistical-rethinking-winter-2019)> <[video](https://youtu.be/h5aPo5wXN8E)> <br> Wiggly Orbits <[slides](https://speakerdeck.com/rmcelreath/l04-statistical-rethinking-winter-2019)> <[video](https://youtu.be/ENxTrFf9a7c)> |
| Week 03 | 06 January | Chapters 5 and 6 | Spurious Waffles <[slides](https://speakerdeck.com/rmcelreath/l05-statistical-rethinking-winter-2019)> <[video](https://www.youtube.com/watch?v=e0tO64mtYMU&index=5&list=PLDcUM9US4XdNM4Edgs7weiyIguLSToZRI)> <br> Haunted DAG <[slides](https://speakerdeck.com/rmcelreath/l06-statistical-rethinking-winter-2019)> <[video](https://youtu.be/l_7yIUqWBmE)> |
| Week 04 | 13 January | Chapter 7 | Ulysses' Compass <[slides](https://speakerdeck.com/rmcelreath/l07-statistical-rethinking-winter-2019)> <[video](https://youtu.be/0Jc6Kgw5qc0)> <br> Model Comparison <[slides](https://speakerdeck.com/rmcelreath/l08-statistical-rethinking-winter-2019)> <[video](https://youtu.be/gjrsYDJbRh0)> |
| Week 05 | 20 January | Chapters 8 and 9 | Conditional Manatees <[slides](https://speakerdeck.com/rmcelreath/l09-statistical-rethinking-winter-2019)> <[video](https://youtu.be/QhHfo6-Bx8o)> <br> Markov Chain Monte Carlo <[slides](https://speakerdeck.com/rmcelreath/l10-statistical-rethinking-winter-2019)> <[video](https://youtu.be/v-j0UmWf3Us)> |
| Week 06 | 27 January | Chapters 10 and 11 | Maximum entropy & GLMs <[slides](https://speakerdeck.com/rmcelreath/l11-statistical-rethinking-winter-2019)> <[video](https://youtu.be/-4y4X8ELcEM)> <br> God Spiked the Integers <[slides](https://speakerdeck.com/rmcelreath/l12-statistical-rethinking-winter-2019)> <[video](https://youtu.be/hRJtKCIDTwc)> |
| Week 07 | 03 February | Chapter 12 | Monsters & Mixtures <[slides](https://speakerdeck.com/rmcelreath/l13-statistical-rethinking-winter-2019)> <[video](https://youtu.be/p7g-CgGCS34)> <br> Ordered Categories, Left & Right <[slides](https://speakerdeck.com/rmcelreath/l14-statistical-rethinking-winter-2019)> <[video](https://youtu.be/zA3Jxv8LOrA)> |
| Week 08 | 10 February | Chapter 13 | Multilevel Models <[slides](https://speakerdeck.com/rmcelreath/l15-statistical-rethinking-winter-2019)> <[video](https://youtu.be/AALYPv5xSos)> <br> Multilevel Models 2 <[slides](https://speakerdeck.com/rmcelreath/l16-statistical-rethinking-winter-2019)> <[video](https://youtu.be/ZG3Oe35R5sY)> |
| Week 09 | 24 February | Chapter 14 | Adventures in Covariance <[slides](https://speakerdeck.com/rmcelreath/l17-statistical-rethinking-winter-2019)> <[video](https://youtu.be/yfXpjmWgyXU)> <br> Slopes, Instruments and Social Relations <[slides](https://speakerdeck.com/rmcelreath/l18-statistical-rethinking-winter-2019)> <[video](https://youtu.be/e5cgiAGBKzI)> |
| Week 10 | 03 March | Chapter 15 | Gaussian Processes <[slides](https://speakerdeck.com/rmcelreath/l19-statistical-rethinking-winter-2019)> <[video](https://youtu.be/pwMRbt2CbSU)> <br> Missing Values and Measurement Error <[slides](https://speakerdeck.com/rmcelreath/l20-statistical-rethinking-winter-2019)> <[video](https://youtu.be/UgLF0aLk85s)> |

--------------------------------------------------------------------------------