├── .DS_Store ├── .Rhistory ├── .gitignore ├── MCMC_probability_demo.Rmd ├── MCMC_probability_demo.pdf ├── README.md ├── angell.instructions.R ├── beer.instructions.R ├── diagnostics.R ├── econvote.instructions.R ├── factor.dotplot.R ├── goals.instructions.R ├── interaction.instructions.R ├── legislators.instructions.R ├── logit.pp.plot.instructions.R ├── mac.R2openBUGS.R ├── mac.R2winBUGS.R ├── mlm.state.instructions.R ├── ologit.pp.plot.instructions.R ├── pumps.instructions.R ├── rats.instructions.R ├── regression.dotplot.R ├── regression.table.R ├── results.rmarkdown.Rmd ├── results.rmarkdown.html ├── results.rmarkdown.pdf ├── rstan.R └── wvs.instructions.R /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/.DS_Store -------------------------------------------------------------------------------- /.Rhistory: -------------------------------------------------------------------------------- 1 | library(R2WinBUGS) 2 | model.file <- system.file(package="R2WinBUGS", "model", "schools.txt") 3 | # Some example data (see ?schools for details): 4 | data(schools) 5 | J <- nrow(schools) 6 | y <- schools$estimate 7 | sigma.y <- schools$sd 8 | data <- list(J=J, y=y, sigma.y=sigma.y) 9 | inits <- function(){ 10 | list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100), 11 | sigma.theta=runif(1, 0, 100)) 12 | } 13 | ## or alternatively something like: 14 | # inits <- list( 15 | # list(theta=rnorm(J, 0, 90), mu.theta=rnorm(1, 0, 90), 16 | # sigma.theta=runif(1, 0, 90)), 17 | # list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100), 18 | # sigma.theta=runif(1, 0, 100)) 19 | # list(theta=rnorm(J, 0, 110), mu.theta=rnorm(1, 0, 110), 20 | # sigma.theta=runif(1, 0, 110))) 21 | parameters <- c("theta", "mu.theta", "sigma.theta") 22 | schools.sim <- bugs(data, inits, parameters, model.file, 23 | n.chains=3, n.iter=5000, 24 | bugs.directory="/Applications/Wineskin/WinBUGS.app/Contents/Resources/drive_c/Program Files/WinBUGS14") 25 | print(schools.sim) 26 | plot(schools.sim) 27 | # For a summary table, use 28 | # https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R 29 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 30 | mcmctab(schools.sim, jags = FALSE) 31 | # Access posterior draws for postestimation 32 | schools.out <- schools.sim$sims.matrix 33 | summary(schools.out) 34 | str(schools.sim$sims.matrix) 35 | library(coda) 36 | foo <- as.mcmc(schools.sim$sims.array) 37 | summary(foo) 38 | foo2 <- mcmcUpgrade(foo) 39 | str(schools.sim) 40 | str(schools.sim$summary) 41 | (schools.sim$summary) 42 | class(schools.sim) 43 | class(angell.fit) 44 | Statelevel <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/state_level_update.dta",convert.underscore = TRUE) 45 | library(foreign) 46 | Statelevel <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/state_level_update.dta",convert.underscore = TRUE) 47 | table(Statelevel$sstate) 48 | length(Statelevel$sstate) 49 | Megapoll <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/gay_marriage_megapoll.dta", convert.underscore = TRUE) 50 | Megapoll <- Megapoll[Megapoll$state != "", ] 51 | Megapoll <- Megapoll[order(Megapoll$state), ] 52 | State.names <- sort(unique(Megapoll$state)) 53 | Statelevel <- Statelevel[order(Statelevel$sstate), ] 54 | Statelevel 55 | Statelevel$state.cat <- 0 56 | Statelevel[which(Statelevel$sstate %in% State.names), ] 57 | as.numeric(as.factor(Statelevel[which(Statelevel$sstate %in% State.names), ]$sstate)) 58 | Statelevel$state.cat <- 0 59 | Statelevel[which(Statelevel$sstate %in% State.names), ]$state.cat <- as.numeric(as.factor(Statelevel[which(Statelevel$sstate %in% State.names), ]$sstate)) 60 | Census <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/poststratification%202000.dta",convert.underscore = TRUE) 61 | Megapoll.dat <- Megapoll[, c("yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "state.cat", "region.cat", "poll.cat", "p.relig.full", "p.kerry.full")] 62 | head(Megapoll) 63 | Megapoll$race.female <- (Megapoll$female * 3) + Megapoll$race.wbh # from 1 for white males to 6 for hispanic females 64 | Megapoll$age.edu.cat <- 4 * (Megapoll$age.cat - 1) + Megapoll$edu.cat # from 1 for 18-29 with low edu to 16 for 65+ with high edu 65 | Megapoll$poll.cat <- as.numeric(as.factor(Megapoll$poll)) # indicator for each of the 5 polls 66 | Megapoll$state.cat <- as.numeric(as.factor(Megapoll$state)) # indicator for state (for varying intercepts) 67 | Megapoll$p.evang.full <- Statelevel$p.evang[Megapoll$state.initnum] # proportion of evangelicals in respondent's state 68 | Megapoll$p.mormon.full <-Statelevel$p.mormon[Megapoll$state.initnum] # proportion of mormon's in respondent's state 69 | Megapoll$p.relig.full <- Megapoll$p.evang.full + Megapoll$p.mormon.full # combined evangelical + mormon proportions 70 | Megapoll$p.kerry.full <- Statelevel$kerry.04[Megapoll$state.initnum] # kerry's % of 2-party vote in respondent's state in 2004 71 | Megapoll.dat <- Megapoll[, c("yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "state.cat", "region.cat", "poll.cat", "p.relig.full", "p.kerry.full")] 72 | individual.bmod <- glmer2stan( 73 | formula = yes.of.all ~ (1|race.female) + (1|age.cat) + (1|edu.cat) + 74 | (1|age.edu.cat) + (1|state.cat) + (1|region.cat) + (1|poll.cat) + 75 | p.relig.full + p.kerry.full, 76 | data = Megapoll.dat, 77 | family = "binomial", 78 | sample = FALSE) 79 | library(glmer2stan) 80 | individual.bmod <- glmer2stan( 81 | formula = yes.of.all ~ (1|race.female) + (1|age.cat) + (1|edu.cat) + 82 | (1|age.edu.cat) + (1|state.cat) + (1|region.cat) + (1|poll.cat) + 83 | p.relig.full + p.kerry.full, 84 | data = Megapoll.dat, 85 | family = "binomial", 86 | sample = FALSE) 87 | show(individual.bmod) 88 | ?stan 89 | library(rstan) 90 | ?stan 91 | Megapoll1.dat <- Megapoll[, c("state", "state.cat", "region.cat", "yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "poll.cat", "p.relig.full", "p.kerry.full")] 92 | Megapoll1.dat <- na.omit(Megapoll1.dat) 93 | Megapoll2.dat <- Megapoll[, c("state.cat", "region.cat", "p.relig.full", "p.kerry.full")] 94 | Megapoll2.dat <- aggregate(x = Megapoll, by = list(statenum = Megapoll$state.cat), FUN = function(x) x[1]) 95 | Megapoll2.dat <- Megapoll2.dat[, -c(1)] 96 | Megapoll.datjags <- as.list(Megapoll1.dat) 97 | Megapoll.datjags$statenuml2 <- Megapoll2.dat$state.cat 98 | Megapoll.datjags$region.catl2 <- Megapoll2.dat$region.cat 99 | Megapoll.datjags$poll.catl2 <- Megapoll2.dat$poll.cat 100 | Megapoll.datjags$p.relig.full.state <- Megapoll2.dat$p.relig.full 101 | Megapoll.datjags$p.kerry.full.state <- Megapoll2.dat$p.kerry.full 102 | Megapoll.datjags$N.resp <- nrow(Megapoll1.dat) 103 | Megapoll.datjags$N.state <- nrow(Megapoll2.dat) 104 | Megapoll.datjags$N.race.female <- length(unique(Megapoll1.dat$race.female)) 105 | Megapoll.datjags$N.age.cat <- length(unique(Megapoll1.dat$age.cat)) 106 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat)) 107 | Megapoll.datjags$N.age.edu.cat <- length(unique(Megapoll1.dat$age.edu.cat)) 108 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat)) 109 | Megapoll.datjags$N.region.cat <- length(unique(Megapoll1.dat$region.cat)) 110 | Megapoll.datjags$N.poll <- length(unique(Megapoll1.dat$poll.cat)) 111 | individual.modjags <- function(){ 112 | for (i in 1:N.resp){ 113 | yes.of.all[i] ~ dbern(p[i]) 114 | logit(p[i]) <- b.intercept + 115 | b.relig * p.relig.full[i] + 116 | b.kerry * p.kerry.full[i] + 117 | b.race.female[race.female[i]] + 118 | b.age[age.cat[i]] + 119 | b.edu[edu.cat[i]] + 120 | b.age.edu[age.edu.cat[i]] + 121 | b.state[state.cat[i]] + 122 | b.region[region.cat[i]] + 123 | b.poll[poll.cat[i]] 124 | } 125 | for(i in 1:N.race.female){b.race.female[i] ~ dnorm(0, tau.race.female)} 126 | for(i in 1:N.age.cat){b.age[i] ~ dnorm(0, tau.age)} 127 | for(i in 1:N.edu.cat){b.edu[i] ~ dnorm(0, tau.edu)} 128 | for(i in 1:N.age.edu.cat){b.age.edu[i] ~ dnorm(0, tau.age.edu)} 129 | for(i in 1:N.state){b.state[i] ~ dnorm(0, tau.state)} 130 | for(i in 1:N.region.cat){b.region[i] ~ dnorm(0, tau.region)} 131 | for(i in 1:N.poll){b.poll[i] ~ dnorm(0, tau.poll)} 132 | b.intercept ~ dnorm(0, .1) 133 | b.relig ~ dnorm(0, .1) 134 | b.kerry ~ dnorm(0, .1) 135 | # mu.race.female ~ dnorm(0, .1) 136 | # mu.age.cat ~ dnorm(0, .1) 137 | # mu.edu.cat ~ dnorm(0, .1) 138 | # mu.age.edu.cat ~ dnorm(0, .1) 139 | # mu.state ~ dnorm(0, .1) 140 | # mu.region ~ dnorm(0, .1) 141 | # mu.poll ~ dnorm(0, .1) 142 | tau.race.female ~ dgamma(1, 1) 143 | tau.age ~ dgamma(1, 1) 144 | tau.edu ~ dgamma(1, 1) 145 | tau.age.edu ~ dgamma(1, 1) 146 | tau.state ~ dgamma(1, 1) 147 | tau.region ~ dgamma(1, 1) 148 | tau.poll ~ dgamma(1, 1) 149 | } 150 | individual.params <- c("b.intercept", "b.race.female", "b.age", "b.edu", "b.age.edu", "b.state", "b.region", 151 | "b.poll", "b.relig", "b.kerry") 152 | library(ggmcmc) 153 | ?ggs 154 | individual.ggs <- ggs(individual.mcmc) 155 | individual.fit <- jags( 156 | data = Megapoll.datjags, 157 | inits = NULL, 158 | parameters.to.save = individual.params, 159 | model.file = individual.modjags, 160 | n.chains = 3, 161 | n.iter = 20000, 162 | n.burnin = 10000, 163 | n.thin = 5) 164 | library(R2jags) 165 | ## Level-1 (individual) data 166 | Megapoll1.dat <- Megapoll[, c("state", "state.cat", "region.cat", "yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "poll.cat", "p.relig.full", "p.kerry.full")] 167 | Megapoll1.dat <- na.omit(Megapoll1.dat) 168 | ## Level-2 (state) data 169 | Megapoll2.dat <- Megapoll[, c("state.cat", "region.cat", "p.relig.full", "p.kerry.full")] 170 | ## Collapse to state-level 171 | Megapoll2.dat <- aggregate(x = Megapoll, by = list(statenum = Megapoll$state.cat), FUN = function(x) x[1]) 172 | Megapoll2.dat <- Megapoll2.dat[, -c(1)] 173 | Megapoll.datjags <- as.list(Megapoll1.dat) 174 | Megapoll.datjags$statenuml2 <- Megapoll2.dat$state.cat 175 | Megapoll.datjags$region.catl2 <- Megapoll2.dat$region.cat 176 | Megapoll.datjags$poll.catl2 <- Megapoll2.dat$poll.cat 177 | Megapoll.datjags$p.relig.full.state <- Megapoll2.dat$p.relig.full 178 | Megapoll.datjags$p.kerry.full.state <- Megapoll2.dat$p.kerry.full 179 | Megapoll.datjags$N.resp <- nrow(Megapoll1.dat) 180 | Megapoll.datjags$N.state <- nrow(Megapoll2.dat) 181 | Megapoll.datjags$N.race.female <- length(unique(Megapoll1.dat$race.female)) 182 | Megapoll.datjags$N.age.cat <- length(unique(Megapoll1.dat$age.cat)) 183 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat)) 184 | Megapoll.datjags$N.age.edu.cat <- length(unique(Megapoll1.dat$age.edu.cat)) 185 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat)) 186 | Megapoll.datjags$N.region.cat <- length(unique(Megapoll1.dat$region.cat)) 187 | Megapoll.datjags$N.poll <- length(unique(Megapoll1.dat$poll.cat)) 188 | ## Model 189 | individual.modjags <- function(){ 190 | for (i in 1:N.resp){ 191 | yes.of.all[i] ~ dbern(p[i]) 192 | logit(p[i]) <- b.intercept + 193 | b.relig * p.relig.full[i] + 194 | b.kerry * p.kerry.full[i] + 195 | b.race.female[race.female[i]] + 196 | b.age[age.cat[i]] + 197 | b.edu[edu.cat[i]] + 198 | b.age.edu[age.edu.cat[i]] + 199 | b.state[state.cat[i]] + 200 | b.region[region.cat[i]] + 201 | b.poll[poll.cat[i]] 202 | } 203 | for(i in 1:N.race.female){b.race.female[i] ~ dnorm(0, tau.race.female)} 204 | for(i in 1:N.age.cat){b.age[i] ~ dnorm(0, tau.age)} 205 | for(i in 1:N.edu.cat){b.edu[i] ~ dnorm(0, tau.edu)} 206 | for(i in 1:N.age.edu.cat){b.age.edu[i] ~ dnorm(0, tau.age.edu)} 207 | for(i in 1:N.state){b.state[i] ~ dnorm(0, tau.state)} 208 | for(i in 1:N.region.cat){b.region[i] ~ dnorm(0, tau.region)} 209 | for(i in 1:N.poll){b.poll[i] ~ dnorm(0, tau.poll)} 210 | b.intercept ~ dnorm(0, .1) 211 | b.relig ~ dnorm(0, .1) 212 | b.kerry ~ dnorm(0, .1) 213 | # mu.race.female ~ dnorm(0, .1) 214 | # mu.age.cat ~ dnorm(0, .1) 215 | # mu.edu.cat ~ dnorm(0, .1) 216 | # mu.age.edu.cat ~ dnorm(0, .1) 217 | # mu.state ~ dnorm(0, .1) 218 | # mu.region ~ dnorm(0, .1) 219 | # mu.poll ~ dnorm(0, .1) 220 | tau.race.female ~ dgamma(1, 1) 221 | tau.age ~ dgamma(1, 1) 222 | tau.edu ~ dgamma(1, 1) 223 | tau.age.edu ~ dgamma(1, 1) 224 | tau.state ~ dgamma(1, 1) 225 | tau.region ~ dgamma(1, 1) 226 | tau.poll ~ dgamma(1, 1) 227 | } 228 | individual.params <- c("b.intercept", "b.race.female", "b.age", "b.edu", "b.age.edu", "b.state", "b.region", 229 | "b.poll", "b.relig", "b.kerry") 230 | individual.fit <- jags( 231 | data = Megapoll.datjags, 232 | inits = NULL, 233 | parameters.to.save = individual.params, 234 | model.file = individual.modjags, 235 | n.chains = 3, 236 | n.iter = 20000, 237 | n.burnin = 10000, 238 | n.thin = 5) 239 | print(individual.fit) 240 | library(mcmcplots) 241 | mcmcplot(as.mcmc(individual.fit)) 242 | individual.modjags <- function(){ 243 | for (i in 1:N.resp){ 244 | yes.of.all[i] ~ dbern(p[i]) 245 | logit(p[i]) <- b.intercept + 246 | b.relig * p.relig.full[i] + 247 | b.kerry * p.kerry.full[i] + 248 | b.race.female[race.female[i]] + 249 | b.age[age.cat[i]] + 250 | b.edu[edu.cat[i]] + 251 | b.age.edu[age.edu.cat[i]] + 252 | b.state[state.cat[i]] + 253 | b.region[region.cat[i]] + 254 | b.poll[poll.cat[i]] 255 | } 256 | for(i in 1:N.race.female){b.race.female[i] ~ dnorm(0, tau.race.female)} 257 | for(i in 1:N.age.cat){b.age[i] ~ dnorm(0, tau.age)} 258 | for(i in 1:N.edu.cat){b.edu[i] ~ dnorm(0, tau.edu)} 259 | for(i in 1:N.age.edu.cat){b.age.edu[i] ~ dnorm(0, tau.age.edu)} 260 | for(i in 1:N.state){b.state[i] ~ dnorm(0, tau.state)} 261 | for(i in 1:N.region.cat){b.region[i] ~ dnorm(0, tau.region)} 262 | for(i in 1:N.poll){b.poll[i] ~ dnorm(0, tau.poll)} 263 | b.intercept ~ dnorm(0, .1) 264 | b.relig ~ dnorm(0, .1) 265 | b.kerry ~ dnorm(0, .1) 266 | # mu.race.female ~ dnorm(0, .1) 267 | # mu.age.cat ~ dnorm(0, .1) 268 | # mu.edu.cat ~ dnorm(0, .1) 269 | # mu.age.edu.cat ~ dnorm(0, .1) 270 | # mu.state ~ dnorm(0, .1) 271 | # mu.region ~ dnorm(0, .1) 272 | # mu.poll ~ dnorm(0, .1) 273 | tau.race.female ~ dgamma(.5, .5) 274 | tau.age ~ dgamma(.5, .5) 275 | tau.edu ~ dgamma(.5, .5) 276 | tau.age.edu ~ dgamma(.5, .5) 277 | tau.state ~ dgamma(.5, .5) 278 | tau.region ~ dgamma(.5, .5) 279 | tau.poll ~ dgamma(.5, .5) 280 | } 281 | individual.fit <- jags( 282 | data = Megapoll.datjags, 283 | inits = NULL, 284 | parameters.to.save = individual.params, 285 | model.file = individual.modjags, 286 | n.chains = 3, 287 | n.iter = 20000, 288 | n.burnin = 10000, 289 | n.thin = 5) 290 | print(individual.fit) 291 | mcmcplot(as.mcmc(individual.fit)) 292 | individual.mcmc <- as.mcmc(individual.fit) 293 | ?save 294 | save(individual.mcmc, file = "/Users/johanneskarreth/Documents/Uni/9 - ICPSR/2015/Applied Bayes/Slides (new)/Day 19/MRP/individual_mcmc.Rdata") 295 | library(ggmcmc) 296 | individual.ggs <- ggs(individual.mcmc) 297 | ggs_caterpillar(individual.ggs) 298 | ggs_caterpillar(individual.ggs, family = "b.") 299 | str(individual.ggs) 300 | head(individual.ggs) 301 | ggs_density(individual.ggs, family = "b.") 302 | individual.out.dat <- as.data.frame(as.matrix(individual.mcmc)) 303 | individual.out <- as.data.frame(as.matrix(individual.mcmc)) 304 | summary(individual.out) 305 | cp.dat <- individual.out[, grep(x = names(individual.out), pattern = "b.")] 306 | summary(cp.dat) 307 | cp.dat <- individual.out[, !grep(x = names(individual.out), pattern = "b.state", fixed = TRUE)] 308 | summary(cp.dat) 309 | cp.dat <- individual.out[, grep(x = names(individual.out), pattern = "b.", fixed = TRUE)] 310 | cp.dat <- individual.out[, -grep(x = names(individual.out), pattern = "b.state", fixed = TRUE)] 311 | summary(cp.dat) 312 | cp.dat <- individual.out[, grep(x = names(individual.out), pattern = "b.", fixed = TRUE)] 313 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.state", fixed = TRUE)] 314 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.region", fixed = TRUE)] 315 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.poll", fixed = TRUE)] 316 | summary(cp.dat) 317 | ggs_caterpillar(cp.dat) 318 | ?ggs_caterpillar 319 | ggs_caterpillar(ggs(cp.dat)) 320 | apply(cp.dat, 1, plot(density(x))) 321 | apply(cp.dat, 1, function(x) plot(density(x))) 322 | plot(density(rnorm(100))) 323 | par(mfrow = c(10, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1)) 324 | par(mfrow = c(10, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1)) 325 | par(mfrow = c(10, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1)) 326 | par(mfrow = c(5, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1)) 327 | par(mfrow = c(5, 5)); apply(cp.dat, 2, function(x) plot(density(x))); par(mfrow = c(1, 1)) 328 | par(mfrow = c(5, 5)); apply(cp.dat, 2, function(x) plot(density(x))); par(mfrow = c(1, 1)) 329 | head(cp.dat) 330 | plot(density(rnorm(100))) 331 | ?apply 332 | cp.dat.sum <- apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))) 333 | cp.dat.sum 334 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975)))) 335 | cp.dat.sum 336 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95") 337 | ?geom_segment 338 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() + 339 | geom_segment(aes(x = Low80, xend = High80), size = 2) + 340 | geom_segment(aes(x = Low95, xend = High95), size = 2) 341 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975)))) 342 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95") 343 | cp.dat.sum <- as.data.frame(cp.dat.sum) 344 | cp.dat.sum$variable <- rownames(cp.dat.sum) 345 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() + 346 | geom_segment(aes(x = Low80, xend = High80), size = 2) + 347 | geom_segment(aes(x = Low95, xend = High95), size = 2) 348 | cp.dat.sum 349 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975)))) 350 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95") 351 | cp.dat.sum <- as.data.frame(cp.dat.sum) 352 | cp.dat.sum$variable <- rownames(cp.dat.sum) 353 | cp.dat.sum 354 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975)))) 355 | cp.dat.sum <- as.data.frame(cp.dat.sum) 356 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95") 357 | cp.dat.sum$variable <- rownames(cp.dat.sum) 358 | cp.dat.sum 359 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.intercept", fixed = TRUE)] 360 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975)))) 361 | cp.dat.sum <- as.data.frame(cp.dat.sum) 362 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95") 363 | cp.dat.sum$variable <- rownames(cp.dat.sum) 364 | cp.dat.sum 365 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() + 366 | geom_segment(aes(x = Low80, xend = High80), size = 2) + 367 | geom_segment(aes(x = Low95, xend = High95), size = 1) 368 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() + 369 | geom_segment(aes(x = Low80, xend = High80, yend = variable), size = 2) + 370 | geom_segment(aes(x = Low95, xend = High95, yend = variable), size = 1) 371 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() + 372 | geom_segment(aes(x = Low80, xend = High80, yend = variable), size = 1) + 373 | geom_segment(aes(x = Low95, xend = High95, yend = variable), size = 0.5) 374 | one.post <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))), 375 | state.cat = 0, 376 | state.name = "", 377 | state.initnum = 0) 378 | posterior.state.ranefs <- individual.out[, grep(pattern = "b.state", x = colnames(individual.out), fixed = TRUE)] 379 | one.post <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))), 380 | state.cat = 0, 381 | state.name = "", 382 | state.initnum = 0) 383 | obs.state.initnum <- sort(unique(Megapoll$state.initnum)) 384 | obs.state <- sort(unique(Megapoll$state)) 385 | obs.state 386 | obs.state.initnum 387 | posterior.state.ranefs.list <- list() 388 | for(i in 1:ncol(posterior.state.ranefs)){ 389 | posterior.state.ranefs.list[[i]] <- one.post 390 | posterior.state.ranefs.list[[i]][[1]] <- as.numeric(posterior.state.ranefs[, i]) 391 | posterior.state.ranefs.list[[i]][[2]] <- i 392 | posterior.state.ranefs.list[[i]][[3]] <- obs.state[i] 393 | posterior.state.ranefs.list[[i]][[4]] <- obs.state.initnum[i] 394 | } 395 | posterior.state.ranefs.list[[50]] <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))), 396 | state.cat = 0, 397 | state.name = "AK", 398 | state.initnum = 1) 399 | posterior.state.ranefs.list[[51]] <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))), 400 | state.cat = 0, 401 | state.name = "HI", 402 | state.initnum = 12) 403 | posterior.state.ranefs.list <- posterior.state.ranefs.list[order(sapply(posterior.state.ranefs.list, function(x) x[[4]], simplify=TRUE), decreasing=FALSE)] 404 | posterior.state.ranefs.mat <- do.call("cbind", lapply(posterior.state.ranefs.list, "[[", 1)) 405 | summary(posterior.state.ranefs.mat) 406 | b.state <- as.data.frame(posterior.state.ranefs.mat) 407 | names(b.state) <- Statelevel$sstate 408 | b.intercept <- individual.out[, grep(pattern = "b.intercept", x = colnames(individual.out), fixed = TRUE)] 409 | b.race.female <- individual.out[, grep(pattern = "b.race.female", x = colnames(individual.out), fixed = TRUE)] 410 | b.age <- individual.out[, grep(pattern = "b.age", x = colnames(individual.out), fixed = TRUE)] 411 | b.age <- b.age[, -c(grep(pattern = "b.age.edu", x = colnames(b.age), fixed = TRUE))] 412 | b.edu <- individual.out[, grep(pattern = "b.edu", x = colnames(individual.out), fixed = TRUE)] 413 | b.age.edu <- individual.out[, grep(pattern = "b.age.edu", x = colnames(individual.out), fixed = TRUE)] 414 | b.region <- individual.out[, grep(pattern = "b.region", x = colnames(individual.out), fixed = TRUE)] 415 | b.relig <- individual.out[, grep(pattern = "b.relig", x = colnames(individual.out), fixed = TRUE)] 416 | b.kerry <- individual.out[, grep(pattern = "b.kerry", x = colnames(individual.out), fixed = TRUE)] 417 | Xb <- b.intercept + 418 | b.race.female[, Census$crace.female] + 419 | b.age[, Census$cage.cat] + 420 | b.edu[, Census$cedu.cat] + 421 | b.age.edu[, Census$cage.edu.cat] + 422 | b.state[, Census$cstate.initnum] + 423 | b.region[, Census$cregion.cat] + 424 | (b.relig %*% t(Census$cp.relig.full)) + 425 | (b.kerry %*% t(Census$cp.kerry.full)) 426 | str(b.intercept) 427 | str(b.race.female) 428 | Xb <- b.intercept + 429 | b.race.female[, Census$crace.female] 430 | Xb <- b.intercept + 431 | b.race.female[, Census$crace.female] + 432 | b.age[, Census$cage.cat] + 433 | b.edu[, Census$cedu.cat] + 434 | b.age.edu[, Census$cage.edu.cat] + 435 | b.state[, Census$cstate.initnum] 436 | Xb <- b.intercept + 437 | b.race.female[, Census$crace.female] + 438 | b.age[, Census$cage.cat] + 439 | b.edu[, Census$cedu.cat] + 440 | b.age.edu[, Census$cage.edu.cat] 441 | Xb <- b.intercept + 442 | b.race.female[, Census$crace.female] + 443 | b.age[, Census$cage.cat] + 444 | b.edu[, Census$cedu.cat] 445 | Xb <- b.intercept + 446 | b.race.female[, Census$crace.female] + 447 | b.age[, Census$cage.cat] 448 | Xb <- b.intercept + 449 | b.race.female[, Census$crace.female] 450 | str(b.age) 451 | head(Census) 452 | Xb <- b.intercept + 453 | b.race.female[, Census$crace.female] + 454 | b.age[, Census$cage.cat] 455 | table(Census$cage.cat) 456 | Xb <- b.intercept + 457 | b.race.female[, Census$crace.female] + 458 | b.age[, Census$cage.cat] 459 | b.state <- as.matrix(b.state) 460 | b.intercept <- as.matrix(b.intercept) 461 | b.race.female <- as.matrix(b.race.female) 462 | b.age <- as.matrix(b.age) 463 | b.edu <- as.matrix(b.edu) 464 | b.age.edu <- as.matrix(b.age.edu) 465 | b.region <- as.matrix(b.region) 466 | Xb <- b.intercept + 467 | b.race.female[, Census$crace.female] + 468 | b.age[, Census$cage.cat] + 469 | b.edu[, Census$cedu.cat] + 470 | b.age.edu[, Census$cage.edu.cat] + 471 | b.state[, Census$cstate.initnum] + 472 | b.region[, Census$cregion.cat] + 473 | (b.relig %*% t(Census$cp.relig.full)) + 474 | (b.kerry %*% t(Census$cp.kerry.full)) 475 | Xb <- b.intercept + 476 | b.race.female[, Census$crace.female] + 477 | b.age[, Census$cage.cat] + 478 | b.edu[, Census$cedu.cat] + 479 | b.age.edu[, Census$cage.edu.cat] + 480 | b.state[, Census$cstate.initnum] + 481 | b.region[, Census$cregion.cat] 482 | b.state <- as.data.frame(posterior.state.ranefs.mat) 483 | names(b.state) <- Statelevel$sstate 484 | b.intercept <- individual.out[, grep(pattern = "b.intercept", x = colnames(individual.out), fixed = TRUE)] 485 | b.race.female <- individual.out[, grep(pattern = "b.race.female", x = colnames(individual.out), fixed = TRUE)] 486 | b.age <- individual.out[, grep(pattern = "b.age", x = colnames(individual.out), fixed = TRUE)] 487 | b.age <- b.age[, -c(grep(pattern = "b.age.edu", x = colnames(b.age), fixed = TRUE))] 488 | b.edu <- individual.out[, grep(pattern = "b.edu", x = colnames(individual.out), fixed = TRUE)] 489 | b.age.edu <- individual.out[, grep(pattern = "b.age.edu", x = colnames(individual.out), fixed = TRUE)] 490 | b.region <- individual.out[, grep(pattern = "b.region", x = colnames(individual.out), fixed = TRUE)] 491 | b.relig <- individual.out[, grep(pattern = "b.relig", x = colnames(individual.out), fixed = TRUE)] 492 | b.kerry <- individual.out[, grep(pattern = "b.kerry", x = colnames(individual.out), fixed = TRUE)] 493 | str(b.relig) 494 | str(b.region) 495 | str(b.age.edu) 496 | Xb <- b.intercept + 497 | b.race.female[, Census$crace.female] + 498 | b.age[, Census$cage.cat] + 499 | b.edu[, Census$cedu.cat] + 500 | b.age.edu[, Census$cage.edu.cat] + 501 | b.state[, Census$cstate.initnum] + 502 | b.region[, Census$cregion.cat] + 503 | (b.relig %*% t(Census$cp.relig.full)) + 504 | (b.kerry %*% t(Census$cp.kerry.full)) 505 | str(individual.out) 506 | str(b.age) 507 | str(b.age.edu) 508 | str(b.region) 509 | str(b.relig) 510 | str(b.kerry) 511 | str(b.race.female) 512 | sessionInfo() 513 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/.gitignore -------------------------------------------------------------------------------- /MCMC_probability_demo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Predicted probabilities after Bayesian logit/probit models" 3 | author: "Johannes Karreth" 4 | date: "July 12, 2017" 5 | output: 6 | pdf_document: default 7 | html_document: 8 | code_folding: hide 9 | --- 10 | 11 | ```{r setup, include=FALSE} 12 | rm(list = ls()) ### To clear namespace 13 | knitr::opts_chunk$set(echo = TRUE, error = FALSE, message = FALSE, warning = FALSE, size = "scriptsize", fig.width = 6, fig.height = 4, out.width = '.99\\linewidth') 14 | ``` 15 | 16 | # Purpose 17 | 18 | This document shows how to use two functions I wrote, `MCMC_simcase_probs` and `MCMC_observed_probs`, to quickly calculate predicted probabilities for "average" and "observed" cases based on estimates from Bayesian logit or probit models (for binary outcomes). For more details on each approach, see King et al. (AJPS 2000) and Hanmer \& Kalkan (AJPS 2013), listed in the syllabus. 19 | 20 | Both functions can be accessed from my Github repository at . **These functions are not fully tested and should be used with extreme caution. Please let me know right away about any problems you run into so I can improve these functions.** 21 | 22 | The functions require the following R packages to be installed: dplyr and reshape2. 23 | 24 | # Example 1 25 | 26 | Source: Hainmueller, J. and Hiscox, M. J. (2006). Learning to Love Globalization: Education and Individual Attitudes Toward International Trade. International Organization, 60 (2):469-498. 27 | 28 | This example analyzes a simplified version of one of the empirical models in Hainmueller and Hiscox' study of individuals' support for free trade or protectionist policies. The data are a cleaned up and modified version of the 1996 American National Election studies as used in Hainmueller & Hiscox (2006). The outcome variable `protectionist` is binary - coded as 1 if a respondent expressed a preference for more protectionist policies, and coded as 0 if a respondent favored free trade. The explanatory variables are (see Table A2 in Hainmueller & Hiscox (2006) for more details): 29 | 30 | - `age`: the respondent's age in years. 31 | - `female`: a binary indicator for female respondents. 32 | - `TUmember`: a binary indicator for trade union members. 33 | - `partyid`: the respondent's party identification: coded from 0 "strong Democrat"" to 6 "strong Republican". 34 | - `ideology`: the respondent's ideology: coded 0 if conservative, 1 if moderate, and 2 if liberal. 35 | - `schooling`: years of full-time education completed. 36 | 37 | The estimated model is a Bayesian logistic regression model, fit in JAGS. 38 | 39 | ```{r, echo = FALSE} 40 | prot.dat <- rio::import("http://www.jkarreth.net/files/hw5.dat.csv") 41 | 42 | prot.datjags <- as.list(na.omit(prot.dat)) 43 | prot.datjags$N <- length(prot.datjags$protectionist) 44 | 45 | prot.mod <- function() { 46 | 47 | for(i in 1:N){ 48 | protectionist[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i 49 | logit(p[i]) <- mu[i] ## Logit link function 50 | mu[i] <- b[1] + b[2] * age[i] + b[3] * female[i] + b[4] * TUmember[i] 51 | + b[5] * partyid[i] + b[6] * ideology[i] + b[7] * schooling[i] 52 | } 53 | 54 | for(j in 1:7){ 55 | b[j] ~ dnorm(0, 0.01) ## Use a coefficient vector for simplicity 56 | } 57 | 58 | } 59 | 60 | prot.params <- c("b") 61 | prot.inits1 <- list("b" = rep(0, 7)) 62 | prot.inits2 <- list("b" = rep(0, 7)) 63 | prot.inits <- list(prot.inits1, prot.inits2) 64 | 65 | set.seed(123) 66 | 67 | library(R2jags) 68 | prot.fit <- jags(data = prot.datjags, inits = prot.inits, 69 | parameters.to.save = prot.params, n.chains = 2, n.iter = 2000, 70 | n.burnin = 1000, model.file = prot.mod) 71 | 72 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 73 | mcmctab(as.mcmc(prot.fit)) 74 | ``` 75 | 76 | ## Probabilities 77 | 78 | ### Average case approach 79 | 80 | ```{r} 81 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_simcase_probs.R") 82 | 83 | prot_xmat <- model.matrix(protectionist ~ age + female + TUmember + partyid + 84 | ideology + schooling, 85 | data = prot.dat) 86 | 87 | prot_mcmc <- as.mcmc(prot.fit) 88 | prot_mcmc_mat <- as.matrix(prot_mcmc)[, 1:ncol(prot_xmat)] 89 | 90 | prot_schooling_sim <- seq(from = min(prot.dat$schooling), 91 | to = max(prot.dat$schooling), 92 | length.out = 10) 93 | 94 | prot_sim_prob <- MCMC_simcase_probs(model_matrix = prot_xmat, 95 | mcmc_out = prot_mcmc_mat, 96 | x_col = 7, 97 | x_range_vec = prot_schooling_sim) 98 | 99 | library(ggplot2) 100 | p_sim <- ggplot(data = prot_sim_prob, aes(x = predictor, y = median_pp)) + 101 | geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 102 | ylim(c(0, 1)) + xlab("Schooling") + ylab("Pr(Against trade agreement)") + 103 | ggtitle("Average case approach") + theme_bw() 104 | p_sim 105 | ``` 106 | 107 | ### Observed value approach 108 | 109 | ```{r} 110 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_observed_probs.R") 111 | 112 | prot_xmat <- model.matrix(protectionist ~ age + female + TUmember + partyid + 113 | ideology + schooling, 114 | data = prot.dat) 115 | 116 | prot_mcmc <- as.mcmc(prot.fit) 117 | prot_mcmc_mat <- as.matrix(prot_mcmc)[, 1:ncol(prot_xmat)] 118 | 119 | prot_schooling_sim <- seq(from = min(prot.dat$schooling), 120 | to = max(prot.dat$schooling), 121 | length.out = 10) 122 | 123 | prot_obs_prob <- MCMC_observed_probs(model_matrix = prot_xmat, 124 | mcmc_out = prot_mcmc_mat, 125 | x_col = 7, 126 | x_range_vec = prot_schooling_sim) 127 | 128 | library(ggplot2) 129 | p_obs <- ggplot(data = prot_obs_prob, aes(x = predictor, y = median_pp)) + 130 | geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 131 | ylim(c(0, 1)) + xlab("Schooling") + ylab("Pr(Against trade agreement") + 132 | ggtitle("Observed value approach") + theme_bw() 133 | p_obs 134 | ``` 135 | 136 | ### Comparing both approaches 137 | 138 | ```{r} 139 | p_combined <- ggplot(data = prot_sim_prob, aes(x = predictor, y = median_pp)) + 140 | geom_line(color = "blue") + 141 | geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), fill = "blue", alpha = 0.25) + 142 | ylim(c(0, 1)) + 143 | geom_line(data = prot_obs_prob, aes(x = predictor, y = median_pp), color = "orange") + 144 | geom_ribbon(data = prot_obs_prob, aes(ymin = lower_pp, ymax = upper_pp), fill = "orange", alpha = 0.25) + 145 | xlab("Schooling") + ylab("Pr(Against trade agreement)") + theme_bw() 146 | 147 | p_combined 148 | ``` 149 | 150 | # Example 2 151 | 152 | Source: Epstein, L., Lindstädt, R., Segal, J. A., and Westerland, C. (2006). The Changing Dynamics of Senate Voting on Supreme Court Nominees. Journal of Politics, 68 (2): 296–307. 153 | 154 | This example is a simplified version of the "Additional nominees" model in Table 2 of Epstein et al. (2006). Epstein et al. examine why U.S. senators cast votes in favor or against nominees for the U.S. Supreme Court. The data contain 3709 observations, with each observation being one senator's vote, from 1937 to 2005. The outcome variable `vote` is binary - coded as 1 if a senator voted Yea on a candidate, and coded as 0 if the senator voted against the nominee. The explanatory variables are (see p. 298 in Epstein et al. (2006) for more details): 155 | 156 | - `lackqual`: the degree to which senators perceive the candidate as qualified for office. 157 | - `eucldist`: the ideological distance between the senator and the candidate. 158 | - `strngprs`: a binary indicator for whether the president was "strong"" in the sense that his party controlled the Senate and he was not in his fourth year of office. 159 | - `sameprty`: a binary indicator for whether a senator is of the same political party as the president. 160 | 161 | The estimated model is a Bayesian logistic regression model, fit in JAGS. 162 | 163 | ```{r} 164 | nom.dat <- rio::import("http://epstein.wustl.edu/research/Bork.dta") 165 | 166 | nom.datjags <- as.list(na.omit(nom.dat[, c("vote", "lackqual", "eucldist", 167 | "strngprs", "sameprty")])) 168 | nom.datjags$N <- length(nom.datjags$vote) 169 | 170 | nom.mod <- function() { 171 | 172 | for(i in 1:N){ 173 | vote[i] ~ dbern(p[i]) ## Bernoulli distribution of y_i 174 | logit(p[i]) <- mu[i] ## Logit link function 175 | mu[i] <- b[1] + b[2] * lackqual[i] + b[3] * eucldist[i] + b[4] * strngprs[i] 176 | + b[5] * sameprty[i] 177 | } 178 | 179 | for(j in 1:5){ 180 | b[j] ~ dnorm(0, 0.01) ## Use a coefficient vector for simplicity 181 | } 182 | 183 | } 184 | 185 | nom.params <- c("b") 186 | nom.inits1 <- list("b" = rep(0, 5)) 187 | nom.inits2 <- list("b" = rep(0, 5)) 188 | nom.inits <- list(nom.inits1, nom.inits2) 189 | 190 | set.seed(123) 191 | 192 | library(R2jags) 193 | nom.fit <- jags(data = nom.datjags, inits = nom.inits, 194 | parameters.to.save = nom.params, n.chains = 2, n.iter = 10000, 195 | n.burnin = 5000, n.thin = 5, model.file = nom.mod) 196 | 197 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 198 | mcmctab(as.mcmc(nom.fit)) 199 | ``` 200 | 201 | ## Probabilities 202 | 203 | ### Average case approach 204 | 205 | ```{r} 206 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_simcase_probs.R") 207 | 208 | nom_xmat <- model.matrix(vote ~ lackqual + eucldist + strngprs + sameprty, 209 | data = nom.dat) 210 | 211 | nom_mcmc <- as.mcmc(nom.fit) 212 | nom_mcmc_mat <- as.matrix(nom_mcmc)[, 1:ncol(nom_xmat)] 213 | 214 | nom_distance_sim <- seq(from = min(nom.dat$eucldist), 215 | to = max(nom.dat$eucldist), 216 | length.out = 10) 217 | 218 | nom_sim_prob <- MCMC_simcase_probs(model_matrix = nom_xmat, 219 | mcmc_out = nom_mcmc_mat, 220 | x_col = 3, 221 | x_range_vec = nom_distance_sim) 222 | 223 | library(ggplot2) 224 | p_sim <- ggplot(data = nom_sim_prob, aes(x = predictor, y = median_pp)) + 225 | geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 226 | ylim(c(0, 1)) + xlab("Distance") + ylab("Pr(Yea vote)") + 227 | ggtitle("Average case approach") + theme_bw() 228 | p_sim 229 | ``` 230 | 231 | ### Observed value approach 232 | 233 | ```{r} 234 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_observed_probs.R") 235 | 236 | nom_xmat <- model.matrix(vote ~ lackqual + eucldist + strngprs + sameprty, 237 | data = nom.dat) 238 | 239 | nom_mcmc <- as.mcmc(nom.fit) 240 | nom_mcmc_mat <- as.matrix(nom_mcmc)[, 1:ncol(nom_xmat)] 241 | 242 | nom_distance_sim <- seq(from = min(nom.dat$eucldist), 243 | to = max(nom.dat$eucldist), 244 | length.out = 10) 245 | 246 | nom_obs_prob <- MCMC_observed_probs(model_matrix = nom_xmat, 247 | mcmc_out = nom_mcmc_mat, 248 | x_col = 3, 249 | x_range_vec = nom_distance_sim) 250 | 251 | library(ggplot2) 252 | p_obs <- ggplot(data = nom_obs_prob, aes(x = predictor, y = median_pp)) + 253 | geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 254 | ylim(c(0, 1)) + xlab("Distance") + ylab("Pr(Yea vote)") + 255 | ggtitle("Observed value approach") + theme_bw() 256 | p_obs 257 | ``` 258 | 259 | ### Comparing both approaches 260 | 261 | ```{r} 262 | p_combined <- ggplot(data = nom_sim_prob, aes(x = predictor, y = median_pp)) + 263 | geom_line(color = "blue") + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), fill = "blue", alpha = 0.25) + 264 | ylim(c(0, 1)) + 265 | geom_line(data = nom_obs_prob, aes(x = predictor, y = median_pp), color = "orange") + 266 | geom_ribbon(data = nom_obs_prob, aes(ymin = lower_pp, ymax = upper_pp), fill = "orange", alpha = 0.25) + 267 | xlab("Distance") + ylab("Pr(Yea vote)") + theme_bw() 268 | p_combined 269 | ``` 270 | 271 | -------------------------------------------------------------------------------- /MCMC_probability_demo.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/MCMC_probability_demo.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Applied Bayesian Modeling at ICPSR 2 | ========= 3 | 4 | R and JAGS code accompanying my ICPSR summer program course on Applied Bayesian Modeling. See for more information. Please feel free to fork and develop any code you see in this repository. If you notice a problem or have a question, please email me at and/or create an issue with the respective file. 5 | 6 | Modeling examples 7 | ----------------- 8 | 9 | - [angell.instructions.R](https://github.com/jkarreth/Bayes/blob/master/angell.instructions.R): code for a Bayesian linear regression model, using the Angell data from 10 | John Fox's [R and S-PLUS Companion to Applied 11 | Regression](http://socserv.socsci.mcmaster.ca/jfox/Books/Companion/index.html) 12 | in R using R2jags. This example contains code for data preparation, 13 | model fitting, and diagnostics. 14 | - [angell.jags.zip](http://www.jkarreth.net/files/angell.jags.zip): code for the same model directly in JAGS from the Terminal and analyze the output using the `coda` package in R. 15 | - [beer.instructions.R](https://github.com/jkarreth/Bayes/blob/master/beer.instructions.R): code for a Bayesian ordered logit model, using data on beer ratings 16 | - [econvote.instructions.R](https://github.com/jkarreth/Bayes/blob/master/econvote.instructions.R): code for a Bayesian multinomial logit model, using Austrian voting data. 17 | - [legislators.instructions.R](https://github.com/jkarreth/Bayes/blob/master/legislators.instructions.R): code for [Simon Jackman's Legislators example](http://jackman.stanford.edu/mcmc/mainFrameWinBugs.php#Legislators) using R2jags. [Bayesian IRT model] 18 | - [pumps.instructions.R](https://github.com/jkarreth/Bayes/blob/master/pumps.instructions.R): code for the Pumps demo from the WinBUGS examples folder using R2jags. [Conjugate gamma-Poisson hierarchical model] 19 | - [rats.instructions.R](https://github.com/jkarreth/Bayes/blob/master/rats.instructions.R): code for the Rats demo from the WinBUGS examples folder using R2jags. [Normal hierarchical model] 20 | 21 | Multilevel models 22 | ----------------- 23 | 24 | - [wvs.instructions.R](https://github.com/jkarreth/Bayes/blob/master/wvs.instructions.R): 25 | Linear model with group-level coefficients using an old subsample of World Values Survey. 26 | - [mlm.state.instructions.R](https://github.com/jkarreth/Bayes/blob/master/mlm.state.instructions.R): Step-by-step instructions to set up hierarchical data in R and fit a multilevel logit model using R2Jags, following the example in chapter 17 of Gelman and Hill's [Data Analysis Using Regression and Multilevel/Hierarchical Models](http://www.stat.columbia.edu/~gelman/arm/). Data: 27 | [polls.subset.merged.dta](http://www.jkarreth.net/files/polls.subset.merged.dta), 28 | [polls.subset.JK.dta](http://www.jkarreth.net/files/polls.subset.JK.dta), 29 | [presvote.dta](http://www.jkarreth.net/files/presvote.dta). 30 | 31 | Diagnostics 32 | ----------- 33 | 34 | - [diagnostics.R](https://github.com/jkarreth/Bayes/blob/master/diagnostics.R): 35 | This script shows a variety of ways to obtain diagnostics 36 | (traceplots, density plots, BGR, etc.) of JAGS/MCMC objects in R 37 | using the `coda`, `superdiag`, `ggmcmc`, and `mcmcplots` packages. 38 | 39 | Model presentation 40 | ------------------ 41 | 42 | - [regression.table.R](https://github.com/jkarreth/Bayes/blob/master/regression.table.R): example code to easily export JAGS/BUGS results to LaTeX or HTML. Based on my [mcmctab](https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R) function. 43 | - [regression.dotplot.R](https://github.com/jkarreth/Bayes/blob/master/regression.dotplot.R): example code to easily make regression coefficient plots from JAGS/BUGS results. 44 | - [Posterior-Plots.R](https://github.com/reuning/EVDebs/blob/master/Bayesian/Posterior-Plots.R): a function written by [Kevin Reuning](http://www.kevinreuning.com) (participant in the 2015 Applied Bayes workshop at ICPSR) to create a coefficient dot plot with added posterior density. 45 | - [interaction.instructions.R](https://github.com/jkarreth/Bayes/blob/master/interaction.instructions.R): code to plot marginal effects from a Bayesian linear model with an 46 | interaction term across the range of a moderating variable. 47 | - [limited.dep.vars.funcs.R](https://github.com/edunford/bayes.functions/blob/master/limited.dep.vars.funcs.R): a set of functions written by [Eric Dunford](http://gvpt.umd.edu/gradprofile/Dunford/Eric%20) (participant in the 2015 Applied Bayes workshop at ICPSR) to calculate and visualize predicted probabilities from Bayesian logit or probit models for observed and simulated data. 48 | - [factor.dotplot.R](https://github.com/jkarreth/Bayes/blob/master/factor.dotplot.R): code to make a dot plot (with credible intervals) of a Bayesian 49 | factor score. -------------------------------------------------------------------------------- /angell.instructions.R: -------------------------------------------------------------------------------- 1 | ############################################################ 2 | ## R2jags example using Angell data - Simple linear model ## 3 | ############################################################ 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2016 7 | 8 | ## Required packages 9 | library(foreign) 10 | library(R2jags) 11 | ## calls: 12 | # library(coda) 13 | # library(lattice) 14 | # library(R2WinBUGS) 15 | # library(rjags) 16 | 17 | #### R2jags help 18 | ?jags 19 | 20 | ## Read the data in car library 21 | library(car) 22 | data(Angell) 23 | angell.1 <- Angell[, -4] ## take off the fourth column (remember, the order is (row, column)) 24 | 25 | ## What do the data look like? 26 | head(angell.1) 27 | 28 | ## Save the model as "angell.model.jags" in your working directory, using a text editor. Make sure you specify the full name below (check if your text editor gives the file a .txt extension). Do not run this model from within R. You can set your working directory in R's preferences, or via: 29 | 30 | setwd("/Users/johanneskarreth/R/Bayes/angell") 31 | 32 | # model { 33 | # for(i in 1:N){ 34 | # moral[i]~dnorm(mu[i], tau) 35 | # mu[i]<-alpha + beta1*hetero[i] + beta2*mobility[i] 36 | # } 37 | # 38 | # alpha~dnorm(0, .01) 39 | # beta1~dunif(-100,100) 40 | # beta2~dunif(-100,100) 41 | # tau~dgamma(.01,.01) 42 | # } 43 | 44 | ## Instead, you can also write the model directly in your R script. For use with R2jags, do this: 45 | 46 | angell.model.jags <- function() { 47 | 48 | for(i in 1:N){ 49 | moral[i]~dnorm(mu[i], tau) 50 | mu[i]<-alpha + beta1*hetero[i] + beta2*mobility[i] 51 | } 52 | 53 | alpha~dnorm(0, .01) 54 | beta1~dunif(-100,100) 55 | beta2~dunif(-100,100) 56 | tau~dgamma(.01,.01) 57 | 58 | } 59 | 60 | ## Now define the vectors of the data matrix for JAGS: 61 | 62 | moral <- angell.1$moral 63 | hetero <- angell.1$hetero 64 | mobility <- angell.1$mobility 65 | N <- length(angell.1$moral) 66 | 67 | ## Read in the Angell data for JAGS 68 | 69 | angell.data <- list("moral", "hetero", "mobility", "N") 70 | angell.data <- list(moral = Angell$moral, hetero = Angell$hetero, mobility = Angell$mobility, N = length(Angell$moral)) 71 | 72 | ## Name the parameters of the JAGS model you will monitor 73 | 74 | angell.params <- c("alpha", "beta1", "beta2") 75 | 76 | ## Define the starting values for JAGS 77 | 78 | angell.inits <- function(){ 79 | list("alpha"=c(20), "beta1"=c(-0.1), "beta2" =c(-.02)) 80 | } 81 | 82 | ## Alternatively, provide different starting values for each chain: 83 | inits1 <- list("alpha"=0, "beta1"=0, "beta2"=0) 84 | inits2 <- list("alpha"=1, "beta1"=1, "beta2"=1) 85 | angell.inits <- list(inits1, inits2) 86 | 87 | ## Fit the model in JAGS, having previously copied the BUGS model into your working directory as "angell.model.jags" 88 | 89 | angellfit <- jags(data=angell.data, inits=angell.inits, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file=angell.model.jags) 90 | 91 | ## If we wanted JAGS to generate initial values: 92 | angellfit <- jags(data=angell.data, inits=NULL, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file= angell.model.jags) 93 | 94 | ## Note: If you had saved the model file in your working directory, put the file name in quotation marks: 95 | # angellfit <- jags(data=angell.data, inits=NULL, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file="angell.model.jags") 96 | 97 | ## Update your model if necessary - e.g. if there is no/little convergence: 98 | 99 | angellfit.upd <- update(angellfit, n.iter=1000) 100 | angellfit.upd <- autojags(angellfit) # this function will auto-update until convergence - check R2jags documentation how this is defined 101 | 102 | ## Model summary 103 | 104 | print(angellfit) 105 | 106 | ## For a function to create a barebones summary table, use 107 | ## https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R 108 | ## install.packages("devtools") 109 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 110 | mcmctab(angellfit) 111 | 112 | plot(angellfit) 113 | 114 | ## Re-specify the model to include a distribution for R-squared 115 | 116 | angell.model.jags <- function() { 117 | 118 | for(i in 1:N){ 119 | moral[i]~dnorm(mu[i], tau) 120 | mu[i]<-alpha + beta1*hetero[i] + beta2*mobility[i] 121 | y_ybar[i] <- pow(moral[i] - mean(moral[]), 2) 122 | y_yhat[i] <- pow(moral[i] - mu[i], 2) 123 | } 124 | 125 | r2 <- (sum(y_ybar[]) - sum(y_yhat[])) / sum(y_ybar[]) 126 | 127 | alpha~dnorm(0, .01) 128 | beta1~dunif(-100,100) 129 | beta2~dunif(-100,100) 130 | tau~dgamma(.01,.01) 131 | 132 | } 133 | 134 | angell.params <- c("alpha", "beta1", "beta2", "r2") 135 | angellfit <- jags(data=angell.data, inits=angell.inits, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file= angell.model.jags) 136 | 137 | ## Diagnostics 138 | 139 | traceplot(angellfit) 140 | 141 | ## if you want to print & save the plot, you can use the following set of commands: 142 | pdf("angell.trace.pdf") # defines that the plot will be saved as a PDF file with the name "angell.trace.pdf" in your working directory 143 | traceplot(angellfit) # creates the plot in the background (you will not see it) 144 | dev.off() # finishes the printing process and creates the PDF file of the plot. If successful, R will display the message "null device 1" 145 | 146 | ## Generate MCMC object for analysis 147 | angellfit.mcmc <- as.mcmc(angellfit) 148 | 149 | summary(angellfit.mcmc) 150 | 151 | xyplot(angellfit.mcmc) 152 | ## maybe better display (you can use other Lattice options here as well): 153 | xyplot(angellfit.mcmc, layout=c(2,2), aspect="fill") 154 | 155 | densityplot(angellfit.mcmc) 156 | ## maybe better: 157 | densityplot(angellfit.mcmc, layout=c(2,2), aspect="fill") 158 | 159 | ## and more plots... all using the MCMC object and the CODA package: 160 | 161 | ## Trace- and density in one plot, print directly to your working directory: 162 | pdf("angellfit.mcmc.plot.pdf") 163 | plot(angellfit.mcmc) 164 | dev.off() 165 | 166 | ## Autocorrelation plot, print directly to your working directory: 167 | pdf("angellfit.mcmc.autocorr.pdf") 168 | autocorr.plot(angellfit.mcmc) 169 | dev.off() 170 | 171 | ## Other diagnostics using CODA: 172 | 173 | gelman.plot(angellfit.mcmc) 174 | geweke.diag(angellfit.mcmc) 175 | geweke.plot(angellfit.mcmc) 176 | raftery.diag(angellfit.mcmc) 177 | heidel.diag(angellfit.mcmc) 178 | 179 | ## Another option for diagnostics and results: the ggmcmc package 180 | library(ggmcmc) 181 | 182 | ## All plots into one PDF file in your working directory (!): 183 | ggmcmc(angellfit.mcmc) 184 | angellfit.gg <- ggs(angellfit.mcmc) 185 | ggmcmc(angellfit.gg) 186 | 187 | ## Individual plots: 188 | ggs_histogram(angellfit.gg) 189 | ggs_density(angellfit.gg) 190 | ggs_traceplot(angellfit.gg) 191 | ggs_running(angellfit.gg) 192 | 193 | ## For more, see http://xavier-fim.net/packages/ggmcmc/ 194 | 195 | ## Yet another option with a similar look, and many diagnostics printed into one file: the mcmcplots package 196 | ## If you like the look of ggplot2, check out the package "mcmcplots": 197 | library(mcmcplots) 198 | denplot(angellfit.mcmc, parms = c("alpha", "beta1", "beta2")) 199 | traplot(angellfit.mcmc, parms = c("alpha", "beta1", "beta2")) 200 | caterplot(angellfit.mcmc) 201 | caterplot(angellfit.mcmc, parms = c("alpha", "beta1", "beta2"), labels = c("Intercept", "Heterogeneity", "Mobility")) 202 | ## The following command prints diagnostic plots into a (temporary) HTML file for quick viewing: 203 | mcmcplot(angellfit.mcmc) 204 | 205 | ## Another convenient function to obtain many diagnostics at once: 206 | ## superdiag 207 | library(superdiag) 208 | superdiag(angellfit.mcmc, burnin = 100) -------------------------------------------------------------------------------- /beer.instructions.R: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | ## R2jags example using the Beer example - ordered logit model ## 3 | ################################################################# 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2016 7 | 8 | ## Required libraries 9 | library(foreign) 10 | library(R2jags) 11 | 12 | setwd("~/R/Bayes/beer") 13 | 14 | ## Read the data 15 | 16 | beer.data <- list(price = c(7.19, 3.15, 3.35, 2.59, 4.59, 4.39, 2.99, 2.49, 3.65, 2.59, 2.89, 2.99, 2.29, 4.75, 1.69, 2.55, 4.22, 2.63, 2.73, 1.79, 2.65, 2.39, 2.49, 4.55, 2.65, 1.79, 1.59, 2.79, 2.75, 2.59, 2.59, 2.15, 4.59, 2.59, 2.29), calories = c( 154, 147, 154, 144, 152, 170, 162, 149, 149, 151, 157, 135, 152, 149, 145, 99, 145, 113, 102, 147, 140, 175, 149, 150, 153, 144, 145, 97, 72, 155, 136, 144, 144, 144, 68), sodium = c( 17, 17, 17, 15, 11, 7, 10, 17, 7, 19, 15, 11, 8, 6, 23, 10, 14, 8, 15, 7, 18, 24, 27, 19, 27, 13, 18, 7, 6, 13, 19, 8, 21, 24, 15), alcohol = c( 4.7, 5, 5.1, 4.7, 5, 5.2, 5, 4.7, 4.7, 4.9, 4.9, 4.2, 4.9, 5, 4.6, 4.3, 4.5, 3.7, 4.1, 4.7, 4.6, 5.5, 4.7, 4.7, 4.6, 4.6, 4.5, 4.2, 2.9, 5, 4.4, 4.7, 4.7, 4.9, 2.3), quality = c( 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), N=35 ) 17 | 18 | price <- beer.data$price 19 | calories <- beer.data$calories 20 | sodium <- beer.data$sodium 21 | alcohol <- beer.data$alcohol 22 | quality <- beer.data$quality 23 | N <- length(beer.data$price) 24 | 25 | ## Model: Compare the BUGS language to the JAGS version 26 | 27 | ## BUGS model 28 | 29 | # model{ 30 | # for(i in 1:N){ 31 | # for(j in 1:2){ 32 | # logit(gamma[i,j]) <- theta[j] - mu[i]} 33 | # p[i,1]<- gamma[i,1] ## Probability that each beer is in cat1, or the probability that tau > xbeta. Compare: Pr(y_i=1) = logit(tau_1 - X_i \beta) - 0 34 | # p[i,2] <- gamma[i,2] - gamma[i,1] ## Probability that each beer is in cat2. Compare: Pr(y_i=2) = logit(tau_2 - X_i \beta) - logit(tau_1 - X_i \beta) 35 | # p[i,3] <- 1-gamma[i,2] ## Probability that each beer is in cat3. Compare: Pr(y_i=1) = 1 - logit(tau_2 - X_i \beta) 36 | # mu[i] <- b[1]*price[i] + b[2]*sodium[i] + b[3]*alcohol[i]+b[4]*calories[i] 37 | # quality[i] ~ dcat(p[i,1:3]) ## Categorical distribution (check WinBUGS help for more info) 38 | # 39 | # 40 | # 41 | # for(j in 1:3){ 42 | # pred[i,j] <- equals(p[i,j], ranked(p[i,1:3],3)) ## pred[i,j] is always 0 or 1 43 | # } 44 | # predcat[i] <- pred[i,1] + 2*pred[i,2] + 3*pred[i,3] 45 | # } 46 | # for(m in 1:4){ 47 | # b[m] ~ dnorm(0, .0001) 48 | # } 49 | # theta[1] ~ dnorm(0,.1)I(0, theta[2]) ## the capital I restricts the values of theta[1] to be between 0 and theta[2] 50 | # theta[2] ~ dnorm(0,.1)I(theta[1], ) 51 | # 52 | # } 53 | # 54 | # 55 | 56 | ## JAGS model !! 57 | ## Note the differences in handling the cutpoints AND the pred. category code. 58 | ## For more info: jags.tutorial.pdf on http://www.jkarreth.net/Bayes2013.html 59 | 60 | beer.model.jags <- function() { 61 | 62 | for (i in 1:N){ 63 | for (j in 1:2){ 64 | logit(gamma[i,j]) <- theta1[j] - mu[i] 65 | } 66 | quality[i] ~ dcat(p[i,1:3]) 67 | p[i,1]<- gamma[i,1] 68 | p[i,2] <- gamma[i,2] - gamma[i,1] 69 | p[i,3] <- 1-gamma[i,2] 70 | mu[i] <- b1*price[i] + b2*sodium[i] + b3*alcohol[i]+b4*calories[i] 71 | 72 | pred[i,1] <- equals(p[i,1], max(p[i,1], p[i,2], p[i,3])) # "1 if p[i,1] = max(p[i,1], p[i,2], p[i,3]), 0 otherwise" 73 | pred[i,2] <- equals(p[i,2], max(p[i,1], p[i,2], p[i,3])) 74 | pred[i,3] <- equals(p[i,3], max(p[i,1], p[i,2], p[i,3])) 75 | 76 | predcat[i] <- pred[i,1] + 2*pred[i,2] + 3*pred[i,3] 77 | 78 | } 79 | for (i in 1:2) { 80 | theta[i] ~ dnorm(0, 1.0E-3) 81 | } 82 | theta1[1:2] <- sort(theta) 83 | b1 ~ dnorm(0, .1) 84 | b2 ~ dnorm(0, .1) 85 | b3 ~ dnorm(0, .1) 86 | b4 ~ dnorm(0, .1) 87 | 88 | } 89 | 90 | 91 | ## Now define the vectors of the data matrix for JAGS: 92 | 93 | ## Read in the Beer data for JAGS 94 | 95 | beer.data <- list("price", "calories", "sodium", "alcohol", "quality", "N") 96 | 97 | ## Name the JAGS parameters to be monitored 98 | 99 | beer.params <- c("b1", "b2", "b3", "b4", "predcat") 100 | 101 | ## Define the starting values for JAGS 102 | ## Note that we give starting values to theta, not theta1 103 | 104 | beer.inits <- function(){ 105 | list("b1" = c(0.56), 106 | "b2" = c(-0.05), 107 | "b3" = c(0.95), 108 | "b4" = c(0.03), 109 | "theta" = c(7, 10)) 110 | } 111 | 112 | ## Fit the model in JAGS, having previously copied the BUGS model into your working directory as "beer.mod" 113 | 114 | beerfit <- jags(data=beer.data, inits=beer.inits, beer.params, n.chains=2, n.iter=2000, n.burnin=500, model.file=beer.model.jags) 115 | 116 | ## Update your model if necessary - e.g. if there is no/little convergence: 117 | 118 | beerfit.upd <- update(beerfit, n.iter=1000) 119 | beerfit.upd <- autojags(beerfit) # this function will auto-update until convergence - check R2jags documentation how this is defined 120 | 121 | ## Obtain info on predicted categories, i.e. 122 | ## which category does your model predict each observation to be in? 123 | 124 | # Monitor the "predcat" node from the code above 125 | # Then inspect the MEDIAN (not mean) of predcat 126 | beer.mcmc <- as.mcmc(beerfit) 127 | summary(beer.mcmc) 128 | 129 | # For a table of the distribution of predicted categories: 130 | chains <- do.call(rbind, beer.mcmc) 131 | pc <- chains[ , grep("predcat", colnames(chains))] 132 | tab <- apply(pc, 2, function(x)table(factor(x, levels = 1:3))) 133 | tab <- t(tab) 134 | library(car) 135 | prop.table(tab, 1) -------------------------------------------------------------------------------- /diagnostics.R: -------------------------------------------------------------------------------- 1 | ############################################# 2 | ## Example code to fit Bayesian models and 3 | ## generate convergence diagnostics in R 4 | ############################################# 5 | 6 | ## Johannes Karreth 7 | ## ICPSR Summer Program 2016 8 | 9 | ## This code is mostly extracted from my tutorial, 10 | ## "Using JAGS and BUGS via R", posted online at 11 | ## 12 | 13 | ## Please refer to that file for documentation of the code below. 14 | 15 | ######################################## 16 | ## Install required packages (after having installed JAGS separately) 17 | ######################################## 18 | 19 | 20 | ## install.packages("R2jags", dependencies = TRUE, repos = "https://cloud.r-project.org") 21 | ## install.packages("runjags", dependencies = TRUE, repos = "https://cloud.r-project.org") 22 | ## install.packages("MCMCpack", dependencies = TRUE, repos = "https://cloud.r-project.org") 23 | 24 | ## ------------------------------------- 25 | library("R2jags") 26 | 27 | ######################################## 28 | ## Fit example model to check if R2jags is properly installed 29 | ######################################## 30 | 31 | # An example model file is given in: 32 | model.file <- system.file(package = "R2jags", "model", "schools.txt") 33 | 34 | # data 35 | J <- 8.0 36 | y <- c(28.4,7.9,-2.8,6.8,-0.6,0.6,18.0,12.2) 37 | sd <- c(14.9,10.2,16.3,11.0,9.4,11.4,10.4,17.6) 38 | 39 | jags.data <- list("y","sd","J") 40 | jags.params <- c("mu","sigma","theta") 41 | jags.inits <- function(){ 42 | list("mu"=rnorm(1),"sigma"=runif(1),"theta"=rnorm(J)) 43 | } 44 | 45 | # Fit the model 46 | jagsfit <- jags(data=list("y","sd","J"), inits = jags.inits, 47 | jags.params, n.iter = 10, model.file = model.file) 48 | 49 | ######################################## 50 | ## Simulate data and fit a linear regression model 51 | ######################################## 52 | 53 | 54 | ## ------------------------------------- 55 | n.sim <- 100; set.seed(123) 56 | x1 <- rnorm(n.sim, mean = 5, sd = 2) 57 | x2 <- rbinom(n.sim, size = 1, prob = 0.3) 58 | e <- rnorm(n.sim, mean = 0, sd = 1) 59 | 60 | ## ------------------------------------- 61 | b1 <- 1.2 62 | b2 <- -3.1 63 | a <- 1.5 64 | y <- a + b1 * x1 + b2 * x2 + e 65 | 66 | ## ------------------------------------- 67 | sim.dat <- data.frame(y, x1, x2) 68 | 69 | ## ------------------------------------- 70 | freq.mod <- lm(y ~ x1 + x2, data = sim.dat) 71 | summary(freq.mod) 72 | 73 | ## ------------------------------------- 74 | ## Note: use the path to your own WD here 75 | setwd("~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R") 76 | 77 | ## ------------------------------------- 78 | bayes.mod <- function() { 79 | 80 | for(i in 1:N){ 81 | y[i] ~ dnorm(mu[i], tau) 82 | mu[i] <- alpha + beta1 * x1[i] + beta2 * x2[i] 83 | } 84 | 85 | alpha ~ dnorm(0, .01) 86 | beta1 ~ dunif(-100, 100) 87 | beta2 ~ dunif(-100, 100) 88 | tau ~ dgamma(.01, .01) 89 | 90 | } 91 | 92 | ## ------------------------------------- 93 | y <- sim.dat$y 94 | x1 <- sim.dat$x1 95 | x2 <- sim.dat$x2 96 | N <- nrow(sim.dat) 97 | 98 | ## ------------------------------------- 99 | sim.dat.jags <- list("y", "x1", "x2", "N") 100 | 101 | ## ------------------------------------- 102 | sim.dat.jags <- as.list(sim.dat) 103 | 104 | ## ------------------------------------- 105 | sim.dat.jags$N <- nrow(sim.dat) 106 | 107 | ## ------------------------------------- 108 | bayes.mod.params <- c("alpha", "beta1", "beta2") 109 | 110 | ## ------------------------------------- 111 | bayes.mod.inits <- function(){ 112 | list("alpha" = rnorm(1), "beta1" = rnorm(1), "beta2" = rnorm(1)) 113 | } 114 | 115 | ## ------------------------------------- 116 | inits1 <- list("alpha" = 0, "beta1" = 0, "beta2" = 0) 117 | inits2 <- list("alpha" = 1, "beta1" = 1, "beta2" = 1) 118 | inits3 <- list("alpha" = -1, "beta1" = -1, "beta2" = -1) 119 | bayes.mod.inits <- list(inits1, inits2, inits3) 120 | 121 | ## ------------------------------------- 122 | library("R2jags") 123 | set.seed(123) 124 | 125 | ## ------------------------------------- 126 | ## Note: use the path to your own model.file here 127 | bayes.mod.fit.R2jags <- jags(data = sim.dat.jags, inits = bayes.mod.inits, 128 | parameters.to.save = bayes.mod.params, n.chains = 3, n.iter = 9000, 129 | n.burnin = 1000, 130 | model.file = "~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R/bayes.mod") 131 | 132 | ## ------------------------------------- 133 | bayes.mod.fit.R2jags <- jags(data = sim.dat.jags, inits = bayes.mod.inits, 134 | parameters.to.save = bayes.mod.params, n.chains = 3, n.iter = 9000, 135 | n.burnin = 1000, model.file = bayes.mod) 136 | 137 | ## ------------------------------------- 138 | bayes.mod.fit.R2jags.upd <- update(bayes.mod.fit.R2jags, n.iter = 1000) 139 | bayes.mod.fit.R2jags.upd <- autojags(bayes.mod.fit.R2jags) 140 | 141 | ## ------------------------------------- 142 | print(bayes.mod.fit.R2jags) 143 | plot(bayes.mod.fit.R2jags) 144 | traceplot(bayes.mod.fit.R2jags) 145 | 146 | ######################################## 147 | ## Posterior plots and diagnostics 148 | ######################################## 149 | 150 | ## ------------------------------------- 151 | bayes.mod.fit.mcmc <- as.mcmc(bayes.mod.fit.R2jags) 152 | summary(bayes.mod.fit.mcmc) 153 | 154 | ## ------------------------------------- 155 | library("lattice") 156 | xyplot(bayes.mod.fit.mcmc) 157 | 158 | ## ------------------------------------- 159 | xyplot(bayes.mod.fit.mcmc, layout = c(2, 2), aspect = "fill") 160 | 161 | ## ------------------------------------- 162 | densityplot(bayes.mod.fit.mcmc) 163 | 164 | ## ------------------------------------- 165 | densityplot(bayes.mod.fit.mcmc, layout = c(2, 2), as.table = TRUE, aspect = "fill") 166 | 167 | ## ------------------------------------- 168 | gelman.plot(bayes.mod.fit.mcmc) 169 | geweke.diag(bayes.mod.fit.mcmc) 170 | raftery.diag(bayes.mod.fit.mcmc) 171 | heidel.diag(bayes.mod.fit.mcmc) 172 | 173 | ## ------------------------------------- 174 | ## install.packages("superdiag", dependencies = TRUE, repos = "http://cran.us.r-project.org") 175 | 176 | ## ------------------------------------- 177 | library("superdiag") 178 | 179 | ## ------------------------------------- 180 | superdiag(bayes.mod.fit.mcmc, burnin = 1000) 181 | 182 | ## ------------------------------------- 183 | ## install.packages("mcmcplots", dependencies = TRUE, repos = "http://cran.us.r-project.org") 184 | 185 | ## ------------------------------------- 186 | library("mcmcplots") 187 | 188 | ## ------------------------------------- 189 | denplot(bayes.mod.fit.mcmc, parms = c("alpha", "beta1", "beta2")) 190 | traplot(bayes.mod.fit.mcmc, parms = c("alpha", "beta1", "beta2")) 191 | 192 | ## ------------------------------------- 193 | ## mcmcplot(bayes.mod.fit.mcmc) 194 | 195 | ## ------------------------------------- 196 | caterplot(bayes.mod.fit.mcmc, parms = c("alpha", "beta1", "beta2"), 197 | labels = c("alpha", "beta1", "beta2")) 198 | 199 | ## ------------------------------------- 200 | ## install.packages("ggmcmc", dependencies = TRUE, repos = "http://cran.us.r-project.org") 201 | 202 | ## ------------------------------------- 203 | library("ggmcmc") 204 | 205 | ## ------------------------------------- 206 | bayes.mod.fit.gg <- ggs(bayes.mod.fit.mcmc) 207 | ggs_histogram(bayes.mod.fit.gg) 208 | ggs_density(bayes.mod.fit.gg) 209 | ggs_traceplot(bayes.mod.fit.gg) 210 | ggs_running(bayes.mod.fit.gg) 211 | ggs_compare_partial(bayes.mod.fit.gg) 212 | ggs_autocorrelation(bayes.mod.fit.gg) 213 | ggs_geweke(bayes.mod.fit.gg) 214 | ggs_caterpillar(bayes.mod.fit.gg) 215 | 216 | ## ------------------------------------- 217 | ## Note: provide the path and filename for your desired output here 218 | ggmcmc(bayes.mod.fit.gg, 219 | file = "~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R/bayes_fit_ggmcmc.pdf") 220 | 221 | ######################################## 222 | ## Use runjags instead of R2Jags 223 | ######################################## 224 | 225 | ## ------------------------------------- 226 | library("runjags") 227 | 228 | ## ------------------------------------- 229 | n.sim <- 100; set.seed(123) 230 | x1 <- rnorm(n.sim, mean = 5, sd = 2) 231 | x2 <- rbinom(n.sim, size = 1, prob = 0.3) 232 | e <- rnorm(n.sim, mean = 0, sd = 1) 233 | 234 | ## ------------------------------------- 235 | b1 <- 1.2 236 | b2 <- -3.1 237 | a <- 1.5 238 | y <- a + b1 * x1 + b2 * x2 + e 239 | 240 | ## ------------------------------------- 241 | sim.dat <- data.frame(y, x1, x2) 242 | 243 | ## ------------------------------------- 244 | freq.mod <- lm(y ~ x1 + x2, data = sim.dat) 245 | summary(freq.mod) 246 | 247 | ## ------------------------------------- 248 | bayes.mod <- "model{ 249 | 250 | for(i in 1:N){ 251 | y[i] ~ dnorm(mu[i], tau) 252 | mu[i] <- alpha + beta1 * x1[i] + beta2 * x2[i] 253 | } 254 | 255 | alpha ~ dnorm(0, .01) 256 | beta1 ~ dunif(-100, 100) 257 | beta2 ~ dunif(-100, 100) 258 | tau ~ dgamma(.01, .01) 259 | 260 | }" 261 | 262 | ## ------------------------------------- 263 | sim.list <- as.list(sim.dat) 264 | 265 | ## ------------------------------------- 266 | sim.list$N <- nrow(sim.dat) 267 | 268 | ## ------------------------------------- 269 | sim.dat.runjags <- dump.format(sim.list) 270 | 271 | ## ------------------------------------- 272 | inits1 <- list(alpha = 1, beta1 = 1, beta2 = 1) 273 | inits2 <- list(alpha = 0, beta1 = 0, beta2 = 0) 274 | inits3 <- list(alpha = -1, beta1 = -1, beta2 = -1) 275 | 276 | ## ------------------------------------- 277 | bayes.mod.fit.runjags <- run.jags(model = bayes.mod, monitor = c("alpha", "beta1", "beta2"), 278 | data = sim.dat.runjags, n.chains = 3, inits = list(inits1, inits2, inits3), 279 | burnin = 1000, sample = 5000, keep.jags.files = TRUE) 280 | 281 | ## ------------------------------------- 282 | print(bayes.mod.fit.runjags) 283 | 284 | ## ------------------------------------- 285 | bayes.mod.fit.mcmc <- as.mcmc.list(bayes.mod.fit.runjags) 286 | summary(bayes.mod.fit.mcmc) 287 | 288 | ## ------------------------------------- 289 | mcmcplot(bayes.mod.fit.mcmc) 290 | superdiag(bayes.mod.fit.mcmc, burnin = 1000) 291 | 292 | ## ------------------------------------- 293 | 294 | ######################################## 295 | ## Use JAGS via terminal and analyze posterior in R using coda 296 | ######################################## 297 | 298 | 299 | ## ------------------------------------- 300 | sim.dat.list <- as.list(sim.dat) 301 | sim.dat.list$N <- nrow(sim.dat) 302 | dump("sim.dat.list", file = "sim.dat.dump") 303 | bugs2jags("sim.dat.dump", "sim.dat") 304 | 305 | ## ------------------------------------- 306 | ## Note: use the path to your own WD and names for your output files here 307 | library("coda") 308 | setwd("~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R/") 309 | chain1 <- read.coda(output.file = "bayes_outchain1.txt", 310 | index.file = "bayes_outindex.txt") 311 | chain2 <- read.coda(output.file = "bayes_outchain2.txt", 312 | index.file = "bayes_outindex.txt") 313 | chain3 <- read.coda(output.file = "bayes_outchain3.txt", 314 | index.file = "bayes_outindex.txt") 315 | bayes.chains <- as.mcmc.list(list(chain1, chain2, chain3)) 316 | 317 | ## ------------------------------------- 318 | help(package = "coda") 319 | 320 | ## ------------------------------------- 321 | summary(bayes.chains) 322 | 323 | ######################################## 324 | ## Use MCMCpack 325 | ######################################## 326 | 327 | ## ------------------------------------- 328 | library("MCMCpack") 329 | 330 | ## ------------------------------------- 331 | n.sim <- 100; set.seed(123) 332 | x1 <- rnorm(n.sim, mean = 5, sd = 2) 333 | x2 <- rbinom(n.sim, size = 1, prob = 0.3) 334 | e <- rnorm(n.sim, mean = 0, sd = 1) 335 | 336 | ## ------------------------------------- 337 | b1 <- 1.2 338 | b2 <- -3.1 339 | a <- 1.5 340 | y <- a + b1 * x1 + b2 * x2 + e 341 | 342 | ## ------------------------------------- 343 | sim.dat <- data.frame(y, x1, x2) 344 | 345 | ## ------------------------------------- 346 | freq.mod <- lm(y ~ x1 + x2, data = sim.dat) 347 | summary(freq.mod) 348 | 349 | ## ------------------------------------- 350 | bayes.mod.fit.MCMCpack <- MCMCregress(y ~ x1 + x2, data = sim.dat, burnin = 1000, 351 | mcmc = 5000, seed = 123, beta.start = c(0, 0, 0), 352 | b0 = c(0, 0, 0), B0 = c(0.1, 0.1, 0.1)) 353 | summary(bayes.mod.fit.MCMCpack) 354 | 355 | ## ------------------------------------- 356 | mcmcplot(bayes.mod.fit.MCMCpack) 357 | superdiag(bayes.mod.fit.MCMCpack, burnin = 1000) -------------------------------------------------------------------------------- /econvote.instructions.R: -------------------------------------------------------------------------------- 1 | ######################################################### 2 | ## Economic voting in Germany: Multinomial logit model ## 3 | ######################################################### 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2016 7 | 8 | library(R2jags) 9 | 10 | # Download the data from https://dbk.gesis.org/DBKSearch/download.asp?id=37837 11 | 12 | d <- rio::import("2009_ZA5384_v1-0-0.dta") 13 | 14 | # Data cleaning and recoding 15 | 16 | d$votechoice <- NA 17 | d$votechoice <- ifelse(d$v3c == 1, 1, d$votechoice) # CDU/CSU 18 | d$votechoice <- ifelse(d$v3c == 2, 2, d$votechoice) # SPD 19 | d$votechoice <- ifelse(d$v3c == 3, 3, d$votechoice) # FDP 20 | d$votechoice <- ifelse(d$v3c == 4, 4, d$votechoice) # Linke 21 | d$votechoice <- ifelse(d$v3c == 5, 5, d$votechoice) # Green 22 | 23 | d$interest <- d$v15 - 3 24 | d$interest <- ifelse(d$interest == 3, NA, d$interest) 25 | 26 | d$leftright <- d$v56 - 6 27 | d$leftright <- ifelse(d$leftright == 6, NA, d$leftright) 28 | 29 | d$age <- d$vb - 5 30 | 31 | d$children <- ifelse(d$vx1 == 1, 1, ifelse(d$vx1 == 2, 0, NA)) 32 | 33 | d$education <- ifelse(d$vf == 4 | d$vf == 5, -1, ifelse(d$vf == 1, 0, ifelse(d$vf == 2, 1, ifelse(d$vf == 3, 2, NA)))) 34 | 35 | d$union <- ifelse(d$vp == 1 | d$vp == 2 | d$vp == 3, 1, ifelse(d$vp == 4, 0, NA)) 36 | 37 | d$female <- d$va- 1 38 | 39 | d$econcountrynow <- ifelse(d$v25 == 1, 1, ifelse(d$v25 == 2, 0, ifelse(d$v25 == 3, 0, NA))) 40 | 41 | d$econselfnow <- ifelse(d$v27 == 1, 1, ifelse(d$v27 == 2, 0, ifelse(d$v27 == 3, 0, NA))) 42 | 43 | d$econctryworse <- ifelse(d$v25 == 1, 0, ifelse(d$v25 == 2, 0, ifelse(d$v25 == 3, 1, NA))) 44 | 45 | d$econselfworse <- ifelse(d$v27 == 1, 0, ifelse(d$v27 == 2, 0, ifelse(d$v27 == 3, 1, NA))) 46 | 47 | 48 | d$econcountryfuture <- ifelse(d$v29 == 2, -1, ifelse(d$v29 == 3, 0, ifelse(d$v29 == 1, 1, NA))) 49 | 50 | d$econselffuture <- ifelse(d$v28 == 3, -1, ifelse(d$v28 == 2, 0, ifelse(d$v29 == 1, 1, NA))) 51 | 52 | d$fearterror <- ifelse(d$v43 == 1, 1, ifelse(d$v43 == 2, 0, NA)) 53 | 54 | d$class <- ifelse(d$vl2 == 1, -1, ifelse(d$vl2 == 2, 0, ifelse(d$vl2 == 3, 1, NA))) 55 | 56 | d <- d[, c("votechoice", "interest", "leftright", "age", "children", "education", "union", "female", "econctryworse", "econselfworse")] 57 | 58 | # Data for JAGS/BUGS 59 | 60 | d_nm <- na.omit(d[, c("votechoice", "interest", "leftright", "age", "children", "education", "union", "female", "econctryworse", "econselfworse")]) 61 | dj <- as.list(d_nm) 62 | dj$N <- nrow(d_nm) 63 | dj$J <- length(as.numeric(levels(as.factor(dj$votechoice)))) 64 | 65 | econ.mod <- function() { 66 | 67 | for(i in 1:N){ 68 | votechoice[i] ~ dcat(p[i, 1:J]) 69 | 70 | for (j in 1:J){ 71 | log(q[i,j]) <- b[1,j] + 72 | b[2,j] * interest[i] + 73 | b[3,j] * leftright[i] + 74 | b[4,j] * age[i] + 75 | b[5,j] * children[i] + 76 | b[6,j] * education[i] + 77 | b[7,j] * union[i] + 78 | b[8,j] * female[i] + 79 | b[9,j] * econctryworse[i] + 80 | b[10,j] * econselfworse[i] 81 | s 82 | p[i,j] <- q[i,j]/sum(q[i,1:J]) ## should be familiar from MLE notes: q is exp(Xb) 83 | } # close J loop 84 | } # close N loop 85 | 86 | for(k in 1:10){ 87 | b[k,1] <- 0 ## MUST set the first set of covariates (for the first outcome category) to 0 88 | for(j in 2:J){ 89 | b[k,j] ~ dnorm(0, 0.1) 90 | } # close J loop 91 | } # close K loop 92 | } # close model loop 93 | 94 | econ.params <- c("b") 95 | 96 | econ.inits <- function(){ 97 | list(b = matrix(c(rep(NA, 10), 98 | rep(0, 40)), 99 | nrow = 10, ncol = 5, byrow = FALSE)) 100 | } 101 | 102 | library(R2jags) 103 | econ.fit <- jags(data = dj, inits = econ.inits, econ.params, n.chains = 3, n.iter = 10, n.burnin = 5, n.thin = 1, model.file = econ.mod) 104 | econ.mcmc <- as.mcmc(econ.fit) 105 | library(mcmcplots) 106 | mcmcplot(econ.mcmc) 107 | 108 | econ.mcmc.dat <- as.data.frame(as.matrix(econ.mcmc)) 109 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 110 | mcmctab(econ.mcmc) -------------------------------------------------------------------------------- /factor.dotplot.R: -------------------------------------------------------------------------------- 1 | ######################################## 2 | ## Bayesian factor scores as dot plot ## 3 | #### Workflow for R2Jags/R2WinBUGS ##### 4 | ######################################## 5 | 6 | ## Johannes Karreth 7 | ## jkarreth@albany.edu 8 | 9 | library(lattice) 10 | library(ggplot2) 11 | library(stats) 12 | 13 | ## if fa.fit is your factor model output from R2jags or R2WinBUGS: 14 | fa.mcmc <- as.mcmc(fa.fit) 15 | fa.mat <- as.matrix(fa.mcmc) 16 | fa.dat <- as.data.frame(fa.mat) 17 | 18 | fa.dat <- fa.dat[, -1] 19 | 20 | ## name your predicted factor latent.mean, and the CI between latent.lower and latent.upper 21 | latent.mean <- apply(fa.dat, 2, mean) 22 | latent.lower <- apply(fa.dat, 2, function(x) quantile(x, probs = c(0.025))) 23 | latent.upper <- apply(fa.dat, 2, function(x) quantile(x, probs = c(0.975))) 24 | subject <- colnames(fa.dat) 25 | 26 | dat <- data.frame(latent.mean, latent.lower, latent.upper, subject) 27 | 28 | ## here, take only 50 of the 1500 observations (just for demonstration) 29 | plot.dat <- dat[sample(1:nrow(dat), 50, replace=FALSE),] 30 | 31 | ## order the data by the factor scores, for better visualization 32 | 33 | plot.dat <- plot.dat[order(plot.dat$latent.mean), ] 34 | 35 | ## order the observation IDs as well (seems redundant, but is necessary) 36 | 37 | plot.dat$subject2 <- reorder(plot.dat$subject, plot.dat$latent.mean) 38 | 39 | ## define range of the scores, for the axis limits in the plot 40 | 41 | rg <- diff(range(c(plot.dat$latent.upper, plot.dat$latent.lower))) 42 | 43 | ## make plot using the lattice package: 44 | 45 | dotplot(subject2 ~ latent.mean, data=plot.dat,scales=list(y=list(cex=.45)), xlim=c(min(plot.dat$latent.lower)-.1*rg, max(plot.dat$latent.upper)+.1*rg),main="Latent trait", panel=function(x,y, subscripts){ 46 | panel.abline(h = as.numeric(y), col = "gray80", lty = 2) 47 | panel.segments(plot.dat$latent.lower[subscripts], y, plot.dat$latent.upper[subscripts], y, lty=1, col="gray40") 48 | panel.points(x,y, pch=16, col="black")}) 49 | 50 | ## make plot using the ggplot2 package: 51 | library(ggplot2) 52 | factorplot <- ggplot(plot.dat, aes(x = latent.mean, y = subject2)) + geom_point() + geom_segment(aes(x = latent.lower, xend = latent.upper, y = subject2, yend = subject2)) 53 | factorplot <- factorplot + xlab("Latent trait") + ylab("") + theme_bw() 54 | factorplot <- factorplot + geom_text(aes(x = latent.upper + 0.1, label = rownames(plot.dat)), size = 3) + theme(axis.ticks = element_blank(), axis.text.y = element_blank()) 55 | factorplot 56 | -------------------------------------------------------------------------------- /goals.instructions.R: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | ## R2jags example - Students' goals - Multilevel MNL model example ## 3 | ##################################################################### 4 | 5 | ## Johannes Karreth 6 | ## jkarreth@albany.edu 7 | 8 | ## Note: This example is taken from Lunn et al. (The BUGS Book), Example 10.3.4 9 | ## Their WINBUGS code can be downloaded at 10 | ## 11 | 12 | goals.dat <- read.csv("http://www.jkarreth.net/files/goals.csv") 13 | 14 | goals.datjags <- as.list(goals.dat) 15 | goals.datjags$npupils <- nrow(goals.dat) 16 | goals.datjags$nschools <- length(unique(goals.datjags$School)) 17 | goals.datjags$J <- length(unique(goals.datjags$Goals)) 18 | 19 | ## Pooled model 20 | ## Only one coefficient for boys & goal 1 (sports) 21 | 22 | mod_pool <- function() { 23 | for (i in 1:npupils) { 24 | Goals[i] ~ dcat(p[i, 1:J]) 25 | 26 | for (j in 1:J) { 27 | log(q[i,j]) <- a[i, j] 28 | p[i, j] <- q[i, j] / sum(q[i, 1:J]) 29 | } 30 | a[i, 1] <- b[1] + b.boy * Gender[i] # only estimate b for Goals = 1 31 | a[i, 2] <- b[2] 32 | a[i, 3] <- 0 33 | } 34 | 35 | b[1] ~ dnorm(0, 0.0001) 36 | b[2] ~ dnorm(0, 0.0001) 37 | b.boy ~ dnorm(0, 0.0001) 38 | or.boy <- exp(b.boy) 39 | 40 | } 41 | 42 | inits.pool1 <- list("b.boy" = 0, "b" = c(0, 0)) 43 | inits.pool2 <- list("b.boy" = 0, "b" = c(0, 0)) 44 | inits.pool3 <- list("b.boy" = 0, "b" = c(0, 0)) 45 | inits.pool <- list(inits.pool1, inits.pool2, inits.pool3) 46 | 47 | params <- c("b.boy", "b") 48 | 49 | fit.pool <- jags(data = goals.datjags, inits = inits.pool, parameters.to.save = params, 50 | n.chains = 3, n.iter = 1000, n.burnin = 500, 51 | model.file = mod_pool) 52 | 53 | ## Varying intercepts for schools 54 | ## Still only one coefficient for boys & goal 1 (sports) 55 | 56 | mod_vi <- function() { 57 | for (i in 1:npupils) { 58 | Goals[i] ~ dcat(p[i, 1:J]) 59 | 60 | for (j in 1:J) { 61 | log(q[i,j]) <- a[i, j] 62 | p[i, j] <- q[i, j] / sum(q[i, 1:J]) 63 | } 64 | a[i, 1] <- b[School[i], 1] + b.boy * Gender[i] # only estimate b for Goals = 1 65 | a[i, 2] <- b[School[i], 2] 66 | a[i, 3] <- 0 67 | } 68 | 69 | b.boy ~ dnorm(0, 0.0001) 70 | or.boy <- exp(b.boy) 71 | 72 | for (j in 1:nschools) { 73 | b[j, 1] ~ dnorm(0, 0.0001) 74 | b[j, 2] ~ dnorm(0, 0.0001) 75 | b[j, 3] <- 0 76 | } 77 | 78 | } 79 | 80 | inits.vi1 <- list("b.boy" = 0, "b" = structure(.Data=c(0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA),.Dim=c(9,3))) 81 | inits.vi2 <- list("b.boy" = 0, "b" = structure(.Data=c(0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA),.Dim=c(9,3))) 82 | inits.vi3 <- list("b.boy" = 0, "b" = structure(.Data=c(0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA),.Dim=c(9,3))) 83 | inits.vi <- list(inits.vi1, inits.vi2, inits.vi3) 84 | 85 | params <- c("b.boy", "b") 86 | 87 | fit.vi <- jags(data = goals.datjags, inits = inits.vi, parameters.to.save = params, 88 | n.chains = 3, n.iter = 1000, n.burnin = 500, 89 | model.file = mod_vi) -------------------------------------------------------------------------------- /interaction.instructions.R: -------------------------------------------------------------------------------- 1 | ##################################################################### 2 | ## Interaction example: Conditional effects, Bayes vs. frequentist ## 3 | ##################################################################### 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2015 7 | 8 | ## Required libraries 9 | library(R2jags) 10 | 11 | ## Use example data from Dave Armstrong's package 12 | library(DAMisc) 13 | data(InteractionEx) 14 | 15 | ## What do the data look like? 16 | head(InteractionEx) 17 | 18 | InteractionEx2 <- as.data.frame(scale(as.matrix(InteractionEx))) 19 | 20 | ## Frequentist model w/ interaction 21 | f.mod <- lm(y ~ x1 + x2 + x1:x2, data = InteractionEx2) 22 | summary(f.mod) 23 | 24 | ## Look at var-cov matrix (for later) 25 | vcov(lm(y ~ x1 + x2 + x1:x2, data = InteractionEx2)) 26 | 27 | ## Bayesian model w/ interaction 28 | int.model.jags <- function() { 29 | 30 | for(i in 1:N){ 31 | y[i]~dnorm(mu[i], tau) 32 | mu[i]<-alpha + beta1 * x1[i] + beta2 * x2[i] + beta3 * x1[i]*x2[i] 33 | } 34 | 35 | alpha~dnorm(0, .01) 36 | beta1~dunif(-100,100) 37 | beta2~dunif(-100,100) 38 | beta3~dunif(-100,100) 39 | tau~dgamma(.01,.01) 40 | 41 | } 42 | 43 | ## Data for JAGS 44 | 45 | int.data <- list(y = InteractionEx2$y, x1 = InteractionEx2$x1, x2 = InteractionEx2$x2, N = length(InteractionEx2$y)) 46 | 47 | ## Name the parameters of the JAGS model you will monitor 48 | 49 | int.params <- c("alpha", "beta1", "beta2", "beta3") 50 | 51 | ## Define the starting values for JAGS 52 | 53 | int.inits <- function(){ 54 | list("alpha"=c(0), "beta1"=c(0), "beta2" =c(0), "beta3"= c(0)) 55 | } 56 | 57 | ## Fit the model in JAGS, having previously copied the BUGS model into your working directory as "angell.model.jags" 58 | 59 | int.fit <- jags(data=int.data, inits=int.inits, int.params, n.chains=2, n.iter=20000, n.burnin=2000, model.file=int.model.jags) 60 | 61 | int.mcmc <- as.mcmc(int.fit) 62 | int.mcmc.mat <- as.matrix(int.mcmc) 63 | int.mcmc.dat <- as.data.frame(int.mcmc.mat) 64 | 65 | ## Simulate the range of the moderating variable 66 | 67 | x2.sim <- seq(min(InteractionEx2$x2), max(InteractionEx2$x2), by = 0.1) 68 | 69 | ## Calculate conditional effect of X1 across the range of X2 70 | 71 | ## Bayes: 72 | int.sim <- matrix(rep(NA, nrow(int.mcmc.dat)*length(x2.sim)), nrow = nrow(int.mcmc.dat)) 73 | for(i in 1:length(x2.sim)){ 74 | int.sim[, i] <- int.mcmc.dat$beta1 + int.mcmc.dat$beta3 * x2.sim[i] 75 | } 76 | 77 | ## Note: the variance now comes from the posterior, not the vcov matrix 78 | 79 | bayes.c.eff.mean <- apply(int.sim, 2, mean) 80 | bayes.c.eff.lower <- apply(int.sim, 2, function(x) quantile(x, probs = c(0.025))) 81 | bayes.c.eff.upper <- apply(int.sim, 2, function(x) quantile(x, probs = c(0.975))) 82 | 83 | ## Frequentist (cf. Brambor et al. 2006) 84 | freq.c.eff.pe <- coef(f.mod)[2] + coef(f.mod)[4] * x2.sim 85 | freq.c.eff.sd <- sqrt(vcov(f.mod)[2,2] + x2.sim^2 * vcov(f.mod)[4,4] + 2 * x2.sim * vcov(f.mod)[2,4]) 86 | 87 | ## Combine both estimates 88 | plot.dat <- data.frame(x2.sim, bayes.c.eff.mean, bayes.c.eff.lower, bayes.c.eff.upper, freq.c.eff.pe, freq.c.eff.sd) 89 | 90 | ## Compare 91 | cor(plot.dat$bayes.c.eff.mean, plot.dat$freq.c.eff.pe) 92 | 93 | ## Plot both estimates 94 | 95 | library(ggplot2) 96 | 97 | ## Use blue for Bayesian, red for frequentist estimates. Transparency to allow overlay; purple indicates complete overlay. Take a close look at the upper and lower limits of the CI for each estimate. 98 | 99 | ## Foundation for the plot & line for the posterior mean of the Bayesian conditional effect 100 | p <- ggplot(plot.dat, aes(x = x2.sim, y = bayes.c.eff.mean)) + geom_line(color = "blue", alpha = 0.8, size = 0.5) 101 | 102 | ## CI for the Bayesian conditional effect 103 | p <- p + geom_ribbon(aes(ymin = bayes.c.eff.lower, ymax = bayes.c.eff.upper), fill = "blue", alpha = 0.2) 104 | 105 | ## Lines for the lower and upper bound of the Bayesian conditional effect 106 | p <- p + geom_line(aes(x = x2.sim, y = bayes.c.eff.lower), color = "blue", alpha = 0.8, size = 0.5) + geom_line(aes(x = x2.sim, y = bayes.c.eff.upper), color = "blue", alpha = 0.8, size = 0.5) 107 | 108 | ## Line for the point estimate of the frequentist conditional effect 109 | p <- p + geom_line(aes(x = x2.sim, y = freq.c.eff.pe), color = "red", alpha = 0.5, size = 0.5) 110 | 111 | ## CI for the frequentist conditional effect 112 | p <- p + geom_ribbon(aes(ymin = freq.c.eff.pe - 1.96 * freq.c.eff.sd, ymax= freq.c.eff.pe + 1.96 * freq.c.eff.sd), fill = "red", alpha = 0.1) 113 | 114 | ## Lines for the lower and upper bound of the frequentist conditional effect 115 | p <- p + geom_line(aes(x = x2.sim, y = freq.c.eff.pe - 1.96 * freq.c.eff.sd), color = "red", alpha = 0.5, size = 0.5) + geom_line(aes(x = x2.sim, y = freq.c.eff.pe + 1.96 * freq.c.eff.sd), color = "red", alpha = 0.5, size = 0.5) 116 | 117 | ## Plot labels and theme 118 | p <- p + xlab("X2") + ylab("Conditional effect of X1") + theme_bw() 119 | 120 | ## Print the plot 121 | p -------------------------------------------------------------------------------- /legislators.instructions.R: -------------------------------------------------------------------------------- 1 | ################################################################################### 2 | ## R2jags example using Simon Jackman's Legislators example - Bayesian IRT model ## 3 | ################################################################################### 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2015 7 | 8 | ## Compare to http://jackman.stanford.edu/mcmc/mainFrameWinBugs.php#Legislators 9 | 10 | ## Model (unchanged!) 11 | 12 | legismod.jags <- function() { 13 | for (i in 1:100){ 14 | for(j in 1:486){ 15 | y[i,j] ~ dbern(pi[i,j]) 16 | logit(pi[i,j]) <- x[i]*beta[j,1] - beta[j,2] 17 | } 18 | } 19 | 20 | helms <- x[66]; ## monitor these nodes as sanity checks 21 | kennedy <- x[41]; 22 | fiengold <- x[97]; 23 | boxer <- x[9]; 24 | chafee <- x[77]; 25 | mosely <- x[26]; 26 | gramm <- x[85]; 27 | 28 | ## priors 29 | for (i in 1:40){ x[i] ~ dnorm(0,1) } 30 | for (i in 42:65){ x[i] ~ dnorm(0,1) } 31 | for (i in 67:100){ x[i] ~ dnorm(0,1) } 32 | x[41] <- -1.0 ## Kennedy on the left 33 | x[66] <- 1.0 ## Helms on the right 34 | 35 | for (j in 1:486){ 36 | beta[j,1:2] ~ dmnorm(b0[1:2],B0[1:2,1:2]); 37 | } 38 | b0[1] <- 0; b0[2] <- 0; 39 | B0[1,1] <- .16; B0[2,2] <- .16; 40 | B0[1,2] <- 0; B0[2,1] <- 0; 41 | } 42 | 43 | 44 | 45 | setwd("~/R/Bayes/legislators") 46 | 47 | # Data: copy & paste from legislators.dat.jags, created via 48 | # bugs2jags("legislators.dat.bugs", "legislators.dat.jags") 49 | source("http://www.jkarreth.net/files/legislators.dat.jags") 50 | 51 | # Inits: from legislators.init.jags, slightly changed from the WinBUGS example 52 | 53 | legislators.inits <- function(){ 54 | list("beta" = 55 | structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 60 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 64 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 67 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 69 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 70 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 71 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 72 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 73 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 74 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 75 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 76 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 77 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 78 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 79 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 80 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 82 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 83 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 84 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 85 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 88 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 89 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 91 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 93 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 94 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 96 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 98 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 99 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 100 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 101 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(486L, 2L)), "x" = c(-1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 102 | -1, -1, 1, 1, 1, -1, 1, -1, 1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 103 | -1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1, NA, 1, -1, 1, -1, 1, 104 | -1, -1, -1, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, 1, 1, -1, -1, 105 | 1, -1, NA, 1, 1, -1, 1, -1, -1, -1, 1, -1, -1, -1, 1, 1, -1, 106 | 1, 1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, 1, 1, 1, 1, 1, 107 | -1, -1) 108 | ) 109 | } 110 | 111 | legislators.params <- c("x") 112 | legislators.dat <- list("y") 113 | 114 | legisfit <- jags(data=legislators.dat, inits=legislators.inits, legislators.params, n.chains=2, n.iter=1000, n.burnin=100, model.file=legismod.jags) -------------------------------------------------------------------------------- /logit.pp.plot.instructions.R: -------------------------------------------------------------------------------- 1 | ################################################################################## 2 | ## Plot predicted probabilities over simulated range of data after Logit models ## 3 | ############# Using the turnout model from Simon Jackman's examples ############## 4 | ##################### Workflow for WinBUGS/R2Jags/R2WinBUGS ###################### 5 | ################################################################################## 6 | 7 | ## Johannes Karreth 8 | ## jkarreth@albany.edu 9 | 10 | ## Fit your Bayesian model, monitor the coefficients (in this example, named b[]) 11 | ## and the cut points (in this example, named theta[]) 12 | ## and if you like, the predicted probability of y=1 for each case (in this example, named p[]) 13 | 14 | library(lattice) 15 | library(ggplot2) 16 | 17 | ##################################################### 18 | ## FIRST, PREDICTED PROBABILITIES ON OBSERVED DATA ## 19 | ##################################################### 20 | 21 | ## Data 22 | turnout.dat <- read.csv("http://www.jkarreth.net/files/turnout.csv") 23 | 24 | ## First, fit a logit model using 25 | ## https://github.com/jkarreth/Bayes/blob/master/turnout.instructions.R 26 | ## But be sure to monitor individual predicted probabilities (if you want to plot them) 27 | ## The BUGS/JAGS object is named "turnout.fit" from here on 28 | 29 | ## R2JAGS USERS, extract the posterior distributions from your jags/bugs object: 30 | turnout.mcmc <- as.mcmc(turnout.fit) 31 | turnout.mat <- as.matrix(turnout.mcmc) 32 | turnout.out <- as.data.frame(turnout.mat) 33 | 34 | ## R2WINBUGS/R2OPENBUGS USERS, extract the posterior distributions from your jags/bugs object: 35 | ## (in this case, we have 2 chains only) 36 | turnout.mat <- rbind(turnout.fit$sims.array[, 1, ], turnout.fit$sims.array[, 2, ]) 37 | turnout.out <- data.frame(turnout.mat) 38 | 39 | ## Define vectors with the values of the predicted probabilities (pulled from the coda files or your mcmc list) 40 | ## This assumes that you used p in logit(p[i]) in your model 41 | ## grep("p[",) pulls all columns that start with p[ 42 | p <- turnout.out[, grep("p[", colnames(turnout.out), fixed = T)] 43 | 44 | ## If you want to plot mean predicted observed probabilities against a covariate, 45 | ## collapse p to the mean of all iterations 46 | p.mean <- apply(p, 2, mean) 47 | 48 | ## Plot predicted probability (of y=1) against *observed* values of age \ 49 | ## (most likely an ugly plot, b/c we have several y_i(x_i)) 50 | plot(p.mean ~ turnout.dat.dat$age, xlim = c(min(turnout.dat.dat$age), max(turnout.dat.dat$age))) 51 | 52 | ################################################################# 53 | ######### SECOND, OUT-OF-SAMPLE PREDICTED PROBABILITIES, ######## 54 | ## across the range of X1 and given values of other covariates ## 55 | ################################################################# 56 | 57 | ## Import the chains containing the coefficients from your BUGS model, 58 | ## after monitoring *only* the coefficients (in this example, named b) 59 | 60 | ## R2JAGS/R2WINBUGS USERS: 61 | turnout.mcmc <- as.mcmc(turnout.fit) 62 | turnout.mat <- as.matrix(turnout.mcmc) 63 | b <- turnout.mat[ , 1:6] ## one column for each coefficient, in this case I had 6 coefficients 64 | 65 | # Generate vector with the simulated range of X1 (here, age) 66 | new.age <- seq(min(turnout.dat$age), max(turnout.dat$age)) 67 | 68 | # Generate vectors set at desired values of the other covariates 69 | new.education <- rep(median(turnout.dat$educ), length(new.age)) 70 | new.closing <- rep(median(turnout.dat$closing), length(new.age)) 71 | new.govelec <- rep(median(turnout.dat$govelec), length(new.age)) 72 | new.south <- rep(median(turnout.dat$south), length(new.age)) 73 | # Need value of 1 for the constant 74 | constant <- rep(1, length(new.age)) 75 | 76 | # Generate dataframe with simulated values 77 | turnout.sim <- cbind(constant,new.education,new.age, new.south, new.govelec, new.closing) ## cbind: combine (bind) columns 78 | 79 | # Or: generate two dataframes to plot PPs for each value of the South dummy (continued further below) 80 | turnout.sim.s <- cbind(constant,new.education,new.age,rep(1,max(turnout.dat$age)-min(turnout.dat$age)+1), new.govelec, new.closing) 81 | turnout.sim.n <- cbind(constant,new.education,new.age, rep(0,max(turnout.dat$age)-min(turnout.dat$age)+1), new.govelec, new.closing) 82 | 83 | # Multiply X by the betas from your BUGS output 84 | Xb <- t(turnout.sim%*% t(b)) 85 | 86 | # Transform linear prediction to probability 87 | turnout.pp.age <- exp(Xb)/(1+exp(Xb)) 88 | 89 | # Get CIs (for plotting) 90 | turnout.ci.age <- apply(turnout.pp.age, 2, quantile, probs=c(.025,.975)) ## apply(a, b, c): apply function (c) to object(a), by(b: 1 for row, 2 for column) 91 | 92 | # Get mean predictions over the n (from BUGS/JAGS iterations) sampled values of b 93 | mean.turnout.pp.age <- apply(turnout.pp.age, 2, mean) 94 | mean.turnout.ci.age <- apply(turnout.ci.age, 2, quantile, probs=c(.025,.975)) 95 | 96 | # Plot mean probability against the full (simulated) range of X (=age) 97 | plot(new.age, mean.turnout.pp.age, pch=19, main="Predicted probability of voting", xlab="Age", ylab="Pr(Voting)", xlim=c(min(turnout.dat$age), max(turnout.dat$age)), ylim=c(0,1)) 98 | 99 | # Add standard errors as vertical lines (could also do this using 2.5% and 97.5% values from p.chains) 100 | segments(new.age, mean.turnout.ci.age[1, ], new.age, mean.turnout.ci.age[2, ], lty=1) 101 | 102 | ## Continue two predictions for south=[0,1] 103 | 104 | # Multiply X by the betas from your BUGS output 105 | Xb.s <- t(turnout.sim.s %*% t(b)) 106 | Xb.n <- t(turnout.sim.n %*% t(b)) 107 | 108 | # Transform linear prediction to probability 109 | turnout.pp.age.s <- exp(Xb.s)/(1+exp(Xb.s)) 110 | turnout.pp.age.n <- exp(Xb.n)/(1+exp(Xb.n)) 111 | 112 | # Get mean linear predictions & SDS over the n (from BUGS iterations) sampled values of b 113 | mean.turnout.pp.age.s <- apply(turnout.pp.age.s, 2, mean) 114 | mean.turnout.pp.age.n <- apply(turnout.pp.age.n, 2, mean) 115 | turnout.s.ci <- apply(turnout.pp.age.s, 2, quantile, probs=c(.025,.975)) 116 | turnout.n.ci <- apply(turnout.pp.age.n, 2, quantile, probs=c(.025,.975)) 117 | 118 | ##################### 119 | ## Plot 1 (simple) ## 120 | ##################### 121 | 122 | # Plot mean probability against the full (simulated) range of X (=age) 123 | adjust <- rep(.3, max(turnout.dat$age)-min(turnout.dat$age)+1) ## slightly adjust the position of the "North" points to avoid overlay 124 | plot(new.age, mean.turnout.pp.age.s, pch=19, main="Predicted probability of voting", xlab="Age", ylab="Pr(Voting)", col="red", xlim=c(min(turnout.dat$age), max(turnout.dat$age)), ylim=c(0,1)) 125 | points(new.age + adjust, mean.turnout.pp.age.n, pch=19, col="black") 126 | segments(new.age, turnout.s.ci[1, ], new.age, turnout.s.ci[2, ], lty=1, col="red") 127 | segments(new.age + adjust, turnout.n.ci[1, ], new.age + adjust, turnout.n.ci[2, ], lty=1, col="black") 128 | legend("bottomright", c("South", "Rest of the U.S."), col=c("red", "black"), pch=19, inset=.01, bty="n") 129 | 130 | ##################### 131 | ## Plot 2 (panels) ## 132 | ##################### 133 | 134 | ## Generate data set used for the two plots below 135 | plot.dat <- data.frame( 136 | means = c(mean.turnout.pp.age.s , mean.turnout.pp.age.n), ## means of the pred. probabilities 137 | lower = c(turnout.s.ci[1, ] , turnout.n.ci[1, ]), ## upper CI 138 | upper = c(turnout.s.ci[2, ], turnout.n.ci[2, ]), ## lower CI 139 | south = factor(rep(c(1,0), each=max(turnout.dat$age)-min(turnout.dat$age)+1), levels=c(1,0), labels=c("South", "Rest of the U.S.")), ## Outcome variable 140 | age = rep(new.age, 2)) ## Explanatory variable of interest (here: age) 141 | 142 | xyplot(means ~ age | south, data=plot.dat, as.table=T, 143 | ylim=c(min(plot.dat$lower), max(plot.dat$upper)), xlab="Age", ylab="Pr(Voting)", main="Probability of Voting", 144 | panel = function(x,y,subscripts){ 145 | panel.lines(x,y,lty=1, col="black") 146 | panel.lines(x, plot.dat$lower[subscripts], lty=2, col="red") 147 | panel.lines(x, plot.dat$upper[subscripts], lty=2, col="red")}) 148 | 149 | ################################# 150 | ## Plot 3 (transparent colors) ## 151 | ################################# 152 | 153 | xyplot(mean.turnout.pp.age.s ~ new.age, ylim=c(0,1), xlab="Age", ylab="Pr(Voting)", main="Probability of voting", 154 | key=list(space=list("right"), rectangles=list(col=c(rgb(1,0,0, alpha=.35), rgb(0,0,1, alpha=.35))), text=list(c("South", "Rest of the U.S."))), 155 | panel=function(x,y){ 156 | panel.polygon(x=c(x,rev(x),x[1]), y=c(turnout.s.ci[1,], rev(turnout.s.ci[2,]), turnout.s.ci[1,1]), 157 | col=rgb(1,0,0,alpha=.35), border=NA) 158 | panel.polygon(x=c(x,rev(x),x[1]), y=c(turnout.n.ci[1,], rev(turnout.n.ci[2,]), turnout.n.ci[1,1]), 159 | col=rgb(0,0,1,alpha=.35), border=NA) 160 | panel.lines(x, mean.turnout.pp.age.s, col="red") 161 | panel.lines(x, mean.turnout.pp.age.n, col="blue") 162 | }) 163 | 164 | ###################### 165 | ## Plot 4 (ggplot2) ## 166 | ###################### 167 | 168 | ## Dataframe for plotting 169 | plot.dat <- data.frame(south = as.factor(c(rep(1, length(new.age)), rep(0, length(new.age)))), new.age = c(new.age, new.age), mean.turnout.pp = c(mean.turnout.pp.age.s, mean.turnout.pp.age.n), turnout.lower = c(turnout.s.ci[1, ], turnout.n.ci[1, ]), turnout.upper = c(turnout.s.ci[2, ], turnout.n.ci[2, ])) 170 | 171 | ## Make lines and ribbons separately 172 | p <- ggplot(dat = plot.dat, aes(x = new.age, y = mean.turnout.pp, group = south)) + geom_line(aes(colour = south)) 173 | p <- p + geom_ribbon(aes(ymin = turnout.lower, ymax = turnout.upper, fill = south), alpha = 0.2) 174 | p <- p + xlab("Age") + ylab("Pr(Voting)") + theme_bw() + scale_colour_manual(values=c("blue", "red")) + scale_fill_manual(values=c("blue", "red")) 175 | p <- p + theme(legend.position = "none") + annotate("text", x = 30, y = 0.8, label = "Rest of US", colour = "blue") + annotate("text", x = 60, y = 0.5, label = "South", colour = "red") 176 | 177 | ## Simpler: use geom_smooth() 178 | p2 <- ggplot(dat = plot.dat, aes(x = new.age, y = mean.turnout.pp, group = south)) + geom_smooth(aes(x = new.age, ymin = turnout.lower, ymax = turnout.upper, fill = south, colour = south), stat = "identity") 179 | p2 <- p2 + xlab("Age") + ylab("Pr(Voting)") + theme_bw() + scale_colour_manual(values=c("blue", "red")) + scale_fill_manual(values=c("blue", "red")) 180 | p2 <- p2 + theme(legend.position = "none") + annotate("text", x = 30, y = 0.8, label = "Rest of US", colour = "blue") + annotate("text", x = 60, y = 0.5, label = "South", colour = "red") -------------------------------------------------------------------------------- /mac.R2openBUGS.R: -------------------------------------------------------------------------------- 1 | ################################################# 2 | ## Run OpenBUGS through R2OpenBUGS on Mac OS X ## 3 | ################################################# 4 | 5 | # Johannes Karreth 6 | # jkarreth@albany.edu 7 | 8 | # The code below uses the help file for bugs() 9 | # Adjust file paths to your system 10 | 11 | library(R2OpenBUGS) 12 | 13 | model.file <- system.file(package="R2WinBUGS", "model", "schools.txt") 14 | 15 | # Some example data (see ?schools for details): 16 | data(schools) 17 | 18 | J <- nrow(schools) 19 | y <- schools$estimate 20 | sigma.y <- schools$sd 21 | data <- list(J=J, y=y, sigma.y=sigma.y) 22 | inits <- function(){ 23 | list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100), 24 | sigma.theta=runif(1, 0, 100)) 25 | } 26 | ## or alternatively something like: 27 | # inits <- list( 28 | # list(theta=rnorm(J, 0, 90), mu.theta=rnorm(1, 0, 90), 29 | # sigma.theta=runif(1, 0, 90)), 30 | # list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100), 31 | # sigma.theta=runif(1, 0, 100)) 32 | # list(theta=rnorm(J, 0, 110), mu.theta=rnorm(1, 0, 110), 33 | # sigma.theta=runif(1, 0, 110))) 34 | 35 | parameters <- c("theta", "mu.theta", "sigma.theta") 36 | 37 | schools.sim <- bugs(data = data, 38 | inits = inits, 39 | parameters = parameters, 40 | model.file = model.file, 41 | n.chains=3, 42 | n.burnin = 2500, 43 | n.iter=5000, 44 | debug = FALSE, 45 | OpenBUGS.pgm = "/Applications/Wineskin/OpenBUGS.app/Contents/Resources/drive_c/Program Files/OpenBUGS/OpenBUGS323/OpenBUGS.exe", 46 | working.directory = "/Users/johanneskarreth/R/Bayes/R2OpenBUGS", # Note: spaces in this path will create errors! 47 | useWINE = TRUE, 48 | bugs.seed = 12) 49 | 50 | print(schools.sim) 51 | plot(schools.sim) 52 | 53 | # For a function to create a barebones summary table, use 54 | # https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R 55 | # install.packages("devtools") 56 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 57 | mcmctab(schools.sim) 58 | 59 | # Access posterior draws for postestimation 60 | # Rows are draws, columns are parameters 61 | 62 | schools.out <- schools.sim$sims.matrix -------------------------------------------------------------------------------- /mac.R2winBUGS.R: -------------------------------------------------------------------------------- 1 | ############################################### 2 | ## Run WinBUGS through R2WinBUGS on Mac OS X ## 3 | ############################################### 4 | 5 | # Johannes Karreth 6 | # jkarreth@albany.edu 7 | 8 | # The code below uses the help file for bugs() 9 | # Adjust file paths to your system 10 | 11 | library(R2WinBUGS) 12 | 13 | model.file <- system.file(package="R2WinBUGS", "model", "schools.txt") 14 | 15 | # Some example data (see ?schools for details): 16 | data(schools) 17 | 18 | J <- nrow(schools) 19 | y <- schools$estimate 20 | sigma.y <- schools$sd 21 | data <- list(J=J, y=y, sigma.y=sigma.y) 22 | inits <- function(){ 23 | list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100), 24 | sigma.theta=runif(1, 0, 100)) 25 | } 26 | ## or alternatively something like: 27 | # inits <- list( 28 | # list(theta=rnorm(J, 0, 90), mu.theta=rnorm(1, 0, 90), 29 | # sigma.theta=runif(1, 0, 90)), 30 | # list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100), 31 | # sigma.theta=runif(1, 0, 100)) 32 | # list(theta=rnorm(J, 0, 110), mu.theta=rnorm(1, 0, 110), 33 | # sigma.theta=runif(1, 0, 110))) 34 | 35 | parameters <- c("theta", "mu.theta", "sigma.theta") 36 | 37 | schools.sim <- bugs(data, inits, parameters, model.file, 38 | n.chains=3, n.iter=5000, 39 | bugs.directory="/Applications/Wineskin/WinBUGS.app/Contents/Resources/drive_c/Program Files/WinBUGS14") 40 | print(schools.sim) 41 | plot(schools.sim) 42 | 43 | # For a function to create a barebones summary table, use 44 | # https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R 45 | # install.packages("devtools") 46 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 47 | mcmctab(schools.sim) 48 | 49 | # Access posterior draws for postestimation 50 | # Rows are draws, columns are parameters 51 | 52 | schools.out <- schools.sim$sims.matrix -------------------------------------------------------------------------------- /mlm.state.instructions.R: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | ## R2jags example - Bush votes in 1988 - Multilevel model example ## 3 | #################################################################### 4 | 5 | ## Johannes Karreth 6 | ## jkarreth@albany.edu 7 | 8 | ## Note: This example is taken from Gelman & Hill, Ch. 17. 9 | ## state.mod is a simplified version with fewer interactions 10 | ## and with categorical instead of dummy variables for the age & education categories. 11 | 12 | ## Simple model 13 | 14 | state.mod <- function() { 15 | for (i in 1:n){ 16 | y[i] ~ dbin (p.bound[i], 1) 17 | p.bound[i] <- max(0, min(1, p[i])) 18 | logit(p[i]) <- Xbeta[i] 19 | Xbeta[i] <- b.female*female[i] + b.black*black[i] + 20 | b.age*age[i] + b.edu*edu[i] 21 | + b.state[state[i]] 22 | } 23 | b.female ~ dnorm (0, .01) 24 | b.black ~ dnorm (0, .01) 25 | b.age ~ dnorm (0, .01) 26 | b.edu ~ dnorm (0, .01) 27 | 28 | 29 | for (j in 1:n.state){ 30 | b.state[j] ~ dnorm(b.state.hat[j], tau.state) 31 | b.state.hat[j] <- b.state0 + b.v.prev*v.prev[j] 32 | } 33 | b.state.hat.mu<-mean(b.state.hat[]) 34 | b.v.prev ~ dnorm(0, .01) 35 | b.state0 ~ dnorm(0, .01) 36 | tau.state <- pow(sigma.state, -2) 37 | sigma.state ~ dunif (0, 100) 38 | } 39 | 40 | setwd("~/R/Bayes/mlm.states") 41 | 42 | library(R2jags) 43 | library(foreign) 44 | 45 | ### Read in the data 46 | 47 | ## Option A) Use complete dataset including level-2 predictor 48 | 49 | polls.subset <- read.dta("polls.subset.merged.dta") 50 | 51 | y <- polls.subset$bush 52 | female <- polls.subset$female 53 | black <- polls.subset$black 54 | age <- polls.subset$age 55 | edu <- polls.subset$edu 56 | 57 | # To get ID for state beginning with 1, do this: 58 | # (Note that it is redundant in this case, but useful in other situations) 59 | uniqstate <- unique(polls.subset$state) 60 | polls.subset$stateid <- match(polls.subset$state, uniqstate) 61 | state <- polls.subset$stateid 62 | 63 | v.prev <- as.vector(by(polls.subset$g76_84pr, polls.subset$stateid, mean)) 64 | 65 | ## Option B) Combine level-1 and level-2 dataset in R 66 | 67 | polls.subset <- read.dta("polls.subset.JK.dta") 68 | 69 | # Load in level-1 data 70 | 71 | y <- polls.subset$bush 72 | female <- polls.subset$female 73 | black <- polls.subset$black 74 | age <- polls.subset$age 75 | edu <- polls.subset$edu 76 | state <- polls.subset$state 77 | 78 | # Load in level-2 data. Note that these data should be ordered by state, 79 | # so you will NOT need a level-2 ID 80 | 81 | presvote <- read.dta("presvote.dta") 82 | v.prev <- presvote$g76_84pr 83 | 84 | ### Data are ready 85 | 86 | ### Define number of observations on each level 87 | 88 | n <- length(y) # of survey respondents 89 | n.state <- max(state) # of states 90 | 91 | ### Fit the model in JAGS 92 | 93 | state.dat <- list ("n", "n.state", "y", "female", "black", "age", "edu", "state", "v.prev") 94 | 95 | state.params <- c ("b.female", "b.black", "b.age", "b.edu", "b.state.hat", "b.v.prev") 96 | 97 | state.inits <- function (){ 98 | list(b.female = c(0), b.black = c(0), 99 | b.age = c(0), b.edu = c(0), b.state = rnorm(n.state), b.v.prev = c(0)) 100 | } 101 | 102 | state.fit <- jags(data=state.dat, inits=state.inits, state.params, model.file=state.mod, n.chains=2, n.iter=100, n.burnin=10) 103 | 104 | ### Here, in nested structures, the plot() command in R2Jags comes in handy 105 | plot(state.fit) 106 | 107 | ### Compare to GLM estimates 108 | 109 | state.df <- read.dta("polls.subset.merged.dta") 110 | library(arm) 111 | state.lmer <- lmer(bush ~ edu + age + female + black + g76_84pr + (1|state), data=state.df, family=binomial) 112 | summary(state.lmer) -------------------------------------------------------------------------------- /ologit.pp.plot.instructions.R: -------------------------------------------------------------------------------- 1 | ############################################################################################ 2 | #### Plot predicted probabilities and other things after a Bayesian ordered logit model #### 3 | ############################################################################################ 4 | 5 | ## Johannes Karreth 6 | ## jkarreth@albany.edu 7 | 8 | ## Much of the code below is (a) old and (b) based on code originally written by 9 | ## Dave Armstrong, UW-Milwaukee, armstrod@uwm.edu 10 | ## See the slides for Day 11 for more compact code, also with explanations 11 | 12 | library(foreign) 13 | library(R2jags) 14 | 15 | ## Estimate frequentist and Bayesian models 16 | 17 | hw5.dat <- read.dta("http://www.jkarreth.net/files/ordered.logit.dta") 18 | hw5.dat <- na.omit(hw5.dat) 19 | 20 | hw5.dat$interact_f <- as.factor(hw5.dat$interact) 21 | freq.ologit <- MASS::polr(interact_f ~ orgmembs + indmembs + age + taxexmpt, data = hw5.dat) 22 | summary(freq.ologit) 23 | 24 | hw5.dat.jags <- list(interact = hw5.dat$interact, orgmembs = hw5.dat$orgmembs, indmembs = hw5.dat$indmembs, age = hw5.dat$age, taxexmpt = hw5.dat$taxexmpt, N = length(hw5.dat$interact)) 25 | 26 | hw5.mod <- function() { 27 | 28 | for (i in 1:N){ 29 | for (j in 1:3){ 30 | logit(gamma[i,j]) <- theta1[j] - mu[i] ## Note the use of theta1 instead of theta 31 | } 32 | interact[i] ~ dcat(p[i, 1:4]) 33 | p[i,1] <- gamma[i,1] 34 | p[i,2] <- gamma[i,2]-gamma[i,1] 35 | p[i,3] <- gamma[i,3]-gamma[i,2] 36 | p[i,4] <- 1-gamma[i,3] 37 | mu[i] <- b1 * orgmembs[i]+b2 * indmembs[i]+b3 * age[i]+b4 * taxexmpt[i] 38 | 39 | } 40 | 41 | for(i in 1:3){ 42 | theta[i] ~ dnorm(0,.0001) ## Note the use of theta 43 | } 44 | theta1[1:3] <- sort(theta) ## Note the use of theta1 / theta 45 | 46 | b1 ~ dnorm(0,.0001) 47 | b2 ~ dnorm(0,.0001) 48 | b3 ~ dnorm(0,.0001) 49 | b4 ~ dnorm(0,.0001) 50 | 51 | } 52 | 53 | hw5.params <- c("b1", "b2", "b3", "b4", "p", "theta") ## Monitor "p" for observed probabilities, PRE, etc. 54 | 55 | hw5.inits <- function(){ 56 | list("b1" = c(0), "b2" = c(0), "b3" = c(0), "b4" = c(0), "theta" = c(0,10,13)) ## Note that we're giving inits to theta, not theta1 57 | } 58 | 59 | hw5.fit <- jags(data = hw5.dat.jags, inits = hw5.inits, hw5.params, n.chains = 3, n.iter = 1000, n.burnin = 500, model.file = hw5.mod) 60 | 61 | ## Posterior distributions of coefficients: 62 | hw5.fit$BUGSoutput$summary[1:5, ] 63 | 64 | ## JAGS/BUGS USERS: Read the coda output from Bugs or Jags into R like so: 65 | # chains <- rbind(read.coda("ologit_chain1.txt", "ologit_index.txt"), 66 | # read.coda("ologit_chain2.txt", "ologit_index.txt")) 67 | 68 | ## Or: R2JAGS/R2WINBUGS USERS, if hw5.fit is your R2jags/R2WinBUGS object: 69 | chains <- as.mcmc(hw5.fit) 70 | chains <- as.matrix(chains) 71 | chains <- as.data.frame(chains) 72 | 73 | ## Be sure to remember the order of your coefficients. In this case: 74 | ## b1: orgmembs 75 | ## b2: indmembs 76 | ## b3: age 77 | ## b4: taxexmpt 78 | 79 | ##################################################### 80 | ## FIRST, PREDICTED PROBABILITIES ON OBSERVED DATA ## 81 | ##################################################### 82 | 83 | ## Define vectors with the values of the predicted probabilities (pulled from the coda files) 84 | ## grep("p[",) pulls all columns that start with p[ 85 | probs <- chains[, grep("p[", colnames(chains), fixed = T)] 86 | ## Note: this will be in the order p[observation, outcome category] 87 | 88 | ## Now, make a new list with n.iter (here, 1000) elements, 89 | ## where each is a matrix of the probability of being in one of the categories, 90 | ## hence ncol = number of categories (here, 4) and nrow (not specified) = 772 (N of 91 | ## the original data. 92 | prob.list <- lapply(1:nrow(probs), function(x) matrix(probs[x, ], ncol = 4)) ## nrow(probs) is the N of your simulations (if using several chains, the N is be the number of all iterations combined, i.e. the N of chain1 + the N of chain2, etc.). 93 | 94 | ## 95 | ## Summarize classification and proportional reduction of error (PRE) 96 | ## 97 | 98 | ## Identify the predicted category (simply the largest value of the four columns) 99 | pred.cats <- sapply(prob.list, function(x) apply(x, 1, which.max)) 100 | 101 | ## % correctly predicted: average # of simulations where pred.cats = observed outcome 102 | ## Here, dat is the dataframe of the observed data, and interact is the observed DV 103 | pcp <- apply(pred.cats, 2, function(x) mean(x == as.numeric(hw5.dat$interact))) 104 | 105 | ## PMC: percentage of observations in the modal category of the observed data 106 | ## This would be the ``naive'' guess in a null model - simply predict the modal category 107 | pmc <- max(table(as.numeric(hw5.dat$interact))/sum(table(as.numeric(hw5.dat$interact)))) 108 | 109 | ## PRE is defined as \frac{PCP - PMC}{1 - PMC}, so use this formula 110 | pre <- (pcp - pmc)/(1-pmc) 111 | 112 | ## Neat: remember, we are doing all this over a list of 1000 simulations 113 | ## Hence, PRE is actually a distribution 114 | summary(pre) 115 | mean(pre > 0) 116 | 117 | ## Again, PRE is a distribution 118 | plot(density(pre, bw = .05)) 119 | ## Terrible model - but recall that almost all obs. had interact = = {4} 120 | ## So the modal null model should already fare pretty well. 121 | 122 | ## Again, note the nice fact that we have a distribution for PRE, 123 | 124 | ## PRE and ePRE should be quite similar to the posterior means from the Bayesian model. 125 | ## Use the pre() function from Dave Armstrong's DAMisc package: 126 | DAMisc::pre(freq.ologit) 127 | 128 | ################################################################# 129 | ######### SECOND, OUT-OF-SAMPLE PREDICTED PROBABILITIES, ######## 130 | ## across the range of X1 and given values of other covariates ## 131 | ################################################################# 132 | 133 | ## Generate dataset with simulated data (i.e. where your explanatory variables are set to min->max or held constant) 134 | newdat <- data.frame( 135 | age = seq(min(hw5.dat$age), max(hw5.dat$age), length = max(hw5.dat$age) - min(hw5.dat$age) + 1), ## length: number of individual values of this continuous expl. var. 136 | orgmembs = median(hw5.dat$orgmembs), 137 | indmembs = median(hw5.dat$indmembs), 138 | taxexmpt = median(hw5.dat$taxexmpt) 139 | ) 140 | 141 | ## Define matrices with the values of the coefficients (pulled from the coda files) 142 | ## in this case, note that all my coefficients were named b[j], where j = number of coefficients 143 | b <- chains[,grep("b", colnames(chains), fixed = T)] 144 | 145 | ## In the MNL, you would define as many coefficient matrices as you have categories, minus the base category. 146 | ## Then you can simply proceed by calculating predicted probabilities for each category 147 | 148 | ## Define X matrix (explanatory variables) 149 | ## Important: The order of predictors must match the order in your model code! 150 | X <- model.matrix( ~ orgmembs + indmembs + age + taxexmpt, data = newdat) 151 | ## Remove the intercept for ordered logit 152 | ## (You would keep it for MNL) 153 | X <- X[ , -1] 154 | 155 | ## Multiply X by the betas from your JAGS/BUGS output 156 | Xb <- t(X %*% t(b)) 157 | 158 | ## Define vectors with the values of the cutoff points (pulled from the coda files) 159 | ## Note that in my model, the cutpoints were called theta 160 | ## Theta will only be in your chains if you monitored them earlier 161 | kaps <- chains[, grep("theta", colnames(chains), fixed = T)] 162 | q1 <- plogis(kaps[ , 1] - Xb) 163 | q2 <- plogis(kaps[ , 2] - Xb) 164 | q3 <- plogis(kaps[ , 3] - Xb) 165 | 166 | ## Probabilities to be in each of the 4 (j) categories 167 | p1 <- q1 168 | p2 <- q2 - q1 169 | p3 <- q3 - q2 170 | p4 <- 1 - q3 171 | 172 | ## ... and the respective credible intervals 173 | p1.ci <- apply(p1, 2, quantile, probs = c(.025,.975)) 174 | p2.ci <- apply(p2, 2, quantile, probs = c(.025,.975)) 175 | p3.ci <- apply(p3, 2, quantile, probs = c(.025,.975)) 176 | p4.ci <- apply(p4, 2, quantile, probs = c(.025,.975)) 177 | 178 | ## Get the simulation mean for each quantity of interest 179 | kap.mean <- apply(kaps, 2, mean) 180 | b.mean <- apply(b, 2, mean) 181 | mean.q1 <- plogis(kap.mean[1] - X %*% b.mean) 182 | mean.q2 <- plogis(kap.mean[2] - X %*% b.mean) 183 | mean.q3 <- plogis(kap.mean[3] - X %*% b.mean) 184 | mean.p1 <- mean.q1 185 | mean.p2 <- mean.q2 - mean.q1 186 | mean.p3 <- mean.q3 - mean.q2 187 | mean.p4 <- 1 - mean.q3 188 | 189 | ## Generate data set used for the two plots below 190 | plot.dat <- data.frame( 191 | means = c(mean.p1, mean.p2, mean.p3, mean.p4), ## means of the pred. probabilities 192 | lower = c(p1.ci[1, ] , p2.ci[1, ], p3.ci[1, ], p4.ci[1, ]), ## upper CI 193 | upper = c(p1.ci[2, ], p2.ci[2, ], p3.ci[2, ], p4.ci[2, ]), ## lower CI 194 | Interaction = factor(rep(c(1,2,3,4), each = 145), levels = c(1,2,3,4), labels = c("Never", "Seldom", "Occasionally", "Frequently")), ## Outcome variable 195 | age = rep(newdat$age, 4)) ## Predictor of interest (here: age) 196 | 197 | ###### 198 | ## Plots using ggplot2 199 | #### 200 | 201 | ## PLOT 1 (four separate panels for each outcome) 202 | 203 | library(ggplot2) 204 | 205 | p1 <- ggplot(dat = plot.dat, aes(x = age, y = means)) + geom_smooth(aes(x = age, ymin = lower, ymax = upper), stat = "identity") + facet_wrap(~ Interaction) 206 | p1 <- p1 + xlab("Age") + ylab("Pr(Interaction)") + theme_bw() 207 | 208 | ## PLOT 2 (all outcomes in one panel) 209 | 210 | p2 <- ggplot(dat = plot.dat, aes(x = age, y = means, group = Interaction)) + geom_smooth(aes(x = age, ymin = lower, ymax = upper, fill = Interaction, colour = Interaction), stat = "identity") 211 | p2 <- p2 + xlab("Age") + ylab("Pr(Interaction)") + theme_bw() 212 | 213 | 214 | ###### 215 | ## Plots using lattice 216 | #### 217 | 218 | library(lattice) 219 | 220 | ## PLOT 1 (four separate panels for each outcome) 221 | 222 | xyplot(means ~ age | Interaction, data = plot.dat, as.table = T, 223 | ylim = c(min(plot.dat$lower), max(plot.dat$upper)), xlab = "Age", ylab = "Probability", 224 | panel = function(x,y,subscripts){ 225 | panel.lines(x,y,lty = 1, col = "black") 226 | panel.lines(x, plot.dat$lower[subscripts], lty = 2, col = "red") 227 | panel.lines(x, plot.dat$upper[subscripts], lty = 2, col = "red")}) 228 | 229 | ## PLOT 2 (all outcomes in one panel) 230 | 231 | xyplot(mean.p1 ~ newdat$age, ylim = c(0,1), xlab = "Age", ylab = "Probability", 232 | key = list(space = list("top"), 233 | rectangles = list(col = c(rgb(1,0,0, alpha = .35), rgb(0,1,0, alpha = .35), 234 | rgb(0,0,1,alpha = .35), rgb(1,1,0,alpha = .35))), 235 | text = list(c("Pr(Interact = Never)", "Pr(Interact = Seldom)", "Pr(Interact = Occasionally)", "Pr(Interact = Frequently)"))), 236 | panel = function(x,y){ 237 | panel.polygon(x = c(x,rev(x),x[1]), y = c(p1.ci[1,], rev(p1.ci[2,]), p1.ci[1,1]), 238 | col = rgb(1,0,0,alpha = .35), border = NA) 239 | panel.polygon(x = c(x,rev(x),x[1]), y = c(p2.ci[1,], rev(p2.ci[2,]), p2.ci[1,1]), 240 | col = rgb(0,1,0,alpha = .35), border = NA) 241 | panel.polygon(x = c(x,rev(x),x[1]), y = c(p3.ci[1,], rev(p3.ci[2,]), p3.ci[1,1]), 242 | col = rgb(0,0,1,alpha = .35), border = NA) 243 | panel.polygon(x = c(x,rev(x),x[1]), y = c(p4.ci[1,], rev(p4.ci[2,]), p4.ci[1,1]), 244 | col = rgb(1,1,0,alpha = .35), border = NA) 245 | panel.lines(x, mean.p1, col = "red") 246 | panel.lines(x, mean.p2, col = "green") 247 | panel.lines(x, mean.p3, col = "blue") 248 | panel.lines(x, mean.p4, col = "yellow") 249 | }) 250 | 251 | #### Questions: Johannes Karreth, jkarreth@albany.edu -------------------------------------------------------------------------------- /pumps.instructions.R: -------------------------------------------------------------------------------- 1 | ######################################################################## 2 | ## WinBUGS example Pumps - conjugate gamma-Poisson hierarchical model ## 3 | ######################################################################## 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2016 7 | 8 | ## Data 9 | 10 | pumps.data <- list(t = c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5), 11 | x = c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22), N = 10) 12 | 13 | t <- pumps.data$t 14 | x <- pumps.data$x 15 | N <- pumps.data$N 16 | 17 | pumps.data <- list("t", "x", "N") 18 | 19 | ## Model 20 | 21 | ## Save the following model separately in "pumps.model.jags" in your WD. Don't type it into R. 22 | 23 | pumps.mod <- function() { 24 | for (i in 1:N) { 25 | theta[i] ~ dgamma(alpha, beta) 26 | lambda[i] <- theta[i] * t[i] ## t is an independent variable 27 | x[i] ~ dpois(lambda[i]) ## x is a stochastic node, lambda is a deterministic node. 28 | } 29 | alpha ~ dexp(1) ## alpha and beta are the hyperpriors (and parental nodes) 30 | beta ~ dgamma(0.1, 1.0) 31 | } 32 | 33 | ## Set WD: 34 | setwd("~/R/Bayes/pumps") 35 | 36 | ## Model parameters 37 | 38 | pumps.params <- c("theta") 39 | 40 | ## Run JAGS 41 | 42 | pumpsfit <- jags(data=pumps.data, inits=NULL, pumps.params, n.chains=2, n.iter=5000, n.burnin=1000, model.file=pumps.mod) 43 | 44 | ## Results & diagnostics 45 | 46 | print(pumpsfit) 47 | 48 | plot(pumpsfit) 49 | 50 | traceplot(pumpsfit) 51 | ## if we want to keep these: 52 | pdf("pumps.traceplot.pdf") ## open PDF device 53 | traceplot(pumpsfit) ## write the plot 54 | dev.off() ## finish the plotting command 55 | 56 | ## Generate MCMC object for analysis 57 | pumpsfit.mcmc <- as.mcmc(pumpsfit) 58 | 59 | xyplot(pumpsfit.mcmc) 60 | ## large plot (many variables): use different output technique - print as PDF 61 | pdf("pumps.xyplot.pdf") # open pdf device 62 | xyplot(pumpsfit.mcmc, layout = c(5,4,3), aspect="fill") ## provide commands for the layout to fit all plots into one graph 63 | dev.off() # close PDF device; graph is in the WD 64 | 65 | densityplot(pumpsfit.mcmc) 66 | ## large plot (many variables): use different output technique 67 | pdf("pumps.densityplot.pdf") # open pdf device 68 | densityplot(pumpsfit.mcmc, layout = c(5,4,3), aspect="fill") 69 | dev.off() # close PDF device; graph is in the WD 70 | 71 | ## More diagnostics: 72 | 73 | gelman.plot(pumpsfit.mcmc) 74 | geweke.diag(pumpsfit.mcmc) 75 | geweke.plot(pumpsfit.mcmc) 76 | raftery.diag(pumpsfit.mcmc) 77 | heidel.diag(pumpsfit.mcmc) 78 | 79 | 80 | ## Update models: 81 | 82 | pumpsfit.upd <- update(pumpsfit, n.iter=1000) 83 | pumpsfit.upd <- autojags(pumpsfit) 84 | 85 | ## Run JAGS such that it produces coda files for analysis with boa or coda packages: 86 | 87 | pumpsfit <- jags2(data=pumps.data, inits=pumps.inits, pumps.params, n.chains=2, n.iter=5000, n.burnin=1000, model.file="pumps.model.jags", clearWD=FALSE) 88 | 89 | 90 | -------------------------------------------------------------------------------- /rats.instructions.R: -------------------------------------------------------------------------------- 1 | ###################################################### 2 | ## WinBUGS example Rats - Normal hierarchical model ## 3 | ###################################################### 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2016 7 | 8 | ## Required libraries 9 | library(R2jags) 10 | 11 | ## Model 12 | 13 | rats.model <- function() { 14 | for( i in 1 : N ) { 15 | for( j in 1 : T ) { 16 | Y[i , j] ~ dnorm(mu[i , j],tau.c) 17 | mu[i , j] <- alpha[i] + beta[i] * (x[j] - xbar) 18 | } 19 | alpha[i] ~ dnorm(alpha.c,tau.alpha) 20 | beta[i] ~ dnorm(beta.c,tau.beta) 21 | } 22 | tau.c ~ dgamma(0.001,0.001) 23 | sigma <- 1 / sqrt(tau.c) 24 | alpha.c ~ dnorm(0.0,1.0E-6) 25 | sigma.alpha~ dunif(0,100) 26 | sigma.beta~ dunif(0,100) 27 | tau.alpha<-1/(sigma.alpha*sigma.alpha) 28 | tau.beta<-1/(sigma.beta*sigma.beta) 29 | beta.c ~ dnorm(0.0,1.0E-6) 30 | alpha0 <- alpha.c - xbar * beta.c 31 | } 32 | 33 | ## Copy the data from the WinBUGS code: 34 | 35 | rats.data <- list(x = c(8.0, 15.0, 22.0, 29.0, 36.0), xbar = 22, N = 30, T = 5, 36 | Y = structure( 37 | .Data = c(151, 199, 246, 283, 320, 38 | 145, 199, 249, 293, 354, 39 | 147, 214, 263, 312, 328, 40 | 155, 200, 237, 272, 297, 41 | 135, 188, 230, 280, 323, 42 | 159, 210, 252, 298, 331, 43 | 141, 189, 231, 275, 305, 44 | 159, 201, 248, 297, 338, 45 | 177, 236, 285, 350, 376, 46 | 134, 182, 220, 260, 296, 47 | 160, 208, 261, 313, 352, 48 | 143, 188, 220, 273, 314, 49 | 154, 200, 244, 289, 325, 50 | 171, 221, 270, 326, 358, 51 | 163, 216, 242, 281, 312, 52 | 160, 207, 248, 288, 324, 53 | 142, 187, 234, 280, 316, 54 | 156, 203, 243, 283, 317, 55 | 157, 212, 259, 307, 336, 56 | 152, 203, 246, 286, 321, 57 | 154, 205, 253, 298, 334, 58 | 139, 190, 225, 267, 302, 59 | 146, 191, 229, 272, 302, 60 | 157, 211, 250, 285, 323, 61 | 132, 185, 237, 286, 331, 62 | 160, 207, 257, 303, 345, 63 | 169, 216, 261, 295, 333, 64 | 157, 205, 248, 289, 316, 65 | 137, 180, 219, 258, 291, 66 | 153, 200, 244, 286, 324), 67 | .Dim = c(30,5))) 68 | 69 | ## Now define the vectors of the data matrix for JAGS: 70 | 71 | Y <- rats.data$Y 72 | T <- rats.data$T 73 | x <- rats.data$x 74 | xbar <- rats.data$xbar 75 | N <- rats.data$N 76 | 77 | ## Read in the rats data for JAGS 78 | 79 | rats.data <- list("Y", "x", "T", "N", "xbar") 80 | 81 | ## Name the JAGS parameters 82 | 83 | rats.params <- c("tau.c", "sigma", "alpha.c", "sigma.alpha", "sigma.beta", "tau.alpha", "tau.beta", "beta.c", "alpha0") 84 | 85 | ## Define the starting values for JAGS 86 | 87 | rats.inits <- function(){ 88 | list("alpha"=c(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250), 89 | "beta"=c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6), 90 | "alpha.c"=c(150), "beta.c"=c(10), "tau.c"=c(1), "sigma.alpha"=c(1), "sigma.beta"=c(1)) 91 | } 92 | 93 | ## Fit the model in JAGS, having previously copied the BUGS model in my working directory as "rats.model.jags" 94 | 95 | ratsfit <- jags(data=rats.data, inits=rats.inits, rats.params, n.chains=2, n.iter=2000, n.burnin=1000, model.file=rats.model) 96 | 97 | ## If we wanted to specify different initial values for the different chains, specify: 98 | ## inits = c() ??? 99 | 100 | ## Updating? 101 | ratsfit.upd <- update(ratsfit, n.iter=1000) 102 | ratsfit.upd <- autojags(ratsfit) 103 | 104 | ## Diagnoses 105 | 106 | print(ratsfit) 107 | 108 | plot(ratsfit) 109 | 110 | traceplot(ratsfit) 111 | ## if you want to save them: 112 | pdf("rats.trace.pdf") 113 | traceplot(ratsfit) 114 | dev.off() 115 | 116 | ## Generate MCMC object for analysis 117 | ratsfit.mcmc <- as.mcmc(ratsfit) 118 | 119 | summary(ratsfit.mcmc) 120 | 121 | xyplot(ratsfit.mcmc) 122 | ## maybe better (you can use other Lattice options here as well): 123 | xyplot(ratsfit.mcmc, layout=c(2,6), aspect="fill") 124 | 125 | densityplot(ratsfit.mcmc) 126 | ## maybe better: 127 | densityplot(ratsfit.mcmc, layout=c(2,6), aspect="fill") 128 | 129 | ## and more plots... all using the MCMC object and the CODA package: 130 | 131 | ## Trace- and density in one plot 132 | pdf("ratsfit.mcmc.plot.pdf") 133 | plot(ratsfit.mcmc) 134 | dev.off() 135 | 136 | ## Autocorrelation plot 137 | pdf("ratsfit.mcmc.autocorr.pdf") 138 | autocorr.plot(ratsfit.mcmc) 139 | dev.off() 140 | 141 | ## Others (not working with this example - yet. Convergence problems?) 142 | 143 | gelman.plot(ratsfit.mcmc) 144 | geweke.diag(ratsfit.mcmc) 145 | geweke.plot(ratsfit.mcmc) 146 | raftery.diag(ratsfit.mcmc) 147 | heidel.diag(ratsfit.mcmc) 148 | 149 | ## If we want to examine the Coda files via Boa or Coda, use the function jags2: 150 | 151 | ratsfit <- jags2(data=rats.data, inits=rats.inits, rats.params, n.iter=5000, model.file="rats.model.jags") 152 | 153 | print(ratsfit) 154 | plot(ratsfit) 155 | ## etc. 156 | 157 | ## This will create the following files in your working directory: 158 | /Users/johanneskarreth/R/CODAchain1.txt 159 | /Users/johanneskarreth/R/CODAchain2.txt 160 | /Users/johanneskarreth/R/CODAchain3.txt 161 | /Users/johanneskarreth/R/CODAindex.txt 162 | /Users/johanneskarreth/R/jagsdata.txt 163 | /Users/johanneskarreth/R/jagsinits1.txt 164 | /Users/johanneskarreth/R/jagsinits2.txt 165 | /Users/johanneskarreth/R/jagsinits3.txt 166 | /Users/johanneskarreth/R/jagsscript.txt 167 | 168 | ## ... which can then be analyzed 169 | library(boa) 170 | boa.menu() 171 | 172 | ## remember to rename the index file's extension to .ind, and the chain files to .out 173 | 174 | library(coda) 175 | codamenu() -------------------------------------------------------------------------------- /regression.dotplot.R: -------------------------------------------------------------------------------- 1 | #################################################################### 2 | ## Coefficient dot plot of posterior distributions from JAGS/BUGS ## 3 | #################################################################### 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2016 7 | 8 | ## if angell.fit is your R2jags/R2WinBUGS output object, and 9 | ## angellfit.mcmc is that object as mcmc object: 10 | angellfit.mcmc <- as.mcmc(angell.fit) 11 | 12 | ## using the mcmcplots package: 13 | 14 | library(mcmcplots) 15 | caterplot(angellfit.mcmc, parms = c("beta1", "beta2"), labels = c("Diversity", "Mobility"), val.lim = c(-0.27, 0.05)) 16 | abline(v = 0) 17 | 18 | ## using the arm package: 19 | 20 | coef.vect <- angell.fit$BUGSoutput$summary[2:3, 1] 21 | sd.vect <- angell.fit$BUGSoutput$summary[2:3, 2] 22 | short.names <- rownames(angell.fit$BUGSoutput$summary[2:3,]) 23 | long.names <- c("Diversity", "Mobility") 24 | 25 | library(arm) 26 | coefplot(coef.vect, sd.vect, varnames = long.names, main = "", xlim = c(-0.3, 0.05)) 27 | 28 | ## using ggplot2 from scratch 29 | library(ggplot2) 30 | angellfit.mat <- as.matrix(angellfit.mcmc) 31 | angellfit.dat <- as.data.frame(angellfit.mat) 32 | coef.vect <- apply(angellfit.dat, 2, mean) 33 | lower.vect <- apply(angellfit.dat, 2, function(x) quantile(x, probs = c(0.025))) 34 | upper.vect <- apply(angellfit.dat, 2, function(x) quantile(x, probs = c(0.975))) 35 | long.names <- c("Intercept", "Diversity", "Mobility", "Deviance") 36 | plot.dat <- data.frame(coef.vect, lower.vect, upper.vect, long.names)[c(2,3), ] 37 | 38 | p <- ggplot(data = plot.dat, aes(x = coef.vect, y = long.names)) + geom_point() + geom_segment(aes(x = lower.vect, xend = upper.vect, y = long.names, yend = long.names)) 39 | p <- p + geom_vline(xintercept = 0, colour = "blue", linetype = 2) + xlab("Posterior estimates") + ylab("") 40 | p -------------------------------------------------------------------------------- /regression.table.R: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | ## Regression coefficient table of posterior distributions from JAGS/BUGS ## 3 | ############################################################################ 4 | 5 | ## Johannes Karreth 6 | ## ICPSR Summer Program 2016 7 | 8 | ## if angell.fit is your R2jags/R2WinBUGS output object 9 | 10 | angell.fit$BUGSoutput$summary[, c(1, 2, 3, 7)] 11 | library(xtable) 12 | regtable1 <- xtable(angell.fit$BUGSoutput$summary[, c(1, 2, 3, 7)]) 13 | print(regtable1, type = "latex") 14 | print(regtable1, type = "html") 15 | 16 | ## Another option: 17 | # install.packages("devtools") 18 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 19 | regtable <- mcmctab(as.mcmc(angell.fit)) 20 | 21 | ## Use regtable() for the summary 22 | 23 | ## pick out the parameters of interest (e.g., the coefficients and SDs) 24 | 25 | regtable1 <- regtable[c(1:3), c(1:2)] 26 | 27 | library(xtable) 28 | regtable1 <- xtable(regtable1) 29 | print(regtable1, type = "latex") 30 | print(regtable1, type = "html") 31 | 32 | ## you can save the HTML table as a .doc file and open it with MS word. You might have to make some modifications, but the basic layout should be functionable. 33 | print(regtable1, type = "html", 34 | file = "~/Desktop/angell_table.doc") 35 | 36 | ## pick out other parameters of interest (here, the coefficients and CIs) 37 | 38 | regtable2 <- regtable[c(1:3), c(1, 3, 4)] 39 | 40 | xtable(regtable2) -------------------------------------------------------------------------------- /results.rmarkdown.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Summarizing posteriors through tables in RMarkdown" 3 | author: "Johannes Karreth" 4 | date: "July 4, 2016" 5 | output: pdf_document 6 | --- 7 | 8 | ## Simulate data and fit a model 9 | 10 | ```{r} 11 | n.sim <- 100; set.seed(123) 12 | x1 <- rnorm(n.sim, mean = 5, sd = 2) 13 | x2 <- rbinom(n.sim, size = 1, prob = 0.3) 14 | e <- rnorm(n.sim, mean = 0, sd = 1) 15 | b1 <- 1.2 16 | b2 <- -3.1 17 | a <- 1.5 18 | y <- a + b1 * x1 + b2 * x2 + e 19 | 20 | sim.dat <- data.frame(y, x1, x2) 21 | 22 | bayes.mod <- function() { 23 | 24 | for(i in 1:N){ 25 | y[i] ~ dnorm(mu[i], tau) 26 | mu[i] <- alpha + beta1 * x1[i] + beta2 * x2[i] 27 | } 28 | 29 | alpha ~ dnorm(0, .01) 30 | beta1 ~ dunif(-100, 100) 31 | beta2 ~ dunif(-100, 100) 32 | tau ~ dgamma(.01, .01) 33 | 34 | } 35 | 36 | sim.dat.jags <- as.list(sim.dat) 37 | 38 | sim.dat.jags$N <- nrow(sim.dat) 39 | 40 | bayes.mod.params <- c("alpha", "beta1", "beta2") 41 | 42 | 43 | inits1 <- list("alpha" = 0, "beta1" = 0, "beta2" = 0) 44 | inits2 <- list("alpha" = 1, "beta1" = 1, "beta2" = 1) 45 | inits3 <- list("alpha" = -1, "beta1" = -1, "beta2" = -1) 46 | bayes.mod.inits <- list(inits1, inits2, inits3) 47 | 48 | library(R2jags) 49 | set.seed(123) 50 | 51 | bayes.mod.fit <- jags(data = sim.dat.jags, inits = bayes.mod.inits, 52 | parameters.to.save = bayes.mod.params, n.chains = 3, n.iter = 9000, 53 | n.burnin = 1000, 54 | model.file = bayes.mod) 55 | ``` 56 | 57 | ## Create a summary table 58 | 59 | ```{r} 60 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R") 61 | regtable <- mcmctab(as.mcmc(bayes.mod.fit))[-c(4), ] # remove the fourth row 62 | ``` 63 | 64 | ## Option 1: the pander package 65 | 66 | ```{r} 67 | library(pander) 68 | pander(regtable) 69 | ``` 70 | 71 | ## Option 2: the knitr package 72 | 73 | ```{r} 74 | library(knitr) 75 | kable(regtable) 76 | ``` -------------------------------------------------------------------------------- /results.rmarkdown.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/results.rmarkdown.pdf -------------------------------------------------------------------------------- /rstan.R: -------------------------------------------------------------------------------- 1 | ############################ 2 | ## Run Stan through rstan ## 3 | ############################ 4 | 5 | # Johannes Karreth 6 | # jkarreth@albany.edu 7 | 8 | # The code below adapts the help file from jags() for use with stan() 9 | # Adjust file paths to your system 10 | 11 | # data 12 | y <- c(28.4,7.9,-2.8,6.8,-0.6,0.6,18.0,12.2) 13 | sigma <- c(14.9,10.2,16.3,11.0,9.4,11.4,10.4,17.6) 14 | 15 | schools_data <- as.list(data.frame(y, sigma)) 16 | schools_data$J <- 8 17 | 18 | 19 | schools_mod <- " 20 | data { 21 | int J; // number of schools 22 | real y[J]; // estimated treatment effects 23 | real sigma[J]; // s.d. of effect estimates 24 | } 25 | parameters { 26 | real mu; 27 | real tau; 28 | vector[J] eta; 29 | } 30 | transformed parameters { 31 | vector[J] theta; 32 | theta <- mu + tau * eta; 33 | } 34 | model { 35 | eta ~ normal(0, 1); 36 | y ~ normal(theta, sigma); 37 | } 38 | 39 | " 40 | 41 | schools_fit <- stan(model_code = schools_mod, 42 | data = schools_data, 43 | chains = 4, iter = 2000, warmup = 1000, thin = 1, 44 | init = "0", seed = 123, verbose = FALSE) 45 | 46 | ## Same with Angell data & OLS 47 | 48 | # Read the data in car library 49 | library(car) 50 | data(Angell) 51 | angell_dat <- Angell[, -4] ## take off the fourth column (remember, the order is (row, column)) 52 | angell_dat <- as.list(angell_dat) 53 | angell_dat$N <- length(angell_dat$moral) 54 | 55 | ## Model 56 | 57 | angell_mod <- " 58 | 59 | data{ 60 | int N; 61 | vector[N] moral; 62 | vector[N] hetero; 63 | vector[N] mobility; 64 | } 65 | 66 | parameters{ 67 | vector[3] beta; 68 | real sigma; 69 | } 70 | 71 | model{ 72 | moral ~ normal(beta[1] + beta[2] * hetero + beta[3] * mobility, 73 | sigma); 74 | } 75 | 76 | " 77 | 78 | ## Fit the model in rstan 79 | 80 | angell_fit <- stan(model_code = angell_mod, 81 | data = angell_dat, 82 | chains = 4, iter = 2000, warmup = 1000, thin = 1, 83 | init = "0", seed = 123, verbose = FALSE) -------------------------------------------------------------------------------- /wvs.instructions.R: -------------------------------------------------------------------------------- 1 | ################################################################# 2 | ## R2jags example using the WVS example - random effects model ## 3 | ################################################################# 4 | 5 | ## Johannes Karreth 6 | ## jkarreth@albany.edu 7 | 8 | 9 | ## Required libraries 10 | library(foreign) 11 | library(R2jags) 12 | 13 | ## Read the data 14 | 15 | source("http://www.jkarreth.net/files/wvs.jags.txt") 16 | 17 | wvs.dat <- list("N", "nation1", "trust1", "lifesat", "lr", "relig1", "proud1", "sex1", "age", "educ") 18 | 19 | ## JAGS model: very simple, but can be extended as you see fit 20 | 21 | 22 | wvs.mod <- function(){ 23 | for(i in 1:N) 24 | { 25 | 26 | 27 | lifesat[i]~dnorm(mu[i],sigma2inv[nation1[i]]) ## DV: life satisfaction; distributed normal. 28 | ## BUT: variance is indexed by NATION, mean is indexed by individual. 29 | ## Each country gets its own within-group variance.. 30 | mu[i]<-alpha[nation1[i]] +beta[nation1[i]]*proud1[i] ## mu gets its own intercept by nation (country-specific intercepts) 31 | ## If we want to get different slopes, do this: mu[i]<-alpha[nation1[i]] +beta[nation1[i]]*proud1[i] 32 | } 33 | for(i in 1:4){ 34 | alpha[i]~dnorm(malpha,tau2inv) ## Note: nothing indexed here... IF this were [0;1], it'd be a fixed effects model... 35 | beta[i]~dnorm(mbeta,tau3inv) 36 | 37 | } ## Closing the forloop for the four countries 38 | mbeta~dnorm(0,1.0E-4) ## Mean for beta; note that scientific writing is permitted in model code. 39 | malpha~dnorm(0,1.0E-4) ## Mbeta and Malpha are stochastic parent nodes. If those were to be modeled as a function of country-level covariates, we'd have # additional mean functions for these! 40 | tau2inv~dgamma(.1,.1) ## Between-group variance 41 | tau2<-1/sqrt(tau2inv) 42 | tau3inv~dgamma(.1,.1) 43 | tau3<-1/sqrt(tau2inv) 44 | 45 | 46 | for(i in 1:4){ 47 | sigma2inv[i]~dgamma(.1,.1) ## Prior for within-country variance 48 | sigma2[i]<-1/sqrt(sigma2inv[i]) 49 | } 50 | } 51 | 52 | ## Set wd 53 | 54 | setwd("~/R/Bayes/wvs") 55 | 56 | ## Name the JAGS parameters to be monitored 57 | 58 | wvs.params <- c("alpha", "beta") 59 | 60 | ## Define the starting values for JAGS 61 | 62 | wvs.inits <- function(){ 63 | list("alpha"=c(0, 0, 0, 0), "beta"=c(0, 0, 0, 0)) 64 | } 65 | 66 | ## Fit the model in JAGS 67 | 68 | wvsfit <- jags(data=wvs.dat, inits=wvs.inits, wvs.params, n.chains=2, n.iter=500, n.burnin=100, model.file=wvs.mod) --------------------------------------------------------------------------------