├── .DS_Store
├── .Rhistory
├── .gitignore
├── MCMC_probability_demo.Rmd
├── MCMC_probability_demo.pdf
├── README.md
├── angell.instructions.R
├── beer.instructions.R
├── diagnostics.R
├── econvote.instructions.R
├── factor.dotplot.R
├── goals.instructions.R
├── interaction.instructions.R
├── legislators.instructions.R
├── logit.pp.plot.instructions.R
├── mac.R2openBUGS.R
├── mac.R2winBUGS.R
├── mlm.state.instructions.R
├── ologit.pp.plot.instructions.R
├── pumps.instructions.R
├── rats.instructions.R
├── regression.dotplot.R
├── regression.table.R
├── results.rmarkdown.Rmd
├── results.rmarkdown.html
├── results.rmarkdown.pdf
├── rstan.R
└── wvs.instructions.R


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/.DS_Store


--------------------------------------------------------------------------------
/.Rhistory:
--------------------------------------------------------------------------------
  1 | library(R2WinBUGS)
  2 | model.file <- system.file(package="R2WinBUGS", "model", "schools.txt")
  3 | # Some example data (see ?schools for details):
  4 | data(schools)
  5 | J <- nrow(schools)
  6 | y <- schools$estimate
  7 | sigma.y <- schools$sd
  8 | data <- list(J=J, y=y, sigma.y=sigma.y)
  9 | inits <- function(){
 10 | list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100),
 11 | sigma.theta=runif(1, 0, 100))
 12 | }
 13 | ## or alternatively something like:
 14 | # inits <- list(
 15 | #   list(theta=rnorm(J, 0, 90), mu.theta=rnorm(1, 0, 90),
 16 | #        sigma.theta=runif(1, 0, 90)),
 17 | #   list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100),
 18 | #        sigma.theta=runif(1, 0, 100))
 19 | #   list(theta=rnorm(J, 0, 110), mu.theta=rnorm(1, 0, 110),
 20 | #        sigma.theta=runif(1, 0, 110)))
 21 | parameters <- c("theta", "mu.theta", "sigma.theta")
 22 | schools.sim <- bugs(data, inits, parameters, model.file,
 23 | n.chains=3, n.iter=5000,
 24 | bugs.directory="/Applications/Wineskin/WinBUGS.app/Contents/Resources/drive_c/Program Files/WinBUGS14")
 25 | print(schools.sim)
 26 | plot(schools.sim)
 27 | # For a summary table, use
 28 | # https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R
 29 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
 30 | mcmctab(schools.sim, jags = FALSE)
 31 | # Access posterior draws for postestimation
 32 | schools.out <- schools.sim$sims.matrix
 33 | summary(schools.out)
 34 | str(schools.sim$sims.matrix)
 35 | library(coda)
 36 | foo <- as.mcmc(schools.sim$sims.array)
 37 | summary(foo)
 38 | foo2 <- mcmcUpgrade(foo)
 39 | str(schools.sim)
 40 | str(schools.sim$summary)
 41 | (schools.sim$summary)
 42 | class(schools.sim)
 43 | class(angell.fit)
 44 | Statelevel <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/state_level_update.dta",convert.underscore = TRUE)
 45 | library(foreign)
 46 | Statelevel <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/state_level_update.dta",convert.underscore = TRUE)
 47 | table(Statelevel$sstate)
 48 | length(Statelevel$sstate)
 49 | Megapoll <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/gay_marriage_megapoll.dta", convert.underscore = TRUE)
 50 | Megapoll <- Megapoll[Megapoll$state != "", ]
 51 | Megapoll <- Megapoll[order(Megapoll$state), ]
 52 | State.names <- sort(unique(Megapoll$state))
 53 | Statelevel <- Statelevel[order(Statelevel$sstate), ]
 54 | Statelevel
 55 | Statelevel$state.cat <- 0
 56 | Statelevel[which(Statelevel$sstate %in% State.names), ]
 57 | as.numeric(as.factor(Statelevel[which(Statelevel$sstate %in% State.names), ]$sstate))
 58 | Statelevel$state.cat <- 0
 59 | Statelevel[which(Statelevel$sstate %in% State.names), ]$state.cat <- as.numeric(as.factor(Statelevel[which(Statelevel$sstate %in% State.names), ]$sstate))
 60 | Census <- read.dta("http://www.princeton.edu/~jkastell/MRP_primer/poststratification%202000.dta",convert.underscore = TRUE)
 61 | Megapoll.dat <- Megapoll[, c("yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "state.cat", "region.cat", "poll.cat", "p.relig.full", "p.kerry.full")]
 62 | head(Megapoll)
 63 | Megapoll$race.female <- (Megapoll$female * 3) + Megapoll$race.wbh # from 1 for white males to 6 for hispanic females
 64 | Megapoll$age.edu.cat <- 4 * (Megapoll$age.cat - 1) + Megapoll$edu.cat # from 1 for 18-29 with low edu to 16 for 65+ with high edu
 65 | Megapoll$poll.cat <- as.numeric(as.factor(Megapoll$poll)) # indicator for each of the 5 polls
 66 | Megapoll$state.cat <- as.numeric(as.factor(Megapoll$state)) # indicator for state (for varying intercepts)
 67 | Megapoll$p.evang.full <- Statelevel$p.evang[Megapoll$state.initnum] # proportion of evangelicals in respondent's state
 68 | Megapoll$p.mormon.full <-Statelevel$p.mormon[Megapoll$state.initnum] # proportion of mormon's in respondent's state
 69 | Megapoll$p.relig.full <- Megapoll$p.evang.full + Megapoll$p.mormon.full # combined evangelical + mormon proportions
 70 | Megapoll$p.kerry.full <- Statelevel$kerry.04[Megapoll$state.initnum] # kerry's % of 2-party vote in respondent's state in 2004
 71 | Megapoll.dat <- Megapoll[, c("yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "state.cat", "region.cat", "poll.cat", "p.relig.full", "p.kerry.full")]
 72 | individual.bmod <- glmer2stan(
 73 | formula = yes.of.all ~ (1|race.female) + (1|age.cat) + (1|edu.cat) +
 74 | (1|age.edu.cat) + (1|state.cat) + (1|region.cat) + (1|poll.cat) +
 75 | p.relig.full + p.kerry.full,
 76 | data = Megapoll.dat,
 77 | family = "binomial",
 78 | sample = FALSE)
 79 | library(glmer2stan)
 80 | individual.bmod <- glmer2stan(
 81 | formula = yes.of.all ~ (1|race.female) + (1|age.cat) + (1|edu.cat) +
 82 | (1|age.edu.cat) + (1|state.cat) + (1|region.cat) + (1|poll.cat) +
 83 | p.relig.full + p.kerry.full,
 84 | data = Megapoll.dat,
 85 | family = "binomial",
 86 | sample = FALSE)
 87 | show(individual.bmod)
 88 | ?stan
 89 | library(rstan)
 90 | ?stan
 91 | Megapoll1.dat <- Megapoll[, c("state", "state.cat", "region.cat", "yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "poll.cat", "p.relig.full", "p.kerry.full")]
 92 | Megapoll1.dat <- na.omit(Megapoll1.dat)
 93 | Megapoll2.dat <- Megapoll[, c("state.cat", "region.cat", "p.relig.full", "p.kerry.full")]
 94 | Megapoll2.dat <- aggregate(x = Megapoll, by = list(statenum = Megapoll$state.cat), FUN = function(x) x[1])
 95 | Megapoll2.dat <- Megapoll2.dat[, -c(1)]
 96 | Megapoll.datjags <- as.list(Megapoll1.dat)
 97 | Megapoll.datjags$statenuml2 <- Megapoll2.dat$state.cat
 98 | Megapoll.datjags$region.catl2 <- Megapoll2.dat$region.cat
 99 | Megapoll.datjags$poll.catl2 <- Megapoll2.dat$poll.cat
100 | Megapoll.datjags$p.relig.full.state <- Megapoll2.dat$p.relig.full
101 | Megapoll.datjags$p.kerry.full.state <- Megapoll2.dat$p.kerry.full
102 | Megapoll.datjags$N.resp <- nrow(Megapoll1.dat)
103 | Megapoll.datjags$N.state <- nrow(Megapoll2.dat)
104 | Megapoll.datjags$N.race.female <- length(unique(Megapoll1.dat$race.female))
105 | Megapoll.datjags$N.age.cat <- length(unique(Megapoll1.dat$age.cat))
106 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat))
107 | Megapoll.datjags$N.age.edu.cat <- length(unique(Megapoll1.dat$age.edu.cat))
108 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat))
109 | Megapoll.datjags$N.region.cat <- length(unique(Megapoll1.dat$region.cat))
110 | Megapoll.datjags$N.poll <- length(unique(Megapoll1.dat$poll.cat))
111 | individual.modjags <- function(){
112 | for (i in 1:N.resp){
113 | yes.of.all[i] ~ dbern(p[i])
114 | logit(p[i]) <- b.intercept +
115 | b.relig * p.relig.full[i] +
116 | b.kerry * p.kerry.full[i] +
117 | b.race.female[race.female[i]] +
118 | b.age[age.cat[i]] +
119 | b.edu[edu.cat[i]] +
120 | b.age.edu[age.edu.cat[i]] +
121 | b.state[state.cat[i]] +
122 | b.region[region.cat[i]] +
123 | b.poll[poll.cat[i]]
124 | }
125 | for(i in 1:N.race.female){b.race.female[i] ~ dnorm(0, tau.race.female)}
126 | for(i in 1:N.age.cat){b.age[i] ~ dnorm(0, tau.age)}
127 | for(i in 1:N.edu.cat){b.edu[i] ~ dnorm(0, tau.edu)}
128 | for(i in 1:N.age.edu.cat){b.age.edu[i] ~ dnorm(0, tau.age.edu)}
129 | for(i in 1:N.state){b.state[i] ~ dnorm(0, tau.state)}
130 | for(i in 1:N.region.cat){b.region[i] ~ dnorm(0, tau.region)}
131 | for(i in 1:N.poll){b.poll[i] ~ dnorm(0, tau.poll)}
132 | b.intercept ~ dnorm(0, .1)
133 | b.relig ~ dnorm(0, .1)
134 | b.kerry ~ dnorm(0, .1)
135 | #   mu.race.female ~ dnorm(0, .1)
136 | #   mu.age.cat ~ dnorm(0, .1)
137 | #   mu.edu.cat ~ dnorm(0, .1)
138 | #   mu.age.edu.cat ~ dnorm(0, .1)
139 | #   mu.state ~ dnorm(0, .1)
140 | #   mu.region ~ dnorm(0, .1)
141 | #   mu.poll ~ dnorm(0, .1)
142 | tau.race.female ~ dgamma(1, 1)
143 | tau.age ~ dgamma(1, 1)
144 | tau.edu ~ dgamma(1, 1)
145 | tau.age.edu ~ dgamma(1, 1)
146 | tau.state ~ dgamma(1, 1)
147 | tau.region ~ dgamma(1, 1)
148 | tau.poll ~ dgamma(1, 1)
149 | }
150 | individual.params <- c("b.intercept", "b.race.female", "b.age", "b.edu", "b.age.edu", "b.state", "b.region",
151 | "b.poll", "b.relig", "b.kerry")
152 | library(ggmcmc)
153 | ?ggs
154 | individual.ggs <- ggs(individual.mcmc)
155 | individual.fit <- jags(
156 | data = Megapoll.datjags,
157 | inits = NULL,
158 | parameters.to.save = individual.params,
159 | model.file = individual.modjags,
160 | n.chains = 3,
161 | n.iter = 20000,
162 | n.burnin = 10000,
163 | n.thin = 5)
164 | library(R2jags)
165 | ## Level-1 (individual) data
166 | Megapoll1.dat <- Megapoll[, c("state", "state.cat", "region.cat", "yes.of.all", "race.female", "age.cat", "edu.cat", "age.edu.cat", "poll.cat", "p.relig.full", "p.kerry.full")]
167 | Megapoll1.dat <- na.omit(Megapoll1.dat)
168 | ## Level-2 (state) data
169 | Megapoll2.dat <- Megapoll[, c("state.cat", "region.cat", "p.relig.full", "p.kerry.full")]
170 | ## Collapse to state-level
171 | Megapoll2.dat <- aggregate(x = Megapoll, by = list(statenum = Megapoll$state.cat), FUN = function(x) x[1])
172 | Megapoll2.dat <- Megapoll2.dat[, -c(1)]
173 | Megapoll.datjags <- as.list(Megapoll1.dat)
174 | Megapoll.datjags$statenuml2 <- Megapoll2.dat$state.cat
175 | Megapoll.datjags$region.catl2 <- Megapoll2.dat$region.cat
176 | Megapoll.datjags$poll.catl2 <- Megapoll2.dat$poll.cat
177 | Megapoll.datjags$p.relig.full.state <- Megapoll2.dat$p.relig.full
178 | Megapoll.datjags$p.kerry.full.state <- Megapoll2.dat$p.kerry.full
179 | Megapoll.datjags$N.resp <- nrow(Megapoll1.dat)
180 | Megapoll.datjags$N.state <- nrow(Megapoll2.dat)
181 | Megapoll.datjags$N.race.female <- length(unique(Megapoll1.dat$race.female))
182 | Megapoll.datjags$N.age.cat <- length(unique(Megapoll1.dat$age.cat))
183 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat))
184 | Megapoll.datjags$N.age.edu.cat <- length(unique(Megapoll1.dat$age.edu.cat))
185 | Megapoll.datjags$N.edu.cat <- length(unique(Megapoll1.dat$edu.cat))
186 | Megapoll.datjags$N.region.cat <- length(unique(Megapoll1.dat$region.cat))
187 | Megapoll.datjags$N.poll <- length(unique(Megapoll1.dat$poll.cat))
188 | ## Model
189 | individual.modjags <- function(){
190 | for (i in 1:N.resp){
191 | yes.of.all[i] ~ dbern(p[i])
192 | logit(p[i]) <- b.intercept +
193 | b.relig * p.relig.full[i] +
194 | b.kerry * p.kerry.full[i] +
195 | b.race.female[race.female[i]] +
196 | b.age[age.cat[i]] +
197 | b.edu[edu.cat[i]] +
198 | b.age.edu[age.edu.cat[i]] +
199 | b.state[state.cat[i]] +
200 | b.region[region.cat[i]] +
201 | b.poll[poll.cat[i]]
202 | }
203 | for(i in 1:N.race.female){b.race.female[i] ~ dnorm(0, tau.race.female)}
204 | for(i in 1:N.age.cat){b.age[i] ~ dnorm(0, tau.age)}
205 | for(i in 1:N.edu.cat){b.edu[i] ~ dnorm(0, tau.edu)}
206 | for(i in 1:N.age.edu.cat){b.age.edu[i] ~ dnorm(0, tau.age.edu)}
207 | for(i in 1:N.state){b.state[i] ~ dnorm(0, tau.state)}
208 | for(i in 1:N.region.cat){b.region[i] ~ dnorm(0, tau.region)}
209 | for(i in 1:N.poll){b.poll[i] ~ dnorm(0, tau.poll)}
210 | b.intercept ~ dnorm(0, .1)
211 | b.relig ~ dnorm(0, .1)
212 | b.kerry ~ dnorm(0, .1)
213 | #   mu.race.female ~ dnorm(0, .1)
214 | #   mu.age.cat ~ dnorm(0, .1)
215 | #   mu.edu.cat ~ dnorm(0, .1)
216 | #   mu.age.edu.cat ~ dnorm(0, .1)
217 | #   mu.state ~ dnorm(0, .1)
218 | #   mu.region ~ dnorm(0, .1)
219 | #   mu.poll ~ dnorm(0, .1)
220 | tau.race.female ~ dgamma(1, 1)
221 | tau.age ~ dgamma(1, 1)
222 | tau.edu ~ dgamma(1, 1)
223 | tau.age.edu ~ dgamma(1, 1)
224 | tau.state ~ dgamma(1, 1)
225 | tau.region ~ dgamma(1, 1)
226 | tau.poll ~ dgamma(1, 1)
227 | }
228 | individual.params <- c("b.intercept", "b.race.female", "b.age", "b.edu", "b.age.edu", "b.state", "b.region",
229 | "b.poll", "b.relig", "b.kerry")
230 | individual.fit <- jags(
231 | data = Megapoll.datjags,
232 | inits = NULL,
233 | parameters.to.save = individual.params,
234 | model.file = individual.modjags,
235 | n.chains = 3,
236 | n.iter = 20000,
237 | n.burnin = 10000,
238 | n.thin = 5)
239 | print(individual.fit)
240 | library(mcmcplots)
241 | mcmcplot(as.mcmc(individual.fit))
242 | individual.modjags <- function(){
243 | for (i in 1:N.resp){
244 | yes.of.all[i] ~ dbern(p[i])
245 | logit(p[i]) <- b.intercept +
246 | b.relig * p.relig.full[i] +
247 | b.kerry * p.kerry.full[i] +
248 | b.race.female[race.female[i]] +
249 | b.age[age.cat[i]] +
250 | b.edu[edu.cat[i]] +
251 | b.age.edu[age.edu.cat[i]] +
252 | b.state[state.cat[i]] +
253 | b.region[region.cat[i]] +
254 | b.poll[poll.cat[i]]
255 | }
256 | for(i in 1:N.race.female){b.race.female[i] ~ dnorm(0, tau.race.female)}
257 | for(i in 1:N.age.cat){b.age[i] ~ dnorm(0, tau.age)}
258 | for(i in 1:N.edu.cat){b.edu[i] ~ dnorm(0, tau.edu)}
259 | for(i in 1:N.age.edu.cat){b.age.edu[i] ~ dnorm(0, tau.age.edu)}
260 | for(i in 1:N.state){b.state[i] ~ dnorm(0, tau.state)}
261 | for(i in 1:N.region.cat){b.region[i] ~ dnorm(0, tau.region)}
262 | for(i in 1:N.poll){b.poll[i] ~ dnorm(0, tau.poll)}
263 | b.intercept ~ dnorm(0, .1)
264 | b.relig ~ dnorm(0, .1)
265 | b.kerry ~ dnorm(0, .1)
266 | #   mu.race.female ~ dnorm(0, .1)
267 | #   mu.age.cat ~ dnorm(0, .1)
268 | #   mu.edu.cat ~ dnorm(0, .1)
269 | #   mu.age.edu.cat ~ dnorm(0, .1)
270 | #   mu.state ~ dnorm(0, .1)
271 | #   mu.region ~ dnorm(0, .1)
272 | #   mu.poll ~ dnorm(0, .1)
273 | tau.race.female ~ dgamma(.5, .5)
274 | tau.age ~ dgamma(.5, .5)
275 | tau.edu ~ dgamma(.5, .5)
276 | tau.age.edu ~ dgamma(.5, .5)
277 | tau.state ~ dgamma(.5, .5)
278 | tau.region ~ dgamma(.5, .5)
279 | tau.poll ~ dgamma(.5, .5)
280 | }
281 | individual.fit <- jags(
282 | data = Megapoll.datjags,
283 | inits = NULL,
284 | parameters.to.save = individual.params,
285 | model.file = individual.modjags,
286 | n.chains = 3,
287 | n.iter = 20000,
288 | n.burnin = 10000,
289 | n.thin = 5)
290 | print(individual.fit)
291 | mcmcplot(as.mcmc(individual.fit))
292 | individual.mcmc <- as.mcmc(individual.fit)
293 | ?save
294 | save(individual.mcmc, file = "/Users/johanneskarreth/Documents/Uni/9 - ICPSR/2015/Applied Bayes/Slides (new)/Day 19/MRP/individual_mcmc.Rdata")
295 | library(ggmcmc)
296 | individual.ggs <- ggs(individual.mcmc)
297 | ggs_caterpillar(individual.ggs)
298 | ggs_caterpillar(individual.ggs, family = "b.")
299 | str(individual.ggs)
300 | head(individual.ggs)
301 | ggs_density(individual.ggs, family = "b.")
302 | individual.out.dat <- as.data.frame(as.matrix(individual.mcmc))
303 | individual.out <- as.data.frame(as.matrix(individual.mcmc))
304 | summary(individual.out)
305 | cp.dat <- individual.out[, grep(x = names(individual.out), pattern = "b.")]
306 | summary(cp.dat)
307 | cp.dat <- individual.out[, !grep(x = names(individual.out), pattern = "b.state", fixed = TRUE)]
308 | summary(cp.dat)
309 | cp.dat <- individual.out[, grep(x = names(individual.out), pattern = "b.", fixed = TRUE)]
310 | cp.dat <- individual.out[, -grep(x = names(individual.out), pattern = "b.state", fixed = TRUE)]
311 | summary(cp.dat)
312 | cp.dat <- individual.out[, grep(x = names(individual.out), pattern = "b.", fixed = TRUE)]
313 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.state", fixed = TRUE)]
314 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.region", fixed = TRUE)]
315 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.poll", fixed = TRUE)]
316 | summary(cp.dat)
317 | ggs_caterpillar(cp.dat)
318 | ?ggs_caterpillar
319 | ggs_caterpillar(ggs(cp.dat))
320 | apply(cp.dat, 1, plot(density(x)))
321 | apply(cp.dat, 1, function(x) plot(density(x)))
322 | plot(density(rnorm(100)))
323 | par(mfrow = c(10, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1))
324 | par(mfrow = c(10, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1))
325 | par(mfrow = c(10, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1))
326 | par(mfrow = c(5, 5)); apply(cp.dat, 1, function(x) plot(density(x))); par(mfrow = c(1, 1))
327 | par(mfrow = c(5, 5)); apply(cp.dat, 2, function(x) plot(density(x))); par(mfrow = c(1, 1))
328 | par(mfrow = c(5, 5)); apply(cp.dat, 2, function(x) plot(density(x))); par(mfrow = c(1, 1))
329 | head(cp.dat)
330 | plot(density(rnorm(100)))
331 | ?apply
332 | cp.dat.sum <- apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975)))
333 | cp.dat.sum
334 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))))
335 | cp.dat.sum
336 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95")
337 | ?geom_segment
338 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() +
339 | geom_segment(aes(x = Low80, xend = High80), size = 2) +
340 | geom_segment(aes(x = Low95, xend = High95), size = 2)
341 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))))
342 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95")
343 | cp.dat.sum <- as.data.frame(cp.dat.sum)
344 | cp.dat.sum$variable <- rownames(cp.dat.sum)
345 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() +
346 | geom_segment(aes(x = Low80, xend = High80), size = 2) +
347 | geom_segment(aes(x = Low95, xend = High95), size = 2)
348 | cp.dat.sum
349 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))))
350 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95")
351 | cp.dat.sum <- as.data.frame(cp.dat.sum)
352 | cp.dat.sum$variable <- rownames(cp.dat.sum)
353 | cp.dat.sum
354 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))))
355 | cp.dat.sum <- as.data.frame(cp.dat.sum)
356 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95")
357 | cp.dat.sum$variable <- rownames(cp.dat.sum)
358 | cp.dat.sum
359 | cp.dat <- cp.dat[, -grep(x = names(cp.dat), pattern = "b.intercept", fixed = TRUE)]
360 | cp.dat.sum <- t(apply(cp.dat, MARGIN = 2, function(x) quantile(x, probs = c(0.025, 0.1, 0.5, 0.9, 0.975))))
361 | cp.dat.sum <- as.data.frame(cp.dat.sum)
362 | names(cp.dat.sum) <- c("Low95", "Low80", "Median", "High80", "High95")
363 | cp.dat.sum$variable <- rownames(cp.dat.sum)
364 | cp.dat.sum
365 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() +
366 | geom_segment(aes(x = Low80, xend = High80), size = 2) +
367 | geom_segment(aes(x = Low95, xend = High95), size = 1)
368 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() +
369 | geom_segment(aes(x = Low80, xend = High80, yend = variable), size = 2) +
370 | geom_segment(aes(x = Low95, xend = High95, yend = variable), size = 1)
371 | ggplot(data = cp.dat.sum, aes(x = Median, y = variable)) + geom_point() +
372 | geom_segment(aes(x = Low80, xend = High80, yend = variable), size = 1) +
373 | geom_segment(aes(x = Low95, xend = High95, yend = variable), size = 0.5)
374 | one.post <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))),
375 | state.cat = 0,
376 | state.name = "",
377 | state.initnum = 0)
378 | posterior.state.ranefs <- individual.out[, grep(pattern = "b.state", x = colnames(individual.out), fixed = TRUE)]
379 | one.post <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))),
380 | state.cat = 0,
381 | state.name = "",
382 | state.initnum = 0)
383 | obs.state.initnum <- sort(unique(Megapoll$state.initnum))
384 | obs.state <- sort(unique(Megapoll$state))
385 | obs.state
386 | obs.state.initnum
387 | posterior.state.ranefs.list <- list()
388 | for(i in 1:ncol(posterior.state.ranefs)){
389 | posterior.state.ranefs.list[[i]] <- one.post
390 | posterior.state.ranefs.list[[i]][[1]] <- as.numeric(posterior.state.ranefs[, i])
391 | posterior.state.ranefs.list[[i]][[2]] <- i
392 | posterior.state.ranefs.list[[i]][[3]] <- obs.state[i]
393 | posterior.state.ranefs.list[[i]][[4]] <- obs.state.initnum[i]
394 | }
395 | posterior.state.ranefs.list[[50]] <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))),
396 | state.cat = 0,
397 | state.name = "AK",
398 | state.initnum = 1)
399 | posterior.state.ranefs.list[[51]] <- list(posterior = as.numeric(c(rep(0, times = nrow(posterior.state.ranefs)))),
400 | state.cat = 0,
401 | state.name = "HI",
402 | state.initnum = 12)
403 | posterior.state.ranefs.list <- posterior.state.ranefs.list[order(sapply(posterior.state.ranefs.list, function(x) x[[4]], simplify=TRUE), decreasing=FALSE)]
404 | posterior.state.ranefs.mat <- do.call("cbind", lapply(posterior.state.ranefs.list, "[[", 1))
405 | summary(posterior.state.ranefs.mat)
406 | b.state <- as.data.frame(posterior.state.ranefs.mat)
407 | names(b.state) <- Statelevel$sstate
408 | b.intercept <- individual.out[, grep(pattern = "b.intercept", x = colnames(individual.out), fixed = TRUE)]
409 | b.race.female <- individual.out[, grep(pattern = "b.race.female", x = colnames(individual.out), fixed = TRUE)]
410 | b.age <- individual.out[, grep(pattern = "b.age", x = colnames(individual.out), fixed = TRUE)]
411 | b.age <- b.age[, -c(grep(pattern = "b.age.edu", x = colnames(b.age), fixed = TRUE))]
412 | b.edu <- individual.out[, grep(pattern = "b.edu", x = colnames(individual.out), fixed = TRUE)]
413 | b.age.edu <- individual.out[, grep(pattern = "b.age.edu", x = colnames(individual.out), fixed = TRUE)]
414 | b.region <- individual.out[, grep(pattern = "b.region", x = colnames(individual.out), fixed = TRUE)]
415 | b.relig <- individual.out[, grep(pattern = "b.relig", x = colnames(individual.out), fixed = TRUE)]
416 | b.kerry <- individual.out[, grep(pattern = "b.kerry", x = colnames(individual.out), fixed = TRUE)]
417 | Xb <- b.intercept +
418 | b.race.female[, Census$crace.female] +
419 | b.age[, Census$cage.cat] +
420 | b.edu[, Census$cedu.cat] +
421 | b.age.edu[, Census$cage.edu.cat] +
422 | b.state[, Census$cstate.initnum] +
423 | b.region[, Census$cregion.cat] +
424 | (b.relig %*% t(Census$cp.relig.full)) +
425 | (b.kerry %*% t(Census$cp.kerry.full))
426 | str(b.intercept)
427 | str(b.race.female)
428 | Xb <- b.intercept +
429 | b.race.female[, Census$crace.female]
430 | Xb <- b.intercept +
431 | b.race.female[, Census$crace.female] +
432 | b.age[, Census$cage.cat] +
433 | b.edu[, Census$cedu.cat] +
434 | b.age.edu[, Census$cage.edu.cat] +
435 | b.state[, Census$cstate.initnum]
436 | Xb <- b.intercept +
437 | b.race.female[, Census$crace.female] +
438 | b.age[, Census$cage.cat] +
439 | b.edu[, Census$cedu.cat] +
440 | b.age.edu[, Census$cage.edu.cat]
441 | Xb <- b.intercept +
442 | b.race.female[, Census$crace.female] +
443 | b.age[, Census$cage.cat] +
444 | b.edu[, Census$cedu.cat]
445 | Xb <- b.intercept +
446 | b.race.female[, Census$crace.female] +
447 | b.age[, Census$cage.cat]
448 | Xb <- b.intercept +
449 | b.race.female[, Census$crace.female]
450 | str(b.age)
451 | head(Census)
452 | Xb <- b.intercept +
453 | b.race.female[, Census$crace.female] +
454 | b.age[, Census$cage.cat]
455 | table(Census$cage.cat)
456 | Xb <- b.intercept +
457 | b.race.female[, Census$crace.female] +
458 | b.age[, Census$cage.cat]
459 | b.state <- as.matrix(b.state)
460 | b.intercept <- as.matrix(b.intercept)
461 | b.race.female <- as.matrix(b.race.female)
462 | b.age <- as.matrix(b.age)
463 | b.edu <- as.matrix(b.edu)
464 | b.age.edu <- as.matrix(b.age.edu)
465 | b.region <- as.matrix(b.region)
466 | Xb <- b.intercept +
467 | b.race.female[, Census$crace.female] +
468 | b.age[, Census$cage.cat] +
469 | b.edu[, Census$cedu.cat] +
470 | b.age.edu[, Census$cage.edu.cat] +
471 | b.state[, Census$cstate.initnum] +
472 | b.region[, Census$cregion.cat] +
473 | (b.relig %*% t(Census$cp.relig.full)) +
474 | (b.kerry %*% t(Census$cp.kerry.full))
475 | Xb <- b.intercept +
476 | b.race.female[, Census$crace.female] +
477 | b.age[, Census$cage.cat] +
478 | b.edu[, Census$cedu.cat] +
479 | b.age.edu[, Census$cage.edu.cat] +
480 | b.state[, Census$cstate.initnum] +
481 | b.region[, Census$cregion.cat]
482 | b.state <- as.data.frame(posterior.state.ranefs.mat)
483 | names(b.state) <- Statelevel$sstate
484 | b.intercept <- individual.out[, grep(pattern = "b.intercept", x = colnames(individual.out), fixed = TRUE)]
485 | b.race.female <- individual.out[, grep(pattern = "b.race.female", x = colnames(individual.out), fixed = TRUE)]
486 | b.age <- individual.out[, grep(pattern = "b.age", x = colnames(individual.out), fixed = TRUE)]
487 | b.age <- b.age[, -c(grep(pattern = "b.age.edu", x = colnames(b.age), fixed = TRUE))]
488 | b.edu <- individual.out[, grep(pattern = "b.edu", x = colnames(individual.out), fixed = TRUE)]
489 | b.age.edu <- individual.out[, grep(pattern = "b.age.edu", x = colnames(individual.out), fixed = TRUE)]
490 | b.region <- individual.out[, grep(pattern = "b.region", x = colnames(individual.out), fixed = TRUE)]
491 | b.relig <- individual.out[, grep(pattern = "b.relig", x = colnames(individual.out), fixed = TRUE)]
492 | b.kerry <- individual.out[, grep(pattern = "b.kerry", x = colnames(individual.out), fixed = TRUE)]
493 | str(b.relig)
494 | str(b.region)
495 | str(b.age.edu)
496 | Xb <- b.intercept +
497 | b.race.female[, Census$crace.female] +
498 | b.age[, Census$cage.cat] +
499 | b.edu[, Census$cedu.cat] +
500 | b.age.edu[, Census$cage.edu.cat] +
501 | b.state[, Census$cstate.initnum] +
502 | b.region[, Census$cregion.cat] +
503 | (b.relig %*% t(Census$cp.relig.full)) +
504 | (b.kerry %*% t(Census$cp.kerry.full))
505 | str(individual.out)
506 | str(b.age)
507 | str(b.age.edu)
508 | str(b.region)
509 | str(b.relig)
510 | str(b.kerry)
511 | str(b.race.female)
512 | sessionInfo()
513 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/.gitignore


--------------------------------------------------------------------------------
/MCMC_probability_demo.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Predicted probabilities after Bayesian logit/probit models"
  3 | author: "Johannes Karreth"
  4 | date: "July 12, 2017"
  5 | output:
  6 |   pdf_document: default
  7 |   html_document:
  8 |     code_folding: hide
  9 | ---
 10 | 
 11 | ```{r setup, include=FALSE}
 12 | rm(list = ls()) ### To clear namespace
 13 | knitr::opts_chunk$set(echo = TRUE, error = FALSE, message = FALSE, warning = FALSE, size = "scriptsize", fig.width = 6, fig.height = 4, out.width = '.99\\linewidth')
 14 | ```
 15 | 
 16 | # Purpose
 17 | 
 18 | This document shows how to use two functions I wrote, `MCMC_simcase_probs` and `MCMC_observed_probs`, to quickly calculate predicted probabilities for "average" and "observed" cases based on estimates from Bayesian logit or probit models (for binary outcomes). For more details on each approach, see King et al. (AJPS 2000) and Hanmer \& Kalkan (AJPS 2013), listed in the syllabus.
 19 | 
 20 | Both functions can be accessed from my Github repository at <https://github.com/jkarreth/JKmisc>. **These functions are not fully tested and should be used with extreme caution. Please let me know right away about any problems you run into so I can improve these functions.**
 21 | 
 22 | The functions require the following R packages to be installed: dplyr and reshape2.
 23 | 
 24 | # Example 1
 25 | 
 26 | Source: Hainmueller, J. and Hiscox, M. J. (2006). Learning to Love Globalization: Education and Individual Attitudes Toward International Trade. International Organization, 60 (2):469-498.
 27 | 
 28 | This example analyzes a simplified version of one of the empirical models in Hainmueller and Hiscox' study of individuals' support for free trade or protectionist policies. The data are a cleaned up and modified version of the 1996 American National Election studies as used in Hainmueller & Hiscox (2006). The outcome variable `protectionist` is binary - coded as 1 if a respondent expressed a preference for more protectionist policies, and coded as 0 if a respondent favored free trade. The explanatory variables are (see Table A2 in Hainmueller & Hiscox (2006) for more details):
 29 | 
 30 | - `age`:  the respondent's age in years.
 31 | - `female`:  a binary indicator for female respondents.
 32 | - `TUmember`:  a binary indicator for trade union members.
 33 | - `partyid`:  the respondent's party identification: coded from 0 "strong Democrat"" to 6 "strong Republican".
 34 | - `ideology`:  the respondent's ideology: coded 0 if conservative, 1 if moderate, and 2 if liberal.
 35 | - `schooling`:  years of full-time education completed.
 36 | 
 37 | The estimated model is a Bayesian logistic regression model, fit in JAGS.
 38 | 
 39 | ```{r, echo = FALSE}
 40 | prot.dat <- rio::import("http://www.jkarreth.net/files/hw5.dat.csv")
 41 | 
 42 | prot.datjags <- as.list(na.omit(prot.dat))
 43 | prot.datjags$N <- length(prot.datjags$protectionist)
 44 | 
 45 | prot.mod <- function()  {
 46 |   
 47 |   for(i in 1:N){
 48 |     protectionist[i] ~ dbern(p[i])  ## Bernoulli distribution of y_i
 49 |     logit(p[i]) <- mu[i]    ## Logit link function
 50 |     mu[i] <- b[1] + b[2] * age[i] + b[3] * female[i] + b[4] * TUmember[i]
 51 |     + b[5] * partyid[i] + b[6] * ideology[i] + b[7] * schooling[i]
 52 |   }
 53 |   
 54 |   for(j in 1:7){
 55 |     b[j] ~ dnorm(0, 0.01) ## Use a coefficient vector for simplicity
 56 |   }
 57 |   
 58 | }
 59 | 
 60 | prot.params <- c("b")
 61 | prot.inits1 <- list("b" = rep(0, 7))
 62 | prot.inits2 <- list("b" = rep(0, 7))
 63 | prot.inits <- list(prot.inits1, prot.inits2)
 64 | 
 65 | set.seed(123)
 66 | 
 67 | library(R2jags)
 68 | prot.fit <- jags(data = prot.datjags, inits = prot.inits, 
 69 |                 parameters.to.save = prot.params, n.chains = 2, n.iter = 2000, 
 70 |                 n.burnin = 1000, model.file = prot.mod)
 71 | 
 72 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
 73 | mcmctab(as.mcmc(prot.fit))
 74 | ```
 75 | 
 76 | ## Probabilities
 77 | 
 78 | ### Average case approach
 79 | 
 80 | ```{r}
 81 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_simcase_probs.R")
 82 | 
 83 | prot_xmat <- model.matrix(protectionist ~ age + female + TUmember + partyid + 
 84 |                             ideology + schooling, 
 85 |                           data = prot.dat)
 86 | 
 87 | prot_mcmc <- as.mcmc(prot.fit)
 88 | prot_mcmc_mat <- as.matrix(prot_mcmc)[, 1:ncol(prot_xmat)]
 89 | 
 90 | prot_schooling_sim <- seq(from = min(prot.dat$schooling), 
 91 |                           to = max(prot.dat$schooling), 
 92 |                           length.out = 10)
 93 | 
 94 | prot_sim_prob <- MCMC_simcase_probs(model_matrix = prot_xmat, 
 95 |                 mcmc_out = prot_mcmc_mat, 
 96 |                 x_col = 7, 
 97 |                 x_range_vec = prot_schooling_sim)
 98 | 
 99 | library(ggplot2)
100 | p_sim <- ggplot(data = prot_sim_prob, aes(x = predictor, y = median_pp)) + 
101 |   geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 
102 |   ylim(c(0, 1)) + xlab("Schooling") + ylab("Pr(Against trade agreement)") + 
103 |   ggtitle("Average case approach") + theme_bw()
104 | p_sim
105 | ```
106 | 
107 | ### Observed value approach
108 | 
109 | ```{r}
110 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_observed_probs.R")
111 | 
112 | prot_xmat <- model.matrix(protectionist ~ age + female + TUmember + partyid + 
113 |                             ideology + schooling, 
114 |                           data = prot.dat)
115 | 
116 | prot_mcmc <- as.mcmc(prot.fit)
117 | prot_mcmc_mat <- as.matrix(prot_mcmc)[, 1:ncol(prot_xmat)]
118 | 
119 | prot_schooling_sim <- seq(from = min(prot.dat$schooling), 
120 |                           to = max(prot.dat$schooling), 
121 |                           length.out = 10)
122 | 
123 | prot_obs_prob <- MCMC_observed_probs(model_matrix = prot_xmat, 
124 |                 mcmc_out = prot_mcmc_mat, 
125 |                 x_col = 7, 
126 |                 x_range_vec = prot_schooling_sim)
127 | 
128 | library(ggplot2)
129 | p_obs <- ggplot(data = prot_obs_prob, aes(x = predictor, y = median_pp)) + 
130 |   geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 
131 |   ylim(c(0, 1)) + xlab("Schooling") + ylab("Pr(Against trade agreement") + 
132 |   ggtitle("Observed value approach") + theme_bw()
133 | p_obs
134 | ```
135 | 
136 | ### Comparing both approaches
137 | 
138 | ```{r}
139 | p_combined <- ggplot(data = prot_sim_prob, aes(x = predictor, y = median_pp)) + 
140 |   geom_line(color = "blue") + 
141 |   geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), fill = "blue", alpha = 0.25) + 
142 |   ylim(c(0, 1)) + 
143 |   geom_line(data = prot_obs_prob, aes(x = predictor, y = median_pp), color = "orange") + 
144 |   geom_ribbon(data = prot_obs_prob, aes(ymin = lower_pp, ymax = upper_pp), fill = "orange", alpha = 0.25) + 
145 |   xlab("Schooling") + ylab("Pr(Against trade agreement)") + theme_bw()
146 | 
147 | p_combined
148 | ```
149 | 
150 | # Example 2
151 | 
152 | Source: Epstein, L., Lindstädt, R., Segal, J. A., and Westerland, C. (2006). The Changing Dynamics of Senate Voting on Supreme Court Nominees. Journal of Politics, 68 (2): 296–307.
153 | 
154 | This example is a simplified version of the "Additional nominees" model in Table 2 of Epstein et al. (2006). Epstein et al. examine why U.S. senators cast votes in favor or against nominees for the U.S. Supreme Court. The data contain 3709 observations, with each observation being one senator's vote, from 1937 to 2005. The outcome variable `vote` is binary - coded as 1 if a senator voted Yea on a candidate, and coded as 0 if the senator voted against the nominee. The explanatory variables are (see p. 298 in Epstein et al. (2006) for more details):
155 | 
156 | - `lackqual`: the degree to which senators perceive the candidate as qualified for office.
157 | - `eucldist`: the ideological distance between the senator and the candidate.
158 | - `strngprs`: a binary indicator for whether the president was "strong"" in the sense that his party controlled the Senate and he was not in his fourth year of office.
159 | - `sameprty`: a binary indicator for whether a senator is of the same political party as the president.
160 | 
161 | The estimated model is a Bayesian logistic regression model, fit in JAGS.
162 | 
163 | ```{r}
164 | nom.dat <- rio::import("http://epstein.wustl.edu/research/Bork.dta")
165 | 
166 | nom.datjags <- as.list(na.omit(nom.dat[, c("vote", "lackqual", "eucldist", 
167 |                                            "strngprs", "sameprty")]))
168 | nom.datjags$N <- length(nom.datjags$vote)
169 | 
170 | nom.mod <- function()  {
171 |   
172 |   for(i in 1:N){
173 |     vote[i] ~ dbern(p[i])  ## Bernoulli distribution of y_i
174 |     logit(p[i]) <- mu[i]    ## Logit link function
175 |     mu[i] <- b[1] + b[2] * lackqual[i] + b[3] * eucldist[i] + b[4] * strngprs[i]
176 |     + b[5] * sameprty[i]
177 |   }
178 |   
179 |   for(j in 1:5){
180 |     b[j] ~ dnorm(0, 0.01) ## Use a coefficient vector for simplicity
181 |   }
182 |   
183 | }
184 | 
185 | nom.params <- c("b")
186 | nom.inits1 <- list("b" = rep(0, 5))
187 | nom.inits2 <- list("b" = rep(0, 5))
188 | nom.inits <- list(nom.inits1, nom.inits2)
189 | 
190 | set.seed(123)
191 | 
192 | library(R2jags)
193 | nom.fit <- jags(data = nom.datjags, inits = nom.inits, 
194 |                 parameters.to.save = nom.params, n.chains = 2, n.iter = 10000, 
195 |                 n.burnin = 5000, n.thin = 5, model.file = nom.mod)
196 | 
197 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
198 | mcmctab(as.mcmc(nom.fit))
199 | ```
200 | 
201 | ## Probabilities
202 | 
203 | ### Average case approach
204 | 
205 | ```{r}
206 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_simcase_probs.R")
207 | 
208 | nom_xmat <- model.matrix(vote ~ lackqual + eucldist + strngprs + sameprty, 
209 |                           data = nom.dat)
210 | 
211 | nom_mcmc <- as.mcmc(nom.fit)
212 | nom_mcmc_mat <- as.matrix(nom_mcmc)[, 1:ncol(nom_xmat)]
213 | 
214 | nom_distance_sim <- seq(from = min(nom.dat$eucldist), 
215 |                         to = max(nom.dat$eucldist), 
216 |                         length.out = 10)
217 | 
218 | nom_sim_prob <- MCMC_simcase_probs(model_matrix = nom_xmat, 
219 |                 mcmc_out = nom_mcmc_mat, 
220 |                 x_col = 3, 
221 |                 x_range_vec = nom_distance_sim)
222 | 
223 | library(ggplot2)
224 | p_sim <- ggplot(data = nom_sim_prob, aes(x = predictor, y = median_pp)) + 
225 |   geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 
226 |   ylim(c(0, 1)) + xlab("Distance") + ylab("Pr(Yea vote)") + 
227 |   ggtitle("Average case approach") + theme_bw()
228 | p_sim
229 | ```
230 | 
231 | ### Observed value approach
232 | 
233 | ```{r}
234 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/MCMC_observed_probs.R")
235 | 
236 | nom_xmat <- model.matrix(vote ~ lackqual + eucldist + strngprs + sameprty, 
237 |                           data = nom.dat)
238 | 
239 | nom_mcmc <- as.mcmc(nom.fit)
240 | nom_mcmc_mat <- as.matrix(nom_mcmc)[, 1:ncol(nom_xmat)]
241 | 
242 | nom_distance_sim <- seq(from = min(nom.dat$eucldist), 
243 |                         to = max(nom.dat$eucldist), 
244 |                         length.out = 10)
245 | 
246 | nom_obs_prob <- MCMC_observed_probs(model_matrix = nom_xmat, 
247 |                 mcmc_out = nom_mcmc_mat, 
248 |                 x_col = 3, 
249 |                 x_range_vec = nom_distance_sim)
250 | 
251 | library(ggplot2)
252 | p_obs <- ggplot(data = nom_obs_prob, aes(x = predictor, y = median_pp)) + 
253 |   geom_line() + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), alpha = 0.25) + 
254 |   ylim(c(0, 1))  + xlab("Distance") + ylab("Pr(Yea vote)") + 
255 |   ggtitle("Observed value approach") + theme_bw()
256 | p_obs
257 | ```
258 | 
259 | ### Comparing both approaches
260 | 
261 | ```{r}
262 | p_combined <- ggplot(data = nom_sim_prob, aes(x = predictor, y = median_pp)) + 
263 |   geom_line(color = "blue") + geom_ribbon(aes(ymin = lower_pp, ymax = upper_pp), fill = "blue", alpha = 0.25) + 
264 |   ylim(c(0, 1)) + 
265 |   geom_line(data = nom_obs_prob, aes(x = predictor, y = median_pp), color = "orange") + 
266 |   geom_ribbon(data = nom_obs_prob, aes(ymin = lower_pp, ymax = upper_pp), fill = "orange", alpha = 0.25) + 
267 |   xlab("Distance") + ylab("Pr(Yea vote)") + theme_bw()
268 | p_combined
269 | ```
270 | 
271 | 


--------------------------------------------------------------------------------
/MCMC_probability_demo.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/MCMC_probability_demo.pdf


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Applied Bayesian Modeling at ICPSR
 2 | =========
 3 | 
 4 | R and JAGS code accompanying my ICPSR summer program course on Applied Bayesian Modeling. See <http://www.jkarreth.net/bayes-icpsr.html> for more information. Please feel free to fork and develop any code you see in this repository. If you notice a problem or have a question, please email me at <jkarreth@albany.edu> and/or create an issue with the respective file.
 5 | 
 6 | Modeling examples
 7 | -----------------
 8 | 
 9 | -   [angell.instructions.R](https://github.com/jkarreth/Bayes/blob/master/angell.instructions.R): code 	   for a Bayesian linear regression model, using the Angell data from
10 |     John Fox's [R and S-PLUS Companion to Applied
11 |     Regression](http://socserv.socsci.mcmaster.ca/jfox/Books/Companion/index.html)
12 |     in R using R2jags. This example contains code for data preparation,
13 |     model fitting, and diagnostics.
14 | -   [angell.jags.zip](http://www.jkarreth.net/files/angell.jags.zip): code for the same model directly 	   in JAGS from the Terminal and analyze the output using the `coda` package in R.
15 | -   [beer.instructions.R](https://github.com/jkarreth/Bayes/blob/master/beer.instructions.R): code for 	   a Bayesian ordered logit model, using data on beer ratings
16 | -   [econvote.instructions.R](https://github.com/jkarreth/Bayes/blob/master/econvote.instructions.R): 	  code for a Bayesian multinomial logit model, using Austrian voting data.
17 | -   [legislators.instructions.R](https://github.com/jkarreth/Bayes/blob/master/legislators.instructions.R): code for [Simon Jackman's Legislators example](http://jackman.stanford.edu/mcmc/mainFrameWinBugs.php#Legislators) using R2jags. [Bayesian IRT model]
18 | -   [pumps.instructions.R](https://github.com/jkarreth/Bayes/blob/master/pumps.instructions.R): code 	 for the Pumps demo from the WinBUGS examples folder using R2jags. [Conjugate gamma-Poisson hierarchical model]
19 | -   [rats.instructions.R](https://github.com/jkarreth/Bayes/blob/master/rats.instructions.R): code for 	   the Rats demo from the WinBUGS examples folder using R2jags. [Normal hierarchical model]
20 | 
21 | Multilevel models
22 | -----------------
23 | 
24 | -   [wvs.instructions.R](https://github.com/jkarreth/Bayes/blob/master/wvs.instructions.R): 
25 |     Linear model with group-level coefficients using an old subsample of World Values Survey.
26 | -   [mlm.state.instructions.R](https://github.com/jkarreth/Bayes/blob/master/mlm.state.instructions.R): Step-by-step instructions to set up hierarchical data in R and fit a multilevel logit model using R2Jags, following the example in chapter 17 of Gelman and Hill's [Data Analysis Using Regression and Multilevel/Hierarchical Models](http://www.stat.columbia.edu/~gelman/arm/). Data:
27 |     [polls.subset.merged.dta](http://www.jkarreth.net/files/polls.subset.merged.dta),
28 |     [polls.subset.JK.dta](http://www.jkarreth.net/files/polls.subset.JK.dta),
29 |     [presvote.dta](http://www.jkarreth.net/files/presvote.dta).
30 | 
31 | Diagnostics
32 | -----------
33 | 
34 | -   [diagnostics.R](https://github.com/jkarreth/Bayes/blob/master/diagnostics.R): 
35 |     This script shows a variety of ways to obtain diagnostics
36 |     (traceplots, density plots, BGR, etc.) of JAGS/MCMC objects in R
37 |     using the `coda`, `superdiag`, `ggmcmc`, and `mcmcplots` packages.
38 | 
39 | Model presentation
40 | ------------------
41 | 
42 | -   [regression.table.R](https://github.com/jkarreth/Bayes/blob/master/regression.table.R): example code to easily export JAGS/BUGS results to LaTeX or HTML. Based on my [mcmctab](https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R) function.
43 | -   [regression.dotplot.R](https://github.com/jkarreth/Bayes/blob/master/regression.dotplot.R): example code to easily make regression coefficient plots from JAGS/BUGS results.
44 | -	[Posterior-Plots.R](https://github.com/reuning/EVDebs/blob/master/Bayesian/Posterior-Plots.R): a function written by [Kevin Reuning](http://www.kevinreuning.com) (participant in the 2015 Applied Bayes workshop at ICPSR) to create a coefficient dot plot with added posterior density.
45 | -   [interaction.instructions.R](https://github.com/jkarreth/Bayes/blob/master/interaction.instructions.R): code to plot marginal effects from a Bayesian linear model with an
46 |     interaction term across the range of a moderating variable.
47 | -	[limited.dep.vars.funcs.R](https://github.com/edunford/bayes.functions/blob/master/limited.dep.vars.funcs.R): a set of functions written by [Eric Dunford](http://gvpt.umd.edu/gradprofile/Dunford/Eric%20) (participant in the 2015 Applied Bayes workshop at ICPSR) to calculate and visualize predicted probabilities from Bayesian logit or probit models for observed and simulated data.
48 | -   [factor.dotplot.R](https://github.com/jkarreth/Bayes/blob/master/factor.dotplot.R): code to make a dot plot (with credible intervals) of a Bayesian
49 |     factor score.


--------------------------------------------------------------------------------
/angell.instructions.R:
--------------------------------------------------------------------------------
  1 | ############################################################
  2 | ## R2jags example using Angell data - Simple linear model ##
  3 | ############################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## ICPSR Summer Program 2016
  7 | 
  8 | ## Required packages
  9 | library(foreign)
 10 | library(R2jags)
 11 | ## calls:
 12 | # library(coda)
 13 | # library(lattice)
 14 | # library(R2WinBUGS)
 15 | # library(rjags)
 16 | 
 17 | #### R2jags help
 18 | ?jags
 19 | 
 20 | ## Read the data in car library
 21 | library(car)
 22 | data(Angell)
 23 | angell.1 <- Angell[, -4] ## take off the fourth column (remember, the order is (row, column))
 24 | 
 25 | ## What do the data look like?
 26 | head(angell.1)
 27 | 
 28 | ## Save the model as "angell.model.jags" in your working directory, using a text editor. Make sure you specify the full name below (check if your text editor gives the file a .txt extension). Do not run this model from within R. You can set your working directory in R's preferences, or via:
 29 | 
 30 | setwd("/Users/johanneskarreth/R/Bayes/angell")
 31 | 
 32 | #  model {
 33 | #  for(i in 1:N){
 34 | #  moral[i]~dnorm(mu[i], tau)
 35 | #  mu[i]<-alpha + beta1*hetero[i] + beta2*mobility[i]
 36 | #  }
 37 | #  
 38 | #  alpha~dnorm(0, .01)
 39 | #  beta1~dunif(-100,100)
 40 | #  beta2~dunif(-100,100)
 41 | #  tau~dgamma(.01,.01)
 42 | #  }
 43 | 
 44 | ## Instead, you can also write the model directly in your R script. For use with R2jags, do this:
 45 | 
 46 | angell.model.jags <- function()  {
 47 | 
 48 |  for(i in 1:N){
 49 |  moral[i]~dnorm(mu[i], tau)
 50 |  mu[i]<-alpha + beta1*hetero[i] + beta2*mobility[i]
 51 |  }
 52 |  
 53 |  alpha~dnorm(0, .01)
 54 |  beta1~dunif(-100,100)
 55 |  beta2~dunif(-100,100)
 56 |  tau~dgamma(.01,.01)
 57 | 
 58 | }
 59 | 
 60 | ## Now define the vectors of the data matrix for JAGS:
 61 | 
 62 | moral <- angell.1$moral
 63 | hetero <- angell.1$hetero
 64 | mobility <- angell.1$mobility
 65 | N <- length(angell.1$moral)
 66 | 
 67 | ## Read in the Angell data for JAGS
 68 | 
 69 | angell.data  <- list("moral", "hetero", "mobility", "N")
 70 | angell.data  <- list(moral = Angell$moral, hetero = Angell$hetero, mobility = Angell$mobility, N = length(Angell$moral))
 71 | 
 72 | ## Name the parameters of the JAGS model you will monitor
 73 | 
 74 | angell.params <- c("alpha", "beta1", "beta2")
 75 | 
 76 | ## Define the starting values for JAGS
 77 | 
 78 | angell.inits <- function(){
 79 |   list("alpha"=c(20), "beta1"=c(-0.1), "beta2" =c(-.02))
 80 | }
 81 | 
 82 | ## Alternatively, provide different starting values for each chain:
 83 | inits1 <- list("alpha"=0, "beta1"=0, "beta2"=0) 
 84 | inits2 <- list("alpha"=1, "beta1"=1, "beta2"=1)
 85 | angell.inits <- list(inits1, inits2)
 86 | 
 87 | ## Fit the model in JAGS, having previously copied the BUGS model into your working directory as "angell.model.jags"
 88 | 
 89 | angellfit <- jags(data=angell.data, inits=angell.inits, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file=angell.model.jags)
 90 | 
 91 | ## If we wanted JAGS to generate initial values:
 92 | angellfit <- jags(data=angell.data, inits=NULL, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file= angell.model.jags)
 93 | 
 94 | ## Note: If you had saved the model file in your working directory, put the file name in quotation marks:
 95 | # angellfit <- jags(data=angell.data, inits=NULL, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file="angell.model.jags")
 96 | 
 97 | ## Update your model if necessary - e.g. if there is no/little convergence:
 98 | 
 99 | angellfit.upd <- update(angellfit, n.iter=1000)
100 | angellfit.upd <- autojags(angellfit)  # this function will auto-update until convergence - check R2jags documentation how this is defined
101 | 
102 | ## Model summary
103 | 
104 | print(angellfit)
105 | 
106 | ## For a function to create a barebones summary table, use
107 | ## https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R
108 | ## install.packages("devtools")
109 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
110 | mcmctab(angellfit)
111 | 
112 | plot(angellfit)
113 | 
114 | ## Re-specify the model to include a distribution for R-squared
115 | 
116 | angell.model.jags <- function()  {
117 | 
118 |  for(i in 1:N){
119 |  moral[i]~dnorm(mu[i], tau)
120 |  mu[i]<-alpha + beta1*hetero[i] + beta2*mobility[i]
121 |  y_ybar[i] <- pow(moral[i] - mean(moral[]), 2)
122 |  y_yhat[i] <- pow(moral[i] - mu[i], 2)
123 |  }
124 |  
125 |  r2 <- (sum(y_ybar[]) - sum(y_yhat[])) / sum(y_ybar[])
126 | 
127 |  alpha~dnorm(0, .01)
128 |  beta1~dunif(-100,100)
129 |  beta2~dunif(-100,100)
130 |  tau~dgamma(.01,.01)
131 | 
132 | }
133 | 
134 | angell.params <- c("alpha", "beta1", "beta2", "r2")
135 | angellfit <- jags(data=angell.data, inits=angell.inits, angell.params, n.chains=2, n.iter=9000, n.burnin=1000, model.file= angell.model.jags)
136 | 
137 | ## Diagnostics
138 | 
139 | traceplot(angellfit)
140 | 
141 | ## if you want to print & save the plot, you can use the following set of commands:
142 | pdf("angell.trace.pdf")    # defines that the plot will be saved as a PDF file with the name "angell.trace.pdf" in your working directory
143 | traceplot(angellfit)	# creates the plot in the background (you will not see it)
144 | dev.off()				# finishes the printing process and creates the PDF file of the plot. If successful, R will display the message "null device 1" 
145 | 
146 | ## Generate MCMC object for analysis
147 | angellfit.mcmc <- as.mcmc(angellfit)
148 | 
149 | summary(angellfit.mcmc)
150 | 
151 | xyplot(angellfit.mcmc)
152 | ## maybe better display (you can use other Lattice options here as well):
153 | xyplot(angellfit.mcmc, layout=c(2,2), aspect="fill")
154 | 
155 | densityplot(angellfit.mcmc)
156 | ## maybe better:
157 | densityplot(angellfit.mcmc, layout=c(2,2), aspect="fill") 
158 | 
159 | ## and more plots... all using the MCMC object and the CODA package:
160 | 
161 | ## Trace- and density in one plot, print directly to your working directory:
162 | pdf("angellfit.mcmc.plot.pdf")
163 | plot(angellfit.mcmc)
164 | dev.off()
165 | 
166 | ## Autocorrelation plot, print directly to your working directory:
167 | pdf("angellfit.mcmc.autocorr.pdf")
168 | autocorr.plot(angellfit.mcmc)
169 | dev.off()
170 | 
171 | ## Other diagnostics using CODA:
172 | 
173 | gelman.plot(angellfit.mcmc)
174 | geweke.diag(angellfit.mcmc)
175 | geweke.plot(angellfit.mcmc)
176 | raftery.diag(angellfit.mcmc)
177 | heidel.diag(angellfit.mcmc)
178 | 
179 | ## Another option for diagnostics and results: the ggmcmc package
180 | library(ggmcmc)
181 | 
182 | ## All plots into one PDF file in your working directory (!):
183 | ggmcmc(angellfit.mcmc)
184 | angellfit.gg <- ggs(angellfit.mcmc)
185 | ggmcmc(angellfit.gg)
186 | 
187 | ## Individual plots:
188 | ggs_histogram(angellfit.gg)
189 | ggs_density(angellfit.gg)
190 | ggs_traceplot(angellfit.gg)
191 | ggs_running(angellfit.gg)
192 | 
193 | ## For more, see http://xavier-fim.net/packages/ggmcmc/
194 | 
195 | ## Yet another option with a similar look, and many diagnostics printed into one file: the mcmcplots package
196 | ## If you like the look of ggplot2, check out the package "mcmcplots":
197 | library(mcmcplots) 
198 | denplot(angellfit.mcmc, parms = c("alpha", "beta1", "beta2"))
199 | traplot(angellfit.mcmc, parms = c("alpha", "beta1", "beta2"))
200 | caterplot(angellfit.mcmc)
201 | caterplot(angellfit.mcmc, parms = c("alpha", "beta1", "beta2"), labels = c("Intercept", "Heterogeneity", "Mobility"))
202 | ## The following command prints diagnostic plots into a (temporary) HTML file for quick viewing:
203 | mcmcplot(angellfit.mcmc)
204 | 
205 | ## Another convenient function to obtain many diagnostics at once:
206 | ## superdiag
207 | library(superdiag)
208 | superdiag(angellfit.mcmc, burnin = 100)


--------------------------------------------------------------------------------
/beer.instructions.R:
--------------------------------------------------------------------------------
  1 | #################################################################
  2 | ## R2jags example using the Beer example - ordered logit model ##
  3 | #################################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## ICPSR Summer Program 2016
  7 | 
  8 | ## Required libraries
  9 | library(foreign)
 10 | library(R2jags)
 11 | 
 12 | setwd("~/R/Bayes/beer")
 13 | 
 14 | ## Read the data
 15 | 
 16 | beer.data <- list(price = c(7.19, 3.15, 3.35, 2.59, 4.59, 4.39, 2.99, 2.49, 3.65, 2.59, 2.89, 2.99, 2.29, 4.75, 1.69, 2.55, 4.22, 2.63, 2.73, 1.79, 2.65, 2.39, 2.49, 4.55, 2.65, 1.79, 1.59, 2.79, 2.75, 2.59, 2.59, 2.15, 4.59, 2.59, 2.29), calories = c( 154, 147, 154, 144, 152, 170, 162, 149, 149, 151, 157, 135, 152, 149, 145, 99, 145, 113, 102, 147, 140, 175, 149, 150, 153, 144, 145, 97, 72, 155, 136, 144, 144, 144, 68), sodium = c( 17, 17, 17, 15, 11, 7, 10, 17, 7, 19, 15, 11, 8, 6, 23, 10, 14, 8, 15, 7, 18, 24, 27, 19, 27, 13, 18, 7, 6, 13, 19, 8, 21, 24, 15), alcohol = c( 4.7, 5, 5.1, 4.7, 5, 5.2, 5, 4.7, 4.7, 4.9, 4.9, 4.2, 4.9, 5, 4.6, 4.3, 4.5, 3.7, 4.1, 4.7, 4.6, 5.5, 4.7, 4.7, 4.6, 4.6, 4.5, 4.2, 2.9, 5, 4.4, 4.7, 4.7, 4.9, 2.3), quality = c( 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), N=35 )
 17 | 
 18 | price <- beer.data$price
 19 | calories <- beer.data$calories
 20 | sodium <- beer.data$sodium
 21 | alcohol <- beer.data$alcohol
 22 | quality <- beer.data$quality
 23 | N <- length(beer.data$price)
 24 | 
 25 | ## Model: Compare the BUGS language to the JAGS version
 26 | 
 27 | ## BUGS model
 28 | 
 29 | #  model{  
 30 | #  for(i in 1:N){
 31 | #  for(j in 1:2){
 32 | #  logit(gamma[i,j]) <- theta[j] - mu[i]}
 33 | #  p[i,1]<- gamma[i,1]  				## Probability that each beer is in cat1, or the probability that tau > xbeta. Compare: Pr(y_i=1) = logit(tau_1 - X_i \beta) - 0
 34 | #  p[i,2] <- gamma[i,2] - gamma[i,1]	## Probability that each beer is in cat2. Compare: Pr(y_i=2) = logit(tau_2 - X_i \beta) - logit(tau_1 - X_i \beta)
 35 | #  p[i,3] <- 1-gamma[i,2]				## Probability that each beer is in cat3. Compare: Pr(y_i=1) = 1 - logit(tau_2 - X_i \beta)
 36 | #  mu[i] <-  b[1]*price[i] + b[2]*sodium[i] + b[3]*alcohol[i]+b[4]*calories[i]
 37 | #  quality[i] ~ dcat(p[i,1:3])	## Categorical distribution (check WinBUGS help for more info)
 38 | #  
 39 | #  
 40 | #  
 41 | #  for(j in 1:3){
 42 | #  pred[i,j] <- equals(p[i,j], ranked(p[i,1:3],3))	## pred[i,j] is always 0 or 1
 43 | #  }
 44 | #  predcat[i] <- pred[i,1] + 2*pred[i,2] + 3*pred[i,3]
 45 | #  }
 46 | #  for(m in 1:4){
 47 | #  b[m] ~ dnorm(0, .0001)
 48 | #  }
 49 | #  theta[1] ~ dnorm(0,.1)I(0, theta[2])	## the capital I restricts the values of theta[1] to be between 0 and theta[2]
 50 | #  theta[2] ~ dnorm(0,.1)I(theta[1], )
 51 | #  
 52 | #  }
 53 | #  
 54 | #
 55 | 
 56 | ## JAGS model !!
 57 | ## Note the differences in handling the cutpoints AND the pred. category code. 
 58 | ## For more info: jags.tutorial.pdf on http://www.jkarreth.net/Bayes2013.html
 59 | 
 60 | beer.model.jags <- function()  {
 61 |   
 62 |  	for (i in 1:N){
 63 |  		for (j in 1:2){
 64 |  			logit(gamma[i,j]) <- theta1[j] - mu[i]
 65 |  			}
 66 |  		quality[i] ~ dcat(p[i,1:3])
 67 |  		p[i,1]<- gamma[i,1]
 68 |  		p[i,2] <- gamma[i,2] - gamma[i,1]
 69 |  		p[i,3] <- 1-gamma[i,2]
 70 |  		mu[i] <-  b1*price[i] + b2*sodium[i] + b3*alcohol[i]+b4*calories[i]
 71 |  		
 72 |  		pred[i,1] <- equals(p[i,1], max(p[i,1], p[i,2], p[i,3])) # "1 if p[i,1] = max(p[i,1], p[i,2], p[i,3]), 0 otherwise"
 73 |  		pred[i,2] <- equals(p[i,2], max(p[i,1], p[i,2], p[i,3]))
 74 |  		pred[i,3] <- equals(p[i,3], max(p[i,1], p[i,2], p[i,3]))
 75 |  				
 76 |  		predcat[i] <- pred[i,1] + 2*pred[i,2] + 3*pred[i,3]
 77 |  				
 78 |  		}
 79 |  		for (i in 1:2) {
 80 |  			theta[i] ~ dnorm(0, 1.0E-3)
 81 |  			}
 82 |  		theta1[1:2] <- sort(theta)
 83 |  		b1 ~ dnorm(0, .1)
 84 |  		b2 ~ dnorm(0, .1)
 85 |  		b3 ~ dnorm(0, .1)
 86 | 		b4 ~ dnorm(0, .1)
 87 | 		
 88 | 		}
 89 | 
 90 | 
 91 | ## Now define the vectors of the data matrix for JAGS:
 92 | 
 93 | ## Read in the Beer data for JAGS
 94 | 
 95 | beer.data  <- list("price", "calories", "sodium", "alcohol", "quality", "N")
 96 | 
 97 | ## Name the JAGS parameters to be monitored
 98 | 
 99 | beer.params <- c("b1", "b2", "b3", "b4", "predcat")
100 | 
101 | ## Define the starting values for JAGS
102 | ## Note that we give starting values to theta, not theta1
103 | 
104 | beer.inits <- function(){
105 |   list("b1" = c(0.56), 
106 |        "b2" = c(-0.05), 
107 |        "b3" = c(0.95), 
108 |        "b4" = c(0.03), 
109 |        "theta" = c(7, 10))
110 | }
111 | 
112 | ## Fit the model in JAGS, having previously copied the BUGS model into your working directory as "beer.mod"
113 | 
114 | beerfit <- jags(data=beer.data, inits=beer.inits, beer.params, n.chains=2, n.iter=2000, n.burnin=500, model.file=beer.model.jags)
115 | 
116 | ## Update your model if necessary - e.g. if there is no/little convergence:
117 | 
118 | beerfit.upd <- update(beerfit, n.iter=1000)
119 | beerfit.upd <- autojags(beerfit)  # this function will auto-update until convergence - check R2jags documentation how this is defined
120 | 
121 | ## Obtain info on predicted categories, i.e.
122 | ## which category does your model predict each observation to be in?
123 | 
124 | # Monitor the "predcat" node from the code above
125 | # Then inspect the MEDIAN (not mean) of predcat
126 | beer.mcmc <- as.mcmc(beerfit)
127 | summary(beer.mcmc)
128 | 
129 | # For a table of the distribution of predicted categories:
130 | chains <- do.call(rbind, beer.mcmc)
131 | pc <- chains[ , grep("predcat", colnames(chains))]
132 | tab <- apply(pc, 2, function(x)table(factor(x, levels = 1:3)))
133 | tab <- t(tab)
134 | library(car)
135 | prop.table(tab, 1)


--------------------------------------------------------------------------------
/diagnostics.R:
--------------------------------------------------------------------------------
  1 | #############################################
  2 | ## Example code to fit Bayesian models and 
  3 | ## generate convergence diagnostics in R 
  4 | #############################################
  5 | 
  6 | ## Johannes Karreth
  7 | ## ICPSR Summer Program 2016
  8 | 
  9 | ## This code is mostly extracted from my tutorial, 
 10 | ## "Using JAGS and BUGS via R", posted online at 
 11 | ## <http://www.jkarreth.net/files/Lab2_JAGS-BUGS.pdf>
 12 | 
 13 | ## Please refer to that file for documentation of the code below.
 14 | 
 15 | ########################################
 16 | ## Install required packages (after having installed JAGS separately)
 17 | ########################################
 18 | 
 19 | 
 20 | ## install.packages("R2jags", dependencies = TRUE, repos = "https://cloud.r-project.org")
 21 | ## install.packages("runjags", dependencies = TRUE, repos = "https://cloud.r-project.org")
 22 | ## install.packages("MCMCpack", dependencies = TRUE, repos = "https://cloud.r-project.org")
 23 | 
 24 | ## -------------------------------------
 25 | library("R2jags")
 26 | 
 27 | ########################################
 28 | ## Fit example model to check if R2jags is properly installed
 29 | ########################################
 30 | 
 31 | # An example model file is given in:
 32 | model.file <- system.file(package = "R2jags", "model", "schools.txt")
 33 | 
 34 | # data
 35 | J <- 8.0
 36 | y <- c(28.4,7.9,-2.8,6.8,-0.6,0.6,18.0,12.2)
 37 | sd <- c(14.9,10.2,16.3,11.0,9.4,11.4,10.4,17.6)
 38 | 
 39 | jags.data <- list("y","sd","J")
 40 | jags.params <- c("mu","sigma","theta")
 41 | jags.inits <- function(){
 42 |   list("mu"=rnorm(1),"sigma"=runif(1),"theta"=rnorm(J))
 43 | }
 44 | 
 45 | # Fit the model
 46 | jagsfit <- jags(data=list("y","sd","J"), inits = jags.inits, 
 47 |   jags.params, n.iter = 10, model.file = model.file)
 48 | 
 49 | ########################################
 50 | ## Simulate data and fit a linear regression model
 51 | ########################################
 52 | 
 53 | 
 54 | ## -------------------------------------
 55 | n.sim <- 100; set.seed(123)
 56 | x1 <- rnorm(n.sim, mean = 5, sd = 2)
 57 | x2 <- rbinom(n.sim, size = 1, prob = 0.3)
 58 | e <- rnorm(n.sim, mean = 0, sd = 1)
 59 | 
 60 | ## -------------------------------------
 61 | b1 <- 1.2
 62 | b2 <- -3.1
 63 | a <- 1.5
 64 | y <- a + b1 * x1 + b2 * x2 + e
 65 | 
 66 | ## -------------------------------------
 67 | sim.dat <- data.frame(y, x1, x2)
 68 | 
 69 | ## -------------------------------------
 70 | freq.mod <- lm(y ~ x1 + x2, data = sim.dat)
 71 | summary(freq.mod)
 72 | 
 73 | ## -------------------------------------
 74 | ## Note: use the path to your own WD here
 75 | setwd("~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R")
 76 | 
 77 | ## -------------------------------------
 78 | bayes.mod <- function()  {
 79 | 
 80 | for(i in 1:N){
 81 | y[i] ~ dnorm(mu[i], tau)
 82 | mu[i] <- alpha + beta1 * x1[i] + beta2 * x2[i]
 83 | }
 84 | 
 85 | alpha ~ dnorm(0, .01)
 86 | beta1 ~ dunif(-100, 100)
 87 | beta2 ~ dunif(-100, 100)
 88 | tau ~ dgamma(.01, .01)
 89 | 
 90 | }
 91 | 
 92 | ## -------------------------------------
 93 | y <- sim.dat$y
 94 | x1 <- sim.dat$x1
 95 | x2 <- sim.dat$x2
 96 | N <- nrow(sim.dat)
 97 | 
 98 | ## -------------------------------------
 99 | sim.dat.jags  <- list("y", "x1", "x2", "N")
100 | 
101 | ## -------------------------------------
102 | sim.dat.jags  <- as.list(sim.dat)
103 | 
104 | ## -------------------------------------
105 | sim.dat.jags$N  <- nrow(sim.dat)
106 | 
107 | ## -------------------------------------
108 | bayes.mod.params <- c("alpha", "beta1", "beta2")
109 | 
110 | ## -------------------------------------
111 | bayes.mod.inits <- function(){
112 | 	list("alpha" = rnorm(1), "beta1" = rnorm(1), "beta2" = rnorm(1))
113 |   }
114 | 
115 | ## -------------------------------------
116 | inits1 <- list("alpha" = 0, "beta1" = 0, "beta2" = 0) 
117 | inits2 <- list("alpha" = 1, "beta1" = 1, "beta2" = 1)
118 | inits3 <- list("alpha" = -1, "beta1" = -1, "beta2" = -1)
119 | bayes.mod.inits <- list(inits1, inits2, inits3)
120 | 
121 | ## -------------------------------------
122 | library("R2jags")
123 | set.seed(123) 
124 | 
125 | ## -------------------------------------
126 | ## Note: use the path to your own model.file here
127 | bayes.mod.fit.R2jags <- jags(data = sim.dat.jags, inits = bayes.mod.inits, 
128 |   parameters.to.save = bayes.mod.params, n.chains = 3, n.iter = 9000, 
129 |   n.burnin = 1000, 
130 |   model.file = "~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R/bayes.mod")
131 | 
132 | ## -------------------------------------
133 | bayes.mod.fit.R2jags <- jags(data = sim.dat.jags, inits = bayes.mod.inits, 
134 |   parameters.to.save = bayes.mod.params, n.chains = 3, n.iter = 9000, 
135 |   n.burnin = 1000, model.file = bayes.mod)
136 | 
137 | ## -------------------------------------
138 | bayes.mod.fit.R2jags.upd <- update(bayes.mod.fit.R2jags, n.iter = 1000)
139 | bayes.mod.fit.R2jags.upd <- autojags(bayes.mod.fit.R2jags) 
140 | 
141 | ## -------------------------------------
142 | print(bayes.mod.fit.R2jags)
143 | plot(bayes.mod.fit.R2jags)
144 | traceplot(bayes.mod.fit.R2jags)
145 | 
146 | ########################################
147 | ## Posterior plots and diagnostics
148 | ########################################
149 | 
150 | ## -------------------------------------
151 | bayes.mod.fit.mcmc <- as.mcmc(bayes.mod.fit.R2jags)
152 | summary(bayes.mod.fit.mcmc)
153 | 
154 | ## -------------------------------------
155 | library("lattice")
156 | xyplot(bayes.mod.fit.mcmc)
157 | 
158 | ## -------------------------------------
159 | xyplot(bayes.mod.fit.mcmc, layout = c(2, 2), aspect = "fill")
160 | 
161 | ## -------------------------------------
162 | densityplot(bayes.mod.fit.mcmc)
163 | 
164 | ## -------------------------------------
165 | densityplot(bayes.mod.fit.mcmc, layout = c(2, 2), as.table = TRUE, aspect = "fill") 
166 | 
167 | ## -------------------------------------
168 | gelman.plot(bayes.mod.fit.mcmc)
169 | geweke.diag(bayes.mod.fit.mcmc)
170 | raftery.diag(bayes.mod.fit.mcmc)
171 | heidel.diag(bayes.mod.fit.mcmc)
172 | 
173 | ## -------------------------------------
174 | ## install.packages("superdiag", dependencies = TRUE, repos = "http://cran.us.r-project.org")
175 | 
176 | ## -------------------------------------
177 | library("superdiag")
178 | 
179 | ## -------------------------------------
180 | superdiag(bayes.mod.fit.mcmc, burnin = 1000)
181 | 
182 | ## -------------------------------------
183 | ## install.packages("mcmcplots", dependencies = TRUE, repos = "http://cran.us.r-project.org")
184 | 
185 | ## -------------------------------------
186 | library("mcmcplots")
187 | 
188 | ## -------------------------------------
189 | denplot(bayes.mod.fit.mcmc, parms = c("alpha", "beta1", "beta2"))
190 | traplot(bayes.mod.fit.mcmc, parms = c("alpha", "beta1", "beta2"))
191 | 
192 | ## -------------------------------------
193 | ## mcmcplot(bayes.mod.fit.mcmc)
194 | 
195 | ## -------------------------------------
196 | caterplot(bayes.mod.fit.mcmc, parms = c("alpha", "beta1", "beta2"), 
197 |   labels = c("alpha", "beta1", "beta2"))
198 | 
199 | ## -------------------------------------
200 | ## install.packages("ggmcmc", dependencies = TRUE, repos = "http://cran.us.r-project.org")
201 | 
202 | ## -------------------------------------
203 | library("ggmcmc")
204 | 
205 | ## -------------------------------------
206 | bayes.mod.fit.gg <- ggs(bayes.mod.fit.mcmc)
207 | ggs_histogram(bayes.mod.fit.gg)
208 | ggs_density(bayes.mod.fit.gg)
209 | ggs_traceplot(bayes.mod.fit.gg)
210 | ggs_running(bayes.mod.fit.gg)
211 | ggs_compare_partial(bayes.mod.fit.gg)
212 | ggs_autocorrelation(bayes.mod.fit.gg)
213 | ggs_geweke(bayes.mod.fit.gg)
214 | ggs_caterpillar(bayes.mod.fit.gg)
215 | 
216 | ## -------------------------------------
217 | ## Note: provide the path and filename for your desired output here
218 | ggmcmc(bayes.mod.fit.gg, 
219 |        file = "~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R/bayes_fit_ggmcmc.pdf")
220 | 
221 | ########################################
222 | ## Use runjags instead of R2Jags
223 | ########################################
224 | 
225 | ## -------------------------------------
226 | library("runjags")
227 | 
228 | ## -------------------------------------
229 | n.sim <- 100; set.seed(123)
230 | x1 <- rnorm(n.sim, mean = 5, sd = 2)
231 | x2 <- rbinom(n.sim, size = 1, prob = 0.3)
232 | e <- rnorm(n.sim, mean = 0, sd = 1)
233 | 
234 | ## -------------------------------------
235 | b1 <- 1.2
236 | b2 <- -3.1
237 | a <- 1.5
238 | y <- a + b1 * x1 + b2 * x2 + e
239 | 
240 | ## -------------------------------------
241 | sim.dat <- data.frame(y, x1, x2)
242 | 
243 | ## -------------------------------------
244 | freq.mod <- lm(y ~ x1 + x2, data = sim.dat)
245 | summary(freq.mod)
246 | 
247 | ## -------------------------------------
248 | bayes.mod <- "model{
249 | 
250 | for(i in 1:N){
251 | y[i] ~ dnorm(mu[i], tau)
252 | mu[i] <- alpha + beta1 * x1[i] + beta2 * x2[i]
253 | }
254 | 
255 | alpha ~ dnorm(0, .01)
256 | beta1 ~ dunif(-100, 100)
257 | beta2 ~ dunif(-100, 100)
258 | tau ~ dgamma(.01, .01)
259 | 
260 | }"
261 | 
262 | ## -------------------------------------
263 | sim.list  <- as.list(sim.dat)
264 | 
265 | ## -------------------------------------
266 | sim.list$N  <- nrow(sim.dat)
267 | 
268 | ## -------------------------------------
269 | sim.dat.runjags <- dump.format(sim.list)
270 | 
271 | ## -------------------------------------
272 | inits1 <- list(alpha = 1, beta1 = 1, beta2 = 1)
273 | inits2 <- list(alpha = 0, beta1 = 0, beta2 = 0)
274 | inits3 <- list(alpha = -1, beta1 = -1, beta2 = -1)
275 | 
276 | ## -------------------------------------
277 | bayes.mod.fit.runjags <- run.jags(model = bayes.mod, monitor = c("alpha", "beta1", "beta2"), 
278 |   data = sim.dat.runjags, n.chains = 3, inits = list(inits1, inits2, inits3),
279 |   burnin = 1000, sample = 5000, keep.jags.files = TRUE)
280 | 
281 | ## -------------------------------------
282 | print(bayes.mod.fit.runjags)
283 | 
284 | ## -------------------------------------
285 | bayes.mod.fit.mcmc <- as.mcmc.list(bayes.mod.fit.runjags)
286 | summary(bayes.mod.fit.mcmc)
287 | 
288 | ## -------------------------------------
289 | mcmcplot(bayes.mod.fit.mcmc)
290 | superdiag(bayes.mod.fit.mcmc, burnin = 1000)
291 | 
292 | ## -------------------------------------
293 | 
294 | ########################################
295 | ## Use JAGS via terminal and analyze posterior in R using coda
296 | ########################################
297 | 
298 | 
299 | ## -------------------------------------
300 | sim.dat.list <- as.list(sim.dat)
301 | sim.dat.list$N  <- nrow(sim.dat)
302 | dump("sim.dat.list", file = "sim.dat.dump")
303 | bugs2jags("sim.dat.dump", "sim.dat")
304 | 
305 | ## -------------------------------------
306 | ## Note: use the path to your own WD and names for your output files here
307 | library("coda")
308 | setwd("~/Documents/Dropbox/Uni/9 - ICPSR/2016/Applied Bayes/Tutorials/2 - JAGS and R/")
309 | chain1 <- read.coda(output.file = "bayes_outchain1.txt", 
310 |   index.file = "bayes_outindex.txt")
311 | chain2 <- read.coda(output.file = "bayes_outchain2.txt", 
312 |   index.file = "bayes_outindex.txt")
313 | chain3 <- read.coda(output.file = "bayes_outchain3.txt", 
314 |   index.file = "bayes_outindex.txt")
315 | bayes.chains <- as.mcmc.list(list(chain1, chain2, chain3))
316 | 
317 | ## -------------------------------------
318 | help(package = "coda")
319 | 
320 | ## -------------------------------------
321 | summary(bayes.chains)
322 | 
323 | ########################################
324 | ## Use MCMCpack
325 | ########################################
326 | 
327 | ## -------------------------------------
328 | library("MCMCpack")
329 | 
330 | ## -------------------------------------
331 | n.sim <- 100; set.seed(123)
332 | x1 <- rnorm(n.sim, mean = 5, sd = 2)
333 | x2 <- rbinom(n.sim, size = 1, prob = 0.3)
334 | e <- rnorm(n.sim, mean = 0, sd = 1)
335 | 
336 | ## -------------------------------------
337 | b1 <- 1.2
338 | b2 <- -3.1
339 | a <- 1.5
340 | y <- a + b1 * x1 + b2 * x2 + e
341 | 
342 | ## -------------------------------------
343 | sim.dat <- data.frame(y, x1, x2)
344 | 
345 | ## -------------------------------------
346 | freq.mod <- lm(y ~ x1 + x2, data = sim.dat)
347 | summary(freq.mod)
348 | 
349 | ## -------------------------------------
350 | bayes.mod.fit.MCMCpack <- MCMCregress(y ~ x1 + x2, data = sim.dat, burnin = 1000,
351 |   mcmc = 5000, seed = 123, beta.start = c(0, 0, 0),
352 |   b0 = c(0, 0, 0), B0 = c(0.1, 0.1, 0.1))
353 | summary(bayes.mod.fit.MCMCpack)
354 | 
355 | ## -------------------------------------
356 | mcmcplot(bayes.mod.fit.MCMCpack)
357 | superdiag(bayes.mod.fit.MCMCpack, burnin = 1000)


--------------------------------------------------------------------------------
/econvote.instructions.R:
--------------------------------------------------------------------------------
  1 | #########################################################
  2 | ## Economic voting in Germany: Multinomial logit model ##
  3 | #########################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## ICPSR Summer Program 2016
  7 | 
  8 | library(R2jags)
  9 | 
 10 | # Download the data from https://dbk.gesis.org/DBKSearch/download.asp?id=37837
 11 | 
 12 | d <- rio::import("2009_ZA5384_v1-0-0.dta")
 13 | 
 14 | # Data cleaning and recoding
 15 | 
 16 | d$votechoice <- NA
 17 | d$votechoice <- ifelse(d$v3c == 1, 1, d$votechoice) # CDU/CSU
 18 | d$votechoice <- ifelse(d$v3c == 2, 2, d$votechoice) # SPD
 19 | d$votechoice <- ifelse(d$v3c == 3, 3, d$votechoice) # FDP
 20 | d$votechoice <- ifelse(d$v3c == 4, 4, d$votechoice) # Linke
 21 | d$votechoice <- ifelse(d$v3c == 5, 5, d$votechoice) # Green
 22 | 
 23 | d$interest <- d$v15 - 3
 24 | d$interest <- ifelse(d$interest == 3, NA, d$interest)
 25 | 
 26 | d$leftright <- d$v56 - 6
 27 | d$leftright <- ifelse(d$leftright == 6, NA, d$leftright)
 28 | 
 29 | d$age <- d$vb - 5
 30 | 
 31 | d$children <- ifelse(d$vx1 == 1, 1, ifelse(d$vx1 == 2, 0, NA))
 32 | 
 33 | d$education <- ifelse(d$vf == 4 | d$vf == 5, -1, ifelse(d$vf == 1, 0, ifelse(d$vf == 2, 1, ifelse(d$vf == 3, 2, NA))))
 34 | 
 35 | d$union <- ifelse(d$vp == 1 | d$vp == 2 | d$vp == 3, 1, ifelse(d$vp == 4, 0, NA))
 36 | 
 37 | d$female <- d$va- 1
 38 | 
 39 | d$econcountrynow <- ifelse(d$v25 == 1, 1, ifelse(d$v25 == 2, 0, ifelse(d$v25 == 3, 0, NA)))
 40 | 
 41 | d$econselfnow <- ifelse(d$v27 == 1, 1, ifelse(d$v27 == 2, 0, ifelse(d$v27 == 3, 0, NA)))
 42 | 
 43 | d$econctryworse <- ifelse(d$v25 == 1, 0, ifelse(d$v25 == 2, 0, ifelse(d$v25 == 3, 1, NA)))
 44 | 
 45 | d$econselfworse <- ifelse(d$v27 == 1, 0, ifelse(d$v27 == 2, 0, ifelse(d$v27 == 3, 1, NA)))
 46 | 
 47 | 
 48 | d$econcountryfuture <- ifelse(d$v29 == 2, -1, ifelse(d$v29 == 3, 0, ifelse(d$v29 == 1, 1, NA)))
 49 | 
 50 | d$econselffuture <- ifelse(d$v28 == 3, -1, ifelse(d$v28 == 2, 0, ifelse(d$v29 == 1, 1, NA)))
 51 | 
 52 | d$fearterror <- ifelse(d$v43 == 1, 1, ifelse(d$v43 == 2, 0, NA))
 53 | 
 54 | d$class <- ifelse(d$vl2 == 1, -1, ifelse(d$vl2 == 2, 0, ifelse(d$vl2 == 3, 1, NA)))
 55 | 
 56 | d <- d[, c("votechoice", "interest", "leftright", "age", "children", "education", "union", "female", "econctryworse", "econselfworse")]
 57 | 
 58 | # Data for JAGS/BUGS
 59 | 
 60 | d_nm <- na.omit(d[, c("votechoice", "interest", "leftright", "age", "children", "education", "union", "female", "econctryworse", "econselfworse")])
 61 | dj <- as.list(d_nm)
 62 | dj$N <- nrow(d_nm)
 63 | dj$J <- length(as.numeric(levels(as.factor(dj$votechoice))))
 64 | 
 65 | econ.mod <- function()  {
 66 |   
 67 |   for(i in 1:N){
 68 |     votechoice[i] ~ dcat(p[i, 1:J])
 69 |     
 70 |     for (j in 1:J){
 71 |       log(q[i,j]) <-  b[1,j] + 
 72 |         b[2,j] * interest[i] + 
 73 |         b[3,j] * leftright[i] + 
 74 |         b[4,j] * age[i] + 
 75 |         b[5,j] * children[i] + 
 76 |         b[6,j] * education[i] + 
 77 |         b[7,j] * union[i] + 
 78 |         b[8,j] * female[i] +
 79 |         b[9,j] * econctryworse[i] + 
 80 |         b[10,j] * econselfworse[i]
 81 |       s
 82 |       p[i,j] <- q[i,j]/sum(q[i,1:J])  ## should be familiar from MLE notes: q is exp(Xb)
 83 |     }   # close J loop
 84 |   }  # close N loop
 85 |   
 86 |   for(k in 1:10){
 87 |     b[k,1] <- 0          ## MUST set the first set of covariates (for the first outcome category) to 0
 88 |     for(j in 2:J){
 89 |       b[k,j] ~ dnorm(0, 0.1)
 90 |     }  # close J loop
 91 |   }  # close K loop
 92 | }  # close model loop 
 93 | 
 94 | econ.params <- c("b")
 95 | 
 96 | econ.inits <- function(){
 97 |   list(b = matrix(c(rep(NA, 10), 
 98 |                     rep(0, 40)), 
 99 |                   nrow = 10, ncol = 5, byrow = FALSE))
100 | }
101 | 
102 | library(R2jags)
103 | econ.fit <- jags(data = dj, inits = econ.inits, econ.params, n.chains = 3, n.iter = 10, n.burnin = 5, n.thin = 1, model.file = econ.mod)
104 | econ.mcmc <- as.mcmc(econ.fit)
105 | library(mcmcplots)
106 | mcmcplot(econ.mcmc)
107 | 
108 | econ.mcmc.dat <- as.data.frame(as.matrix(econ.mcmc))
109 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
110 | mcmctab(econ.mcmc)


--------------------------------------------------------------------------------
/factor.dotplot.R:
--------------------------------------------------------------------------------
 1 | ########################################
 2 | ## Bayesian factor scores as dot plot ##
 3 | #### Workflow for R2Jags/R2WinBUGS #####
 4 | ########################################
 5 | 
 6 | ## Johannes Karreth
 7 | ## jkarreth@albany.edu
 8 | 
 9 | library(lattice)
10 | library(ggplot2)
11 | library(stats)
12 | 
13 | ## if fa.fit is your factor model output from R2jags or R2WinBUGS:
14 | fa.mcmc <- as.mcmc(fa.fit)
15 | fa.mat <- as.matrix(fa.mcmc)
16 | fa.dat <- as.data.frame(fa.mat)
17 | 
18 | fa.dat <- fa.dat[, -1]
19 | 
20 | ## name your predicted factor latent.mean, and the CI between latent.lower and latent.upper
21 | latent.mean <- apply(fa.dat, 2, mean)
22 | latent.lower <- apply(fa.dat, 2, function(x) quantile(x, probs = c(0.025)))
23 | latent.upper <- apply(fa.dat, 2, function(x) quantile(x, probs = c(0.975)))
24 | subject <- colnames(fa.dat)
25 | 
26 | dat <- data.frame(latent.mean, latent.lower, latent.upper, subject)
27 | 
28 | ## here, take only 50 of the 1500 observations (just for demonstration)
29 | plot.dat <- dat[sample(1:nrow(dat), 50, replace=FALSE),]
30 | 
31 | ## order the data by the factor scores, for better visualization
32 | 
33 | plot.dat <- plot.dat[order(plot.dat$latent.mean), ]
34 | 
35 | ## order the observation IDs as well (seems redundant, but is necessary)
36 | 
37 | plot.dat$subject2 <- reorder(plot.dat$subject, plot.dat$latent.mean)
38 | 
39 | ## define range of the scores, for the axis limits in the plot
40 | 
41 | rg <- diff(range(c(plot.dat$latent.upper, plot.dat$latent.lower)))
42 | 
43 | ## make plot using the lattice package:
44 | 
45 | dotplot(subject2 ~ latent.mean, data=plot.dat,scales=list(y=list(cex=.45)), xlim=c(min(plot.dat$latent.lower)-.1*rg, max(plot.dat$latent.upper)+.1*rg),main="Latent trait", panel=function(x,y, subscripts){
46 |   panel.abline(h = as.numeric(y), col = "gray80", lty = 2)
47 |   panel.segments(plot.dat$latent.lower[subscripts], y, plot.dat$latent.upper[subscripts], y, lty=1, col="gray40")
48 |   panel.points(x,y, pch=16, col="black")})
49 | 
50 | ## make plot using the ggplot2 package:
51 | library(ggplot2)
52 | factorplot <- ggplot(plot.dat, aes(x = latent.mean, y = subject2)) + geom_point() + geom_segment(aes(x = latent.lower, xend = latent.upper, y = subject2, yend = subject2))
53 | factorplot <- factorplot + xlab("Latent trait") + ylab("") + theme_bw()
54 | factorplot <- factorplot + geom_text(aes(x = latent.upper + 0.1, label = rownames(plot.dat)), size = 3) + theme(axis.ticks = element_blank(), axis.text.y = element_blank())
55 | factorplot
56 | 


--------------------------------------------------------------------------------
/goals.instructions.R:
--------------------------------------------------------------------------------
 1 | #####################################################################
 2 | ## R2jags example - Students' goals - Multilevel MNL model example ##
 3 | #####################################################################
 4 | 
 5 | ## Johannes Karreth
 6 | ## jkarreth@albany.edu
 7 | 
 8 | ## Note: This example is taken from Lunn et al. (The BUGS Book), Example 10.3.4
 9 | ## Their WINBUGS code can be downloaded at 
10 | ## <http://www.mrc-bsu.cam.ac.uk/software/bugs/the-bugs-project-the-bugs-book/bugs-book-examples/>
11 | 
12 | goals.dat <- read.csv("http://www.jkarreth.net/files/goals.csv")
13 | 
14 | goals.datjags <- as.list(goals.dat)
15 | goals.datjags$npupils <- nrow(goals.dat)
16 | goals.datjags$nschools <- length(unique(goals.datjags$School))
17 | goals.datjags$J <- length(unique(goals.datjags$Goals))
18 | 
19 | ## Pooled model
20 | ## Only one coefficient for boys & goal 1 (sports)
21 | 
22 | mod_pool <- function() { 
23 |   for (i in 1:npupils) {
24 |     Goals[i] ~ dcat(p[i, 1:J])
25 |     
26 |     for (j in 1:J) {
27 |       log(q[i,j]) <- a[i, j]
28 |       p[i, j] <- q[i, j] / sum(q[i, 1:J])
29 |     }
30 |     a[i, 1] <- b[1] + b.boy * Gender[i] # only estimate b for Goals = 1
31 |     a[i, 2] <- b[2]
32 |     a[i, 3] <- 0
33 |   }
34 |   
35 |   b[1] ~ dnorm(0, 0.0001)
36 |   b[2] ~ dnorm(0, 0.0001)
37 |   b.boy ~ dnorm(0, 0.0001)
38 |   or.boy <- exp(b.boy)
39 | 
40 | } 
41 | 
42 | inits.pool1 <- list("b.boy" = 0, "b" = c(0, 0)) 
43 | inits.pool2 <- list("b.boy" = 0, "b" = c(0, 0)) 
44 | inits.pool3  <- list("b.boy" = 0, "b" = c(0, 0)) 
45 | inits.pool <- list(inits.pool1, inits.pool2, inits.pool3)
46 | 
47 | params <- c("b.boy", "b")
48 | 
49 | fit.pool <- jags(data = goals.datjags, inits = inits.pool, parameters.to.save = params,
50 |                  n.chains = 3, n.iter = 1000, n.burnin = 500,
51 |                  model.file = mod_pool)
52 | 
53 | ## Varying intercepts for schools
54 | ## Still only one coefficient for boys & goal 1 (sports)
55 | 
56 | mod_vi <- function() { 
57 |   for (i in 1:npupils) {
58 |     Goals[i] ~ dcat(p[i, 1:J])
59 |     
60 |     for (j in 1:J) {
61 |       log(q[i,j]) <- a[i, j]
62 |       p[i, j] <- q[i, j] / sum(q[i, 1:J])
63 |     }
64 |     a[i, 1] <- b[School[i], 1] + b.boy * Gender[i] # only estimate b for Goals = 1
65 |     a[i, 2] <- b[School[i], 2]
66 |     a[i, 3] <- 0
67 |   }
68 |   
69 |   b.boy ~ dnorm(0, 0.0001)
70 |   or.boy  <- exp(b.boy)
71 |   
72 |   for (j in 1:nschools) {
73 |     b[j, 1] ~ dnorm(0, 0.0001)
74 |     b[j, 2] ~ dnorm(0, 0.0001)
75 |     b[j, 3] <- 0
76 |   }
77 |   
78 | } 
79 | 
80 | inits.vi1 <- list("b.boy" = 0, "b" = structure(.Data=c(0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA),.Dim=c(9,3))) 
81 | inits.vi2 <- list("b.boy" = 0, "b" = structure(.Data=c(0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA),.Dim=c(9,3))) 
82 | inits.vi3  <- list("b.boy" = 0, "b" = structure(.Data=c(0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, NA, NA, NA, NA, NA, NA, NA, NA),.Dim=c(9,3))) 
83 | inits.vi <- list(inits.vi1, inits.vi2, inits.vi3)
84 | 
85 | params <- c("b.boy", "b")
86 | 
87 | fit.vi <- jags(data = goals.datjags, inits = inits.vi, parameters.to.save = params,
88 |                  n.chains = 3, n.iter = 1000, n.burnin = 500,
89 |                  model.file = mod_vi)


--------------------------------------------------------------------------------
/interaction.instructions.R:
--------------------------------------------------------------------------------
  1 | #####################################################################
  2 | ## Interaction example: Conditional effects, Bayes vs. frequentist ##
  3 | #####################################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## ICPSR Summer Program 2015
  7 | 
  8 | ## Required libraries
  9 | library(R2jags)
 10 | 
 11 | ## Use example data from Dave Armstrong's package
 12 | library(DAMisc)
 13 | data(InteractionEx)
 14 | 
 15 | ## What do the data look like?
 16 | head(InteractionEx)
 17 | 
 18 | InteractionEx2 <- as.data.frame(scale(as.matrix(InteractionEx)))
 19 | 
 20 | ## Frequentist model w/ interaction
 21 | f.mod <- lm(y ~ x1 + x2 + x1:x2, data = InteractionEx2)
 22 | summary(f.mod)
 23 | 
 24 | ## Look at var-cov matrix (for later)
 25 | vcov(lm(y ~ x1 + x2 + x1:x2, data = InteractionEx2))
 26 | 
 27 | ## Bayesian model w/ interaction
 28 | int.model.jags <- function()  {
 29 |   
 30 |   for(i in 1:N){
 31 |     y[i]~dnorm(mu[i], tau)
 32 |     mu[i]<-alpha + beta1 * x1[i] + beta2 * x2[i] + beta3 * x1[i]*x2[i]
 33 |   }
 34 |   
 35 |   alpha~dnorm(0, .01)
 36 |   beta1~dunif(-100,100)
 37 |   beta2~dunif(-100,100)
 38 |   beta3~dunif(-100,100)
 39 |   tau~dgamma(.01,.01)
 40 |   
 41 | }
 42 | 
 43 | ## Data for JAGS
 44 | 
 45 | int.data  <- list(y = InteractionEx2$y, x1 = InteractionEx2$x1, x2 =  InteractionEx2$x2, N = length(InteractionEx2$y))
 46 | 
 47 | ## Name the parameters of the JAGS model you will monitor
 48 | 
 49 | int.params <- c("alpha", "beta1", "beta2", "beta3")
 50 | 
 51 | ## Define the starting values for JAGS
 52 | 
 53 | int.inits <- function(){
 54 |   list("alpha"=c(0), "beta1"=c(0), "beta2" =c(0), "beta3"= c(0))
 55 | }
 56 | 
 57 | ## Fit the model in JAGS, having previously copied the BUGS model into your working directory as "angell.model.jags"
 58 | 
 59 | int.fit <- jags(data=int.data, inits=int.inits, int.params, n.chains=2, n.iter=20000, n.burnin=2000, model.file=int.model.jags)
 60 | 
 61 | int.mcmc <- as.mcmc(int.fit)
 62 | int.mcmc.mat <- as.matrix(int.mcmc)
 63 | int.mcmc.dat <- as.data.frame(int.mcmc.mat)
 64 | 
 65 | ## Simulate the range of the moderating variable
 66 | 
 67 | x2.sim <- seq(min(InteractionEx2$x2), max(InteractionEx2$x2), by = 0.1)
 68 | 
 69 | ## Calculate conditional effect of X1 across the range of X2
 70 | 
 71 | ## Bayes:
 72 | int.sim <- matrix(rep(NA, nrow(int.mcmc.dat)*length(x2.sim)), nrow = nrow(int.mcmc.dat))
 73 | for(i in 1:length(x2.sim)){
 74 |   int.sim[, i] <- int.mcmc.dat$beta1 + int.mcmc.dat$beta3 * x2.sim[i]
 75 | }
 76 | 
 77 | ## Note: the variance now comes from the posterior, not the vcov matrix
 78 | 
 79 | bayes.c.eff.mean <- apply(int.sim, 2, mean)
 80 | bayes.c.eff.lower <- apply(int.sim, 2, function(x) quantile(x, probs = c(0.025)))
 81 | bayes.c.eff.upper <- apply(int.sim, 2, function(x) quantile(x, probs = c(0.975)))
 82 | 
 83 | ## Frequentist (cf. Brambor et al. 2006)
 84 | freq.c.eff.pe <- coef(f.mod)[2] + coef(f.mod)[4] * x2.sim
 85 | freq.c.eff.sd <- sqrt(vcov(f.mod)[2,2] + x2.sim^2 * vcov(f.mod)[4,4] + 2 * x2.sim * vcov(f.mod)[2,4])
 86 | 
 87 | ## Combine both estimates
 88 | plot.dat <- data.frame(x2.sim, bayes.c.eff.mean, bayes.c.eff.lower, bayes.c.eff.upper, freq.c.eff.pe, freq.c.eff.sd)
 89 | 
 90 | ## Compare
 91 | cor(plot.dat$bayes.c.eff.mean, plot.dat$freq.c.eff.pe)
 92 | 
 93 | ## Plot both estimates
 94 | 
 95 | library(ggplot2)
 96 | 
 97 | ## Use blue for Bayesian, red for frequentist estimates. Transparency to allow overlay; purple indicates complete overlay. Take a close look at the upper and lower limits of the CI for each estimate.
 98 | 
 99 | ## Foundation for the plot & line for the posterior mean of the Bayesian conditional effect
100 | p <- ggplot(plot.dat, aes(x = x2.sim, y = bayes.c.eff.mean)) + geom_line(color = "blue", alpha = 0.8, size = 0.5)
101 | 
102 | ## CI for the Bayesian conditional effect
103 | p <- p + geom_ribbon(aes(ymin = bayes.c.eff.lower, ymax = bayes.c.eff.upper), fill = "blue", alpha = 0.2)
104 | 
105 | ## Lines for the lower and upper bound of the Bayesian conditional effect
106 | p <- p + geom_line(aes(x = x2.sim, y = bayes.c.eff.lower), color = "blue", alpha = 0.8, size = 0.5) + geom_line(aes(x = x2.sim, y = bayes.c.eff.upper), color = "blue", alpha = 0.8, size = 0.5)
107 | 
108 | ## Line for the point estimate of the frequentist conditional effect
109 | p <- p + geom_line(aes(x = x2.sim, y = freq.c.eff.pe), color = "red", alpha = 0.5, size = 0.5)
110 | 
111 | ## CI for the frequentist conditional effect
112 | p <- p + geom_ribbon(aes(ymin = freq.c.eff.pe - 1.96 * freq.c.eff.sd, ymax= freq.c.eff.pe + 1.96 * freq.c.eff.sd), fill = "red", alpha = 0.1)
113 | 
114 | ## Lines for the lower and upper bound of the frequentist conditional effect
115 | p <- p + geom_line(aes(x = x2.sim, y = freq.c.eff.pe - 1.96 * freq.c.eff.sd), color = "red", alpha = 0.5, size = 0.5) + geom_line(aes(x = x2.sim, y = freq.c.eff.pe + 1.96 * freq.c.eff.sd), color = "red", alpha = 0.5, size = 0.5)
116 | 
117 | ## Plot labels and theme
118 | p <- p + xlab("X2") + ylab("Conditional effect of X1") + theme_bw()
119 | 
120 | ## Print the plot
121 | p


--------------------------------------------------------------------------------
/legislators.instructions.R:
--------------------------------------------------------------------------------
  1 | ###################################################################################
  2 | ## R2jags example using Simon Jackman's Legislators example - Bayesian IRT model ##
  3 | ###################################################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## ICPSR Summer Program 2015
  7 | 
  8 | ## Compare to http://jackman.stanford.edu/mcmc/mainFrameWinBugs.php#Legislators
  9 | 
 10 | ## Model (unchanged!)
 11 | 
 12 | legismod.jags <- function()  {
 13 |      for (i in 1:100){
 14 |          for(j in 1:486){
 15 |              y[i,j] ~ dbern(pi[i,j])
 16 |              logit(pi[i,j]) <- x[i]*beta[j,1] - beta[j,2]
 17 |              }
 18 |          }
 19 |          
 20 |      helms <- x[66];          ## monitor these nodes as sanity checks
 21 |      kennedy <- x[41];  
 22 |      fiengold <- x[97]; 
 23 |      boxer <- x[9];     
 24 |      chafee <- x[77];   
 25 |      mosely <- x[26];  
 26 |      gramm <- x[85];   
 27 |  
 28 |      ## priors        
 29 |      for (i in 1:40){ x[i] ~ dnorm(0,1) }
 30 |      for (i in 42:65){ x[i] ~ dnorm(0,1) }
 31 |      for (i in 67:100){ x[i] ~ dnorm(0,1) }
 32 |      x[41] <- -1.0  ## Kennedy on the left
 33 |      x[66] <- 1.0   ## Helms on the right
 34 |      
 35 |      for (j in 1:486){
 36 |        beta[j,1:2] ~ dmnorm(b0[1:2],B0[1:2,1:2]);
 37 |      }
 38 |     b0[1] <- 0;     b0[2] <- 0;
 39 |     B0[1,1] <- .16; B0[2,2] <- .16; 
 40 |     B0[1,2] <- 0;     B0[2,1] <- 0;
 41 |   }    	
 42 | 
 43 | 
 44 | 
 45 | setwd("~/R/Bayes/legislators")
 46 | 
 47 | # Data: copy & paste from legislators.dat.jags, created via
 48 | # bugs2jags("legislators.dat.bugs", "legislators.dat.jags")
 49 | source("http://www.jkarreth.net/files/legislators.dat.jags")
 50 | 
 51 | # Inits: from legislators.init.jags, slightly changed from the WinBUGS example
 52 | 
 53 | legislators.inits <- function(){
 54 | 	list("beta" =
 55 | structure(c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 56 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 57 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 58 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 59 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 60 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 61 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 62 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 63 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 64 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 65 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 66 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 67 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 68 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 69 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 70 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 71 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 72 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 73 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 74 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 75 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 76 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 77 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 78 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 79 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 80 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 81 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 82 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 83 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 84 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 85 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 86 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 87 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 88 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 89 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 90 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 91 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 92 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 93 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 94 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 95 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 96 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 97 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 98 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 99 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
100 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
101 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(486L, 2L)), "x" = c(-1, -1, -1, -1, -1, -1, 1, -1, 1, 1, 
102 | -1, -1, 1, 1, 1, -1, 1, -1, 1, -1, 1, 1, -1, -1, 1, 1, -1, -1, 
103 | -1, 1, -1, -1, 1, -1, 1, 1, -1, -1, 1, 1, NA, 1, -1, 1, -1, 1, 
104 | -1, -1, -1, -1, 1, -1, -1, 1, 1, 1, -1, -1, 1, 1, 1, -1, -1, 
105 | 1, -1, NA, 1, 1, -1, 1, -1, -1, -1, 1, -1, -1, -1, 1, 1, -1, 
106 | 1, 1, -1, -1, -1, -1, -1, -1, -1, 1, 1, -1, -1, 1, 1, 1, 1, 1, 
107 | -1, -1)
108 | )
109 | }
110 | 
111 | legislators.params <- c("x")
112 | legislators.dat <- list("y")
113 | 
114 | legisfit <- jags(data=legislators.dat, inits=legislators.inits, legislators.params, n.chains=2, n.iter=1000, n.burnin=100, model.file=legismod.jags)


--------------------------------------------------------------------------------
/logit.pp.plot.instructions.R:
--------------------------------------------------------------------------------
  1 | ##################################################################################
  2 | ## Plot predicted probabilities over simulated range of data after Logit models ##
  3 | ############# Using the turnout model from Simon Jackman's examples ##############
  4 | ##################### Workflow for WinBUGS/R2Jags/R2WinBUGS ######################
  5 | ##################################################################################
  6 | 
  7 | ## Johannes Karreth
  8 | ## jkarreth@albany.edu
  9 | 
 10 | ## Fit your Bayesian model, monitor the coefficients (in this example, named b[]) 
 11 | ## and the cut points (in this example, named theta[])
 12 | ## and if you like, the predicted probability of y=1 for each case (in this example, named p[])
 13 | 
 14 | library(lattice)
 15 | library(ggplot2)
 16 | 
 17 | #####################################################
 18 | ## FIRST, PREDICTED PROBABILITIES ON OBSERVED DATA ##
 19 | #####################################################
 20 | 
 21 | ## Data
 22 | turnout.dat <- read.csv("http://www.jkarreth.net/files/turnout.csv")
 23 | 
 24 | ## First, fit a logit model using
 25 | ## https://github.com/jkarreth/Bayes/blob/master/turnout.instructions.R
 26 | ## But be sure to monitor individual predicted probabilities (if you want to plot them)
 27 | ## The BUGS/JAGS object is named "turnout.fit" from here on
 28 | 
 29 | ## R2JAGS USERS, extract the posterior distributions from your jags/bugs object:
 30 | turnout.mcmc <- as.mcmc(turnout.fit)
 31 | turnout.mat <- as.matrix(turnout.mcmc)
 32 | turnout.out <- as.data.frame(turnout.mat)
 33 | 
 34 | ## R2WINBUGS/R2OPENBUGS USERS, extract the posterior distributions from your jags/bugs object:
 35 | ## (in this case, we have 2 chains only)
 36 | turnout.mat <- rbind(turnout.fit$sims.array[, 1, ], turnout.fit$sims.array[, 2, ])
 37 | turnout.out <- data.frame(turnout.mat)
 38 | 	
 39 | ## Define vectors with the values of the predicted probabilities (pulled from the coda files or your mcmc list)
 40 | ## This assumes that you used p in logit(p[i]) in your model
 41 | ## grep("p[",) pulls all columns that start with p[
 42 | p <- turnout.out[, grep("p[", colnames(turnout.out), fixed = T)]
 43 | 
 44 | ## If you want to plot mean predicted observed probabilities against a covariate,
 45 | ## collapse p to the mean of all iterations
 46 | p.mean <- apply(p, 2, mean)
 47 | 
 48 | ## Plot predicted probability (of y=1) against *observed* values of age \
 49 | ## (most likely an ugly plot, b/c we have several y_i(x_i))
 50 | plot(p.mean ~ turnout.dat.dat$age, xlim = c(min(turnout.dat.dat$age), max(turnout.dat.dat$age)))
 51 | 
 52 | #################################################################
 53 | ######### SECOND, OUT-OF-SAMPLE PREDICTED PROBABILITIES, ########
 54 | ## across the range of X1 and given values of other covariates ##
 55 | #################################################################
 56 | 
 57 | ## Import the chains containing the coefficients from your BUGS model, 
 58 | ## after monitoring *only* the coefficients (in this example, named b)
 59 | 
 60 | ## R2JAGS/R2WINBUGS USERS:
 61 | turnout.mcmc <- as.mcmc(turnout.fit)
 62 | turnout.mat <- as.matrix(turnout.mcmc)
 63 | b <- turnout.mat[ , 1:6] ## one column for each coefficient, in this case I had 6 coefficients
 64 | 
 65 | # Generate vector with the simulated range of X1 (here, age)
 66 | new.age <- seq(min(turnout.dat$age), max(turnout.dat$age))
 67 | 
 68 | # Generate vectors set at desired values of the other covariates
 69 | new.education <- rep(median(turnout.dat$educ), length(new.age))
 70 | new.closing <- rep(median(turnout.dat$closing), length(new.age))
 71 | new.govelec <- rep(median(turnout.dat$govelec), length(new.age))
 72 | new.south <- rep(median(turnout.dat$south), length(new.age))
 73 | # Need value of 1 for the constant
 74 | constant <- rep(1, length(new.age))
 75 | 
 76 | # Generate dataframe with simulated values
 77 | turnout.sim <- cbind(constant,new.education,new.age, new.south, new.govelec, new.closing)   ## cbind: combine (bind) columns
 78 | 
 79 | # Or: generate two dataframes to plot PPs for each value of the South dummy (continued further below)
 80 | turnout.sim.s <- cbind(constant,new.education,new.age,rep(1,max(turnout.dat$age)-min(turnout.dat$age)+1), new.govelec, new.closing)
 81 | turnout.sim.n <- cbind(constant,new.education,new.age, rep(0,max(turnout.dat$age)-min(turnout.dat$age)+1), new.govelec, new.closing)
 82 | 
 83 | # Multiply X by the betas from your BUGS output
 84 | Xb <- t(turnout.sim%*% t(b))
 85 | 
 86 | # Transform linear prediction to probability
 87 | turnout.pp.age <- exp(Xb)/(1+exp(Xb))
 88 | 
 89 | # Get CIs (for plotting)
 90 | turnout.ci.age <- apply(turnout.pp.age, 2, quantile, probs=c(.025,.975)) ## apply(a, b, c): apply function (c) to object(a), by(b: 1 for row, 2 for column)
 91 | 
 92 | # Get mean predictions over the n (from BUGS/JAGS iterations) sampled values of b
 93 | mean.turnout.pp.age <- apply(turnout.pp.age, 2, mean)
 94 | mean.turnout.ci.age <- apply(turnout.ci.age, 2, quantile, probs=c(.025,.975))
 95 | 
 96 | # Plot mean probability against the full (simulated) range of X (=age)
 97 | plot(new.age, mean.turnout.pp.age, pch=19, main="Predicted probability of voting", xlab="Age", ylab="Pr(Voting)", xlim=c(min(turnout.dat$age), max(turnout.dat$age)), ylim=c(0,1))
 98 | 
 99 | # Add standard errors as vertical lines (could also do this using 2.5% and 97.5% values from p.chains)
100 | segments(new.age, mean.turnout.ci.age[1, ], new.age, mean.turnout.ci.age[2, ], lty=1)
101 | 
102 | ## Continue two predictions for south=[0,1]
103 | 
104 | # Multiply X by the betas from your BUGS output
105 | Xb.s <- t(turnout.sim.s %*% t(b))
106 | Xb.n <- t(turnout.sim.n %*% t(b))
107 | 
108 | # Transform linear prediction to probability
109 | turnout.pp.age.s <- exp(Xb.s)/(1+exp(Xb.s))
110 | turnout.pp.age.n <- exp(Xb.n)/(1+exp(Xb.n))
111 | 
112 | # Get mean linear predictions & SDS over the n (from BUGS iterations) sampled values of b
113 | mean.turnout.pp.age.s <- apply(turnout.pp.age.s, 2, mean)
114 | mean.turnout.pp.age.n <- apply(turnout.pp.age.n, 2, mean)
115 | turnout.s.ci <- apply(turnout.pp.age.s, 2, quantile, probs=c(.025,.975))
116 | turnout.n.ci <- apply(turnout.pp.age.n, 2, quantile, probs=c(.025,.975))
117 | 
118 | #####################
119 | ## Plot 1 (simple) ##
120 | #####################
121 | 
122 | # Plot mean probability against the full (simulated) range of X (=age)
123 | adjust <- rep(.3, max(turnout.dat$age)-min(turnout.dat$age)+1)  ## slightly adjust the position of the "North" points to avoid overlay
124 | plot(new.age, mean.turnout.pp.age.s, pch=19, main="Predicted probability of voting", xlab="Age", ylab="Pr(Voting)", col="red", xlim=c(min(turnout.dat$age), max(turnout.dat$age)), ylim=c(0,1))
125 | points(new.age + adjust, mean.turnout.pp.age.n, pch=19, col="black")
126 | segments(new.age, turnout.s.ci[1, ], new.age, turnout.s.ci[2, ], lty=1, col="red")
127 | segments(new.age + adjust, turnout.n.ci[1, ], new.age + adjust, turnout.n.ci[2, ], lty=1, col="black")
128 | legend("bottomright", c("South", "Rest of the U.S."), col=c("red", "black"), pch=19, inset=.01, bty="n")
129 | 
130 | #####################
131 | ## Plot 2 (panels) ##
132 | #####################
133 | 
134 | ## Generate data set used for the two plots below
135 | plot.dat <- data.frame(
136 | 	means = c(mean.turnout.pp.age.s , mean.turnout.pp.age.n), 	## means of the pred. probabilities
137 | 	lower = c(turnout.s.ci[1, ] , turnout.n.ci[1, ]), 	## upper CI
138 | 	upper = c(turnout.s.ci[2, ], turnout.n.ci[2, ]),		## lower CI
139 | 	south = factor(rep(c(1,0), each=max(turnout.dat$age)-min(turnout.dat$age)+1), levels=c(1,0), labels=c("South", "Rest of the U.S.")),  ## Outcome variable
140 | 	age = rep(new.age, 2))	## Explanatory variable of interest (here: age)
141 | 	
142 | xyplot(means ~ age | south, data=plot.dat, as.table=T, 
143 | 	ylim=c(min(plot.dat$lower), max(plot.dat$upper)), xlab="Age", ylab="Pr(Voting)", main="Probability of Voting",
144 | 	panel = function(x,y,subscripts){
145 | 		panel.lines(x,y,lty=1, col="black")
146 | 		panel.lines(x, plot.dat$lower[subscripts], lty=2, col="red")
147 | 		panel.lines(x, plot.dat$upper[subscripts], lty=2, col="red")})
148 | 
149 | #################################
150 | ## Plot 3 (transparent colors) ##
151 | #################################
152 | 
153 | xyplot(mean.turnout.pp.age.s ~ new.age, ylim=c(0,1), xlab="Age", ylab="Pr(Voting)", main="Probability of voting",
154 | 	key=list(space=list("right"), rectangles=list(col=c(rgb(1,0,0, alpha=.35), rgb(0,0,1, alpha=.35))), text=list(c("South", "Rest of the U.S."))),
155 | 	panel=function(x,y){
156 | 		panel.polygon(x=c(x,rev(x),x[1]), y=c(turnout.s.ci[1,], rev(turnout.s.ci[2,]), turnout.s.ci[1,1]), 
157 | 			col=rgb(1,0,0,alpha=.35), border=NA)
158 | 		panel.polygon(x=c(x,rev(x),x[1]), y=c(turnout.n.ci[1,], rev(turnout.n.ci[2,]), turnout.n.ci[1,1]), 
159 | 			col=rgb(0,0,1,alpha=.35), border=NA)
160 | 		panel.lines(x, mean.turnout.pp.age.s, col="red")
161 | 		panel.lines(x, mean.turnout.pp.age.n, col="blue")
162 | 		})
163 | 
164 | ######################
165 | ## Plot 4 (ggplot2) ##
166 | ######################
167 | 
168 | ## Dataframe for plotting
169 | plot.dat <- data.frame(south = as.factor(c(rep(1, length(new.age)), rep(0, length(new.age)))), new.age = c(new.age, new.age), mean.turnout.pp = c(mean.turnout.pp.age.s, mean.turnout.pp.age.n), turnout.lower = c(turnout.s.ci[1, ], turnout.n.ci[1, ]), turnout.upper = c(turnout.s.ci[2, ], turnout.n.ci[2, ]))
170 | 
171 | ## Make lines and ribbons separately
172 | p <- ggplot(dat = plot.dat, aes(x = new.age, y = mean.turnout.pp, group = south)) + geom_line(aes(colour = south))
173 | p <- p + geom_ribbon(aes(ymin = turnout.lower, ymax = turnout.upper, fill = south), alpha = 0.2)
174 | p <- p + xlab("Age") + ylab("Pr(Voting)") + theme_bw() + scale_colour_manual(values=c("blue", "red")) + scale_fill_manual(values=c("blue", "red"))
175 | p <- p + theme(legend.position = "none") + annotate("text", x = 30, y = 0.8, label = "Rest of US", colour = "blue") + annotate("text", x = 60, y = 0.5, label = "South", colour = "red")
176 | 
177 | ## Simpler: use geom_smooth()
178 | p2 <- ggplot(dat = plot.dat, aes(x = new.age, y = mean.turnout.pp, group = south)) + geom_smooth(aes(x = new.age, ymin = turnout.lower, ymax = turnout.upper, fill = south, colour = south), stat = "identity")
179 | p2 <- p2 + xlab("Age") + ylab("Pr(Voting)") + theme_bw() + scale_colour_manual(values=c("blue", "red")) + scale_fill_manual(values=c("blue", "red"))
180 | p2 <- p2 + theme(legend.position = "none") + annotate("text", x = 30, y = 0.8, label = "Rest of US", colour = "blue") + annotate("text", x = 60, y = 0.5, label = "South", colour = "red")


--------------------------------------------------------------------------------
/mac.R2openBUGS.R:
--------------------------------------------------------------------------------
 1 | #################################################
 2 | ## Run OpenBUGS through R2OpenBUGS on Mac OS X ##
 3 | #################################################
 4 | 
 5 | # Johannes Karreth
 6 | # jkarreth@albany.edu
 7 | 
 8 | # The code below uses the help file for bugs()
 9 | # Adjust file paths to your system
10 | 
11 | library(R2OpenBUGS)
12 | 
13 | model.file <- system.file(package="R2WinBUGS", "model", "schools.txt")
14 | 
15 | # Some example data (see ?schools for details):
16 | data(schools)
17 | 
18 | J <- nrow(schools)
19 | y <- schools$estimate
20 | sigma.y <- schools$sd
21 | data <- list(J=J, y=y, sigma.y=sigma.y)
22 | inits <- function(){
23 |   list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100),
24 |        sigma.theta=runif(1, 0, 100))
25 | }
26 | ## or alternatively something like:
27 | # inits <- list(
28 | #   list(theta=rnorm(J, 0, 90), mu.theta=rnorm(1, 0, 90),
29 | #        sigma.theta=runif(1, 0, 90)),
30 | #   list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100),
31 | #        sigma.theta=runif(1, 0, 100))
32 | #   list(theta=rnorm(J, 0, 110), mu.theta=rnorm(1, 0, 110),
33 | #        sigma.theta=runif(1, 0, 110)))
34 | 
35 | parameters <- c("theta", "mu.theta", "sigma.theta")
36 | 
37 | schools.sim <- bugs(data = data, 
38 |                     inits = inits, 
39 |                     parameters = parameters, 
40 |                     model.file = model.file,
41 |                     n.chains=3, 
42 |                     n.burnin = 2500,
43 |                     n.iter=5000,
44 |                     debug = FALSE,
45 |                     OpenBUGS.pgm = "/Applications/Wineskin/OpenBUGS.app/Contents/Resources/drive_c/Program Files/OpenBUGS/OpenBUGS323/OpenBUGS.exe",
46 |                     working.directory = "/Users/johanneskarreth/R/Bayes/R2OpenBUGS",    # Note: spaces in this path will create errors!
47 |                     useWINE = TRUE,
48 |                     bugs.seed = 12)
49 | 
50 | print(schools.sim)
51 | plot(schools.sim)
52 | 
53 | # For a function to create a barebones summary table, use
54 | # https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R
55 | # install.packages("devtools")
56 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
57 | mcmctab(schools.sim)
58 | 
59 | # Access posterior draws for postestimation
60 | # Rows are draws, columns are parameters
61 | 
62 | schools.out <- schools.sim$sims.matrix


--------------------------------------------------------------------------------
/mac.R2winBUGS.R:
--------------------------------------------------------------------------------
 1 | ###############################################
 2 | ## Run WinBUGS through R2WinBUGS on Mac OS X ##
 3 | ###############################################
 4 | 
 5 | # Johannes Karreth
 6 | # jkarreth@albany.edu
 7 | 
 8 | # The code below uses the help file for bugs()
 9 | # Adjust file paths to your system
10 | 
11 | library(R2WinBUGS)
12 | 
13 | model.file <- system.file(package="R2WinBUGS", "model", "schools.txt")
14 | 
15 | # Some example data (see ?schools for details):
16 | data(schools)
17 | 
18 | J <- nrow(schools)
19 | y <- schools$estimate
20 | sigma.y <- schools$sd
21 | data <- list(J=J, y=y, sigma.y=sigma.y)
22 | inits <- function(){
23 |   list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100),
24 |        sigma.theta=runif(1, 0, 100))
25 | }
26 | ## or alternatively something like:
27 | # inits <- list(
28 | #   list(theta=rnorm(J, 0, 90), mu.theta=rnorm(1, 0, 90),
29 | #        sigma.theta=runif(1, 0, 90)),
30 | #   list(theta=rnorm(J, 0, 100), mu.theta=rnorm(1, 0, 100),
31 | #        sigma.theta=runif(1, 0, 100))
32 | #   list(theta=rnorm(J, 0, 110), mu.theta=rnorm(1, 0, 110),
33 | #        sigma.theta=runif(1, 0, 110)))
34 | 
35 | parameters <- c("theta", "mu.theta", "sigma.theta")
36 | 
37 | schools.sim <- bugs(data, inits, parameters, model.file,
38 |                     n.chains=3, n.iter=5000,
39 |                     bugs.directory="/Applications/Wineskin/WinBUGS.app/Contents/Resources/drive_c/Program Files/WinBUGS14")
40 | print(schools.sim)
41 | plot(schools.sim)
42 | 
43 | # For a function to create a barebones summary table, use
44 | # https://github.com/jkarreth/JKmisc/blob/master/mcmctab.R
45 | # install.packages("devtools")
46 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
47 | mcmctab(schools.sim)
48 | 
49 | # Access posterior draws for postestimation
50 | # Rows are draws, columns are parameters
51 | 
52 | schools.out <- schools.sim$sims.matrix


--------------------------------------------------------------------------------
/mlm.state.instructions.R:
--------------------------------------------------------------------------------
  1 | ####################################################################
  2 | ## R2jags example - Bush votes in 1988 - Multilevel model example ##
  3 | ####################################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## jkarreth@albany.edu
  7 | 
  8 | ## Note: This example is taken from Gelman & Hill, Ch. 17. 
  9 | ## state.mod is a simplified version with fewer interactions 
 10 | ## and with categorical instead of dummy variables for the age & education categories.
 11 | 
 12 | ## Simple model
 13 | 
 14 | state.mod <- function() {
 15 |   for (i in 1:n){
 16 |     y[i] ~ dbin (p.bound[i], 1)
 17 |     p.bound[i] <- max(0, min(1, p[i]))
 18 |     logit(p[i]) <- Xbeta[i]
 19 |     Xbeta[i] <- b.female*female[i] + b.black*black[i] +
 20 |       b.age*age[i] + b.edu*edu[i]
 21 |       + b.state[state[i]]
 22 |   }
 23 |   b.female ~ dnorm (0, .01)
 24 |   b.black ~ dnorm (0, .01)
 25 |   b.age ~ dnorm (0, .01)
 26 |   b.edu ~ dnorm (0, .01)
 27 | 
 28 |  
 29 |   for (j in 1:n.state){
 30 |     b.state[j] ~ dnorm(b.state.hat[j], tau.state)
 31 |     b.state.hat[j] <- b.state0 + b.v.prev*v.prev[j]
 32 |     }
 33 |     b.state.hat.mu<-mean(b.state.hat[])
 34 |     b.v.prev ~ dnorm(0, .01)
 35 |     b.state0 ~ dnorm(0, .01)
 36 |     tau.state <- pow(sigma.state, -2)
 37 |     sigma.state ~ dunif (0, 100)
 38 | }
 39 | 
 40 | setwd("~/R/Bayes/mlm.states")
 41 | 
 42 | library(R2jags)
 43 | library(foreign)
 44 | 
 45 | ### Read in the data
 46 | 
 47 | ## Option A) Use complete dataset including level-2 predictor
 48 | 
 49 | polls.subset <- read.dta("polls.subset.merged.dta")
 50 | 
 51 | y <- polls.subset$bush
 52 | female <- polls.subset$female
 53 | black <- polls.subset$black
 54 | age <- polls.subset$age
 55 | edu <- polls.subset$edu
 56 | 
 57 | # To get ID for state beginning with 1, do this:
 58 | # (Note that it is redundant in this case, but useful in other situations)
 59 | uniqstate <- unique(polls.subset$state)
 60 | polls.subset$stateid <- match(polls.subset$state, uniqstate)
 61 | state <- polls.subset$stateid
 62 | 
 63 | v.prev <- as.vector(by(polls.subset$g76_84pr, polls.subset$stateid, mean))
 64 | 
 65 | ## Option B) Combine level-1 and level-2 dataset in R
 66 | 
 67 | polls.subset <- read.dta("polls.subset.JK.dta")
 68 | 
 69 | # Load in level-1 data
 70 | 
 71 | y <- polls.subset$bush
 72 | female <- polls.subset$female
 73 | black <- polls.subset$black
 74 | age <- polls.subset$age
 75 | edu <- polls.subset$edu
 76 | state <- polls.subset$state
 77 | 
 78 | # Load in level-2 data. Note that these data should be ordered by state,
 79 | # so you will NOT need a level-2 ID
 80 | 
 81 | presvote <- read.dta("presvote.dta")
 82 | v.prev  <- presvote$g76_84pr
 83 | 
 84 | ### Data are ready
 85 | 
 86 | ### Define number of observations on each level
 87 | 
 88 | n <- length(y)             # of survey respondents
 89 | n.state <- max(state)      # of states
 90 | 
 91 | ### Fit the model in JAGS
 92 | 
 93 | state.dat <- list ("n", "n.state", "y", "female", "black", "age", "edu", "state", "v.prev")
 94 |  
 95 | state.params <- c ("b.female", "b.black", "b.age", "b.edu", "b.state.hat",  "b.v.prev")
 96 | 
 97 | state.inits <- function (){
 98 |  list(b.female = c(0), b.black = c(0), 
 99 |   b.age = c(0), b.edu = c(0), b.state = rnorm(n.state), b.v.prev = c(0))
100 | }
101 | 
102 | state.fit <- jags(data=state.dat, inits=state.inits, state.params, model.file=state.mod, n.chains=2, n.iter=100, n.burnin=10)
103 | 
104 | ### Here, in nested structures, the plot() command in R2Jags comes in handy
105 | plot(state.fit)
106 | 
107 | ### Compare to GLM estimates
108 | 
109 | state.df <- read.dta("polls.subset.merged.dta")
110 | library(arm)
111 | state.lmer <- lmer(bush ~ edu + age + female + black + g76_84pr + (1|state), data=state.df, family=binomial) 
112 | summary(state.lmer)


--------------------------------------------------------------------------------
/ologit.pp.plot.instructions.R:
--------------------------------------------------------------------------------
  1 | ############################################################################################
  2 | #### Plot predicted probabilities and other things after a Bayesian ordered logit model ####
  3 | ############################################################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## jkarreth@albany.edu
  7 | 
  8 | ## Much of the code below is (a) old and (b) based on code originally written by 
  9 | ## Dave Armstrong, UW-Milwaukee, armstrod@uwm.edu
 10 | ## See the slides for Day 11 for more compact code, also with explanations
 11 | 
 12 | library(foreign)
 13 | library(R2jags)
 14 | 
 15 | ## Estimate frequentist and Bayesian models
 16 | 
 17 | hw5.dat <- read.dta("http://www.jkarreth.net/files/ordered.logit.dta")
 18 | hw5.dat <- na.omit(hw5.dat)
 19 | 
 20 | hw5.dat$interact_f <- as.factor(hw5.dat$interact)
 21 | freq.ologit <- MASS::polr(interact_f ~ orgmembs + indmembs + age + taxexmpt, data = hw5.dat)
 22 | summary(freq.ologit)
 23 | 
 24 | hw5.dat.jags <- list(interact = hw5.dat$interact, orgmembs = hw5.dat$orgmembs, indmembs = hw5.dat$indmembs, age = hw5.dat$age, taxexmpt = hw5.dat$taxexmpt, N = length(hw5.dat$interact))
 25 | 
 26 | hw5.mod <- function() {
 27 |   
 28 |   for (i in 1:N){
 29 |     for (j in 1:3){
 30 |       logit(gamma[i,j]) <- theta1[j] - mu[i]  ## Note the use of theta1 instead of theta
 31 |     }
 32 |     interact[i] ~ dcat(p[i, 1:4])
 33 |     p[i,1] <- gamma[i,1]
 34 |     p[i,2] <- gamma[i,2]-gamma[i,1]
 35 |     p[i,3] <- gamma[i,3]-gamma[i,2]
 36 |     p[i,4] <- 1-gamma[i,3]
 37 |     mu[i] <- b1 * orgmembs[i]+b2 * indmembs[i]+b3 * age[i]+b4 * taxexmpt[i] 
 38 |     
 39 |   }
 40 |   
 41 |   for(i in 1:3){   
 42 |     theta[i] ~ dnorm(0,.0001)  ## Note the use of theta
 43 |   }
 44 |   theta1[1:3] <- sort(theta)  ## Note the use of theta1 / theta   
 45 |   
 46 |   b1 ~ dnorm(0,.0001)
 47 |   b2 ~ dnorm(0,.0001)
 48 |   b3 ~ dnorm(0,.0001)
 49 |   b4 ~ dnorm(0,.0001)
 50 |   
 51 | }
 52 | 
 53 | hw5.params <- c("b1", "b2", "b3", "b4", "p", "theta") ## Monitor "p" for observed probabilities, PRE, etc.
 54 | 
 55 | hw5.inits <- function(){
 56 |   list("b1" = c(0), "b2" = c(0), "b3" = c(0), "b4" = c(0), "theta" = c(0,10,13))  ## Note that we're giving inits to theta, not theta1
 57 | }
 58 | 
 59 | hw5.fit <- jags(data = hw5.dat.jags, inits = hw5.inits, hw5.params, n.chains = 3, n.iter = 1000, n.burnin = 500, model.file =  hw5.mod)
 60 | 
 61 | ## Posterior distributions of coefficients:
 62 | hw5.fit$BUGSoutput$summary[1:5, ]
 63 | 
 64 | ## JAGS/BUGS USERS: Read the coda output from Bugs or Jags into R like so:
 65 | # chains <- rbind(read.coda("ologit_chain1.txt", "ologit_index.txt"), 
 66 | #                 read.coda("ologit_chain2.txt", "ologit_index.txt"))
 67 | 
 68 | ## Or: R2JAGS/R2WINBUGS USERS, if hw5.fit is your R2jags/R2WinBUGS object:
 69 | chains <- as.mcmc(hw5.fit)
 70 | chains <- as.matrix(chains)
 71 | chains <- as.data.frame(chains)
 72 | 
 73 | ## Be sure to remember the order of your coefficients. In this case:
 74 | ## b1: orgmembs
 75 | ## b2: indmembs
 76 | ## b3: age
 77 | ## b4: taxexmpt
 78 | 
 79 | #####################################################
 80 | ## FIRST, PREDICTED PROBABILITIES ON OBSERVED DATA ##
 81 | #####################################################
 82 | 
 83 | ## Define vectors with the values of the predicted probabilities (pulled from the coda files)
 84 | ## grep("p[",) pulls all columns that start with p[
 85 | probs <- chains[, grep("p[", colnames(chains), fixed = T)]
 86 | ## Note: this will be in the order p[observation, outcome category]
 87 | 
 88 | ## Now, make a new list with n.iter (here, 1000) elements, 
 89 | ## where each is a matrix of the probability of being in one of the categories,
 90 | ## hence ncol = number of categories (here, 4) and nrow (not specified) = 772 (N of 
 91 | ## the original data.
 92 | prob.list <- lapply(1:nrow(probs), function(x) matrix(probs[x, ], ncol = 4))	## nrow(probs) is the N of your simulations (if using several chains, the N is be the number of all iterations combined, i.e. the N of chain1 + the N of chain2, etc.). 
 93 | 
 94 | ##
 95 | ## Summarize classification and proportional reduction of error (PRE)
 96 | ##
 97 | 
 98 | ## Identify the predicted category (simply the largest value of the four columns)
 99 | pred.cats <- sapply(prob.list, function(x) apply(x, 1, which.max))
100 | 
101 | ## % correctly predicted: average # of simulations where pred.cats = observed outcome
102 | ## Here, dat is the dataframe of the observed data, and interact is the observed DV
103 | pcp <- apply(pred.cats, 2, function(x) mean(x ==  as.numeric(hw5.dat$interact)))
104 | 
105 | ## PMC: percentage of observations in the modal category of the observed data
106 | ## This would be the ``naive'' guess in a null model - simply predict the modal category
107 | pmc <- max(table(as.numeric(hw5.dat$interact))/sum(table(as.numeric(hw5.dat$interact))))
108 | 
109 | ## PRE is defined as \frac{PCP - PMC}{1 - PMC}, so use this formula
110 | pre <- (pcp - pmc)/(1-pmc)
111 | 
112 | ## Neat: remember, we are doing all this over a list of 1000 simulations
113 | ## Hence, PRE is actually a distribution
114 | summary(pre)
115 | mean(pre > 0)
116 | 
117 | ## Again, PRE is a distribution
118 | plot(density(pre, bw = .05))
119 | ## Terrible model - but recall that almost all obs. had interact =  = {4}
120 | ## So the modal null model should already fare pretty well.
121 | 
122 | ## Again, note the nice fact that we have a distribution for PRE,
123 | 
124 | ## PRE and ePRE should be quite similar to the posterior means from the Bayesian model.
125 | ## Use the pre() function from Dave Armstrong's DAMisc package:
126 | DAMisc::pre(freq.ologit)
127 | 
128 | #################################################################
129 | ######### SECOND, OUT-OF-SAMPLE PREDICTED PROBABILITIES, ########
130 | ## across the range of X1 and given values of other covariates ##
131 | #################################################################
132 | 
133 | ## Generate dataset with simulated data (i.e. where your explanatory variables are set to min->max or held constant)
134 | newdat <- data.frame(
135 |   age = seq(min(hw5.dat$age), max(hw5.dat$age), length = max(hw5.dat$age) - min(hw5.dat$age) + 1),	## length: number of individual values of this continuous expl. var.
136 |   orgmembs = median(hw5.dat$orgmembs),
137 |   indmembs = median(hw5.dat$indmembs),
138 |   taxexmpt = median(hw5.dat$taxexmpt)
139 | )
140 | 
141 | ## Define matrices with the values of the coefficients (pulled from the coda files)
142 | ## in this case, note that all my coefficients were named b[j], where j = number of coefficients
143 | b <- chains[,grep("b", colnames(chains), fixed = T)]	
144 | 
145 | ## In the MNL, you would define as many coefficient matrices as you have categories, minus the base category.
146 | ## Then you can simply proceed by calculating predicted probabilities for each category
147 | 
148 | ## Define X matrix (explanatory variables)
149 | ## Important: The order of predictors must match the order in your model code!
150 | X <- model.matrix( ~ orgmembs + indmembs + age + taxexmpt, data = newdat)
151 | ## Remove the intercept for ordered logit
152 | ## (You would keep it for MNL)
153 | X <- X[ , -1]
154 | 
155 | ## Multiply X by the betas from your JAGS/BUGS output
156 | Xb <- t(X %*% t(b))
157 | 
158 | ## Define vectors with the values of the cutoff points (pulled from the coda files)
159 | ## Note that in my model, the cutpoints were called theta
160 | ## Theta will only be in your chains if you monitored them earlier
161 | kaps <- chains[, grep("theta", colnames(chains), fixed = T)]
162 | q1 <- plogis(kaps[ , 1] - Xb)
163 | q2 <- plogis(kaps[ , 2] - Xb)
164 | q3 <- plogis(kaps[ , 3] - Xb)
165 | 
166 | ## Probabilities to be in each of the 4 (j) categories
167 | p1 <- q1
168 | p2 <- q2 - q1
169 | p3 <- q3 - q2
170 | p4 <- 1 - q3
171 | 
172 | ## ... and the respective credible intervals
173 | p1.ci <- apply(p1, 2, quantile, probs = c(.025,.975))
174 | p2.ci <- apply(p2, 2, quantile, probs = c(.025,.975))
175 | p3.ci <- apply(p3, 2, quantile, probs = c(.025,.975))
176 | p4.ci <- apply(p4, 2, quantile, probs = c(.025,.975))
177 | 
178 | ## Get the simulation mean for each quantity of interest
179 | kap.mean <- apply(kaps, 2, mean)
180 | b.mean <- apply(b, 2, mean)
181 | mean.q1 <- plogis(kap.mean[1] - X %*% b.mean)
182 | mean.q2 <- plogis(kap.mean[2] - X %*% b.mean)
183 | mean.q3 <- plogis(kap.mean[3] - X %*% b.mean)
184 | mean.p1 <- mean.q1
185 | mean.p2 <- mean.q2 - mean.q1
186 | mean.p3 <- mean.q3 - mean.q2
187 | mean.p4 <- 1 - mean.q3
188 | 
189 | ## Generate data set used for the two plots below
190 | plot.dat <- data.frame(
191 |   means = c(mean.p1, mean.p2, mean.p3, mean.p4), 	## means of the pred. probabilities
192 |   lower = c(p1.ci[1, ] , p2.ci[1, ], p3.ci[1, ], p4.ci[1, ]), 	## upper CI
193 |   upper = c(p1.ci[2, ], p2.ci[2, ], p3.ci[2, ], p4.ci[2, ]), 		## lower CI
194 |   Interaction = factor(rep(c(1,2,3,4), each = 145), levels = c(1,2,3,4), labels = c("Never", "Seldom", "Occasionally", "Frequently")),  ## Outcome variable
195 |   age = rep(newdat$age, 4))	## Predictor of interest (here: age)
196 | 
197 | ######
198 | ## Plots using ggplot2
199 | ####
200 | 
201 | ## PLOT 1 (four separate panels for each outcome)
202 | 
203 | library(ggplot2)
204 | 
205 | p1 <- ggplot(dat = plot.dat, aes(x = age, y = means)) + geom_smooth(aes(x = age, ymin = lower, ymax = upper), stat = "identity") + facet_wrap(~ Interaction)
206 | p1 <- p1 + xlab("Age") + ylab("Pr(Interaction)") + theme_bw()
207 | 
208 | ## PLOT 2 (all outcomes in one panel)	
209 | 
210 | p2 <- ggplot(dat = plot.dat, aes(x = age, y = means, group = Interaction)) + geom_smooth(aes(x = age, ymin = lower, ymax = upper, fill = Interaction, colour = Interaction), stat = "identity")
211 | p2 <- p2 + xlab("Age") + ylab("Pr(Interaction)") + theme_bw()
212 | 
213 | 
214 | ######
215 | ## Plots using lattice
216 | ####
217 | 
218 | library(lattice)
219 | 
220 | ## PLOT 1 (four separate panels for each outcome)
221 | 
222 | xyplot(means ~ age | Interaction, data = plot.dat, as.table = T, 
223 |        ylim = c(min(plot.dat$lower), max(plot.dat$upper)), xlab = "Age", ylab = "Probability",
224 |        panel = function(x,y,subscripts){
225 |          panel.lines(x,y,lty = 1, col = "black")
226 |          panel.lines(x, plot.dat$lower[subscripts], lty = 2, col = "red")
227 |          panel.lines(x, plot.dat$upper[subscripts], lty = 2, col = "red")})
228 | 
229 | ## PLOT 2 (all outcomes in one panel)	
230 | 
231 | xyplot(mean.p1 ~ newdat$age, ylim = c(0,1), xlab = "Age", ylab = "Probability",
232 |        key = list(space = list("top"), 
233 |                   rectangles = list(col = c(rgb(1,0,0, alpha = .35), rgb(0,1,0, alpha = .35), 
234 |                                                          rgb(0,0,1,alpha = .35), rgb(1,1,0,alpha = .35))), 
235 |                   text = list(c("Pr(Interact = Never)", "Pr(Interact = Seldom)", "Pr(Interact = Occasionally)", "Pr(Interact = Frequently)"))),
236 |        panel = function(x,y){
237 |          panel.polygon(x = c(x,rev(x),x[1]), y = c(p1.ci[1,], rev(p1.ci[2,]), p1.ci[1,1]), 
238 |                        col = rgb(1,0,0,alpha = .35), border = NA)
239 |          panel.polygon(x = c(x,rev(x),x[1]), y = c(p2.ci[1,], rev(p2.ci[2,]), p2.ci[1,1]), 
240 |                        col = rgb(0,1,0,alpha = .35), border = NA)
241 |          panel.polygon(x = c(x,rev(x),x[1]), y = c(p3.ci[1,], rev(p3.ci[2,]), p3.ci[1,1]), 
242 |                        col = rgb(0,0,1,alpha = .35), border = NA)
243 |          panel.polygon(x = c(x,rev(x),x[1]), y = c(p4.ci[1,], rev(p4.ci[2,]), p4.ci[1,1]), 
244 |                        col = rgb(1,1,0,alpha = .35), border = NA)	
245 |          panel.lines(x, mean.p1, col = "red")
246 |          panel.lines(x, mean.p2, col = "green")
247 |          panel.lines(x, mean.p3, col = "blue")
248 |          panel.lines(x, mean.p4, col = "yellow")
249 |        })
250 | 
251 | #### Questions: Johannes Karreth, jkarreth@albany.edu


--------------------------------------------------------------------------------
/pumps.instructions.R:
--------------------------------------------------------------------------------
 1 | ########################################################################
 2 | ## WinBUGS example Pumps - conjugate gamma-Poisson hierarchical model ##
 3 | ########################################################################
 4 | 
 5 | ## Johannes Karreth
 6 | ## ICPSR Summer Program 2016
 7 | 
 8 | ## Data
 9 | 
10 | pumps.data <- list(t = c(94.3, 15.7, 62.9, 126, 5.24, 31.4, 1.05, 1.05, 2.1, 10.5),
11 | 	     x = c(5, 1, 5, 14, 3, 19, 1, 1, 4, 22), N = 10)
12 | 					
13 | t <- pumps.data$t
14 | x <- pumps.data$x
15 | N <- pumps.data$N
16 | 
17 | pumps.data <- list("t", "x", "N")
18 | 
19 | ## Model
20 | 
21 | ## Save the following model separately in "pumps.model.jags" in your WD. Don't type it into R.
22 | 
23 | pumps.mod <- function()  {
24 | 		for (i in 1:N) {
25 | 			theta[i] ~ dgamma(alpha, beta)
26 | 			lambda[i] <- theta[i] * t[i]	## t is an independent variable
27 | 			x[i] ~ dpois(lambda[i])			## x is a stochastic node, lambda is a deterministic node.
28 | 		}
29 | 		alpha ~ dexp(1)						## alpha and beta are the hyperpriors (and parental nodes)
30 | 		beta ~ dgamma(0.1, 1.0)
31 | }
32 | 
33 | ## Set WD:
34 | setwd("~/R/Bayes/pumps")
35 | 
36 | ## Model parameters
37 | 
38 | pumps.params <- c("theta")
39 | 
40 | ## Run JAGS
41 | 
42 | pumpsfit <- jags(data=pumps.data, inits=NULL, pumps.params, n.chains=2, n.iter=5000, n.burnin=1000, model.file=pumps.mod)
43 | 				
44 | ## Results & diagnostics
45 | 
46 | print(pumpsfit)
47 | 
48 | plot(pumpsfit)
49 | 
50 | traceplot(pumpsfit)
51 | ## if we want to keep these:
52 | pdf("pumps.traceplot.pdf") 	## open PDF device
53 | traceplot(pumpsfit)			## write the plot
54 | dev.off()					## finish the plotting command
55 | 
56 | ## Generate MCMC object for analysis
57 | pumpsfit.mcmc <- as.mcmc(pumpsfit)
58 | 
59 | xyplot(pumpsfit.mcmc)
60 | ## large plot (many variables): use different output technique - print as PDF
61 | pdf("pumps.xyplot.pdf") # open pdf device
62 | xyplot(pumpsfit.mcmc, layout = c(5,4,3), aspect="fill")	## provide commands for the layout to fit all plots into one graph
63 | dev.off() # close PDF device; graph is in the WD
64 | 
65 | densityplot(pumpsfit.mcmc) 
66 | ## large plot (many variables): use different output technique
67 | pdf("pumps.densityplot.pdf") # open pdf device
68 | densityplot(pumpsfit.mcmc, layout = c(5,4,3), aspect="fill")
69 | dev.off() # close PDF device; graph is in the WD
70 | 
71 | ## More diagnostics:
72 | 
73 | gelman.plot(pumpsfit.mcmc)
74 | geweke.diag(pumpsfit.mcmc)
75 | geweke.plot(pumpsfit.mcmc)
76 | raftery.diag(pumpsfit.mcmc)
77 | heidel.diag(pumpsfit.mcmc)
78 | 
79 | 
80 | ## Update models:
81 | 
82 | pumpsfit.upd <- update(pumpsfit, n.iter=1000)
83 | pumpsfit.upd <- autojags(pumpsfit)
84 | 
85 | ## Run JAGS such that it produces coda files for analysis with boa or coda packages:
86 | 
87 | pumpsfit <- jags2(data=pumps.data, inits=pumps.inits, pumps.params, n.chains=2, n.iter=5000, n.burnin=1000, model.file="pumps.model.jags", clearWD=FALSE)
88 | 
89 | 
90 | 


--------------------------------------------------------------------------------
/rats.instructions.R:
--------------------------------------------------------------------------------
  1 | ######################################################
  2 | ## WinBUGS example Rats - Normal hierarchical model ##
  3 | ######################################################
  4 | 
  5 | ## Johannes Karreth
  6 | ## ICPSR Summer Program 2016
  7 | 
  8 | ## Required libraries
  9 | library(R2jags)
 10 | 
 11 | ## Model
 12 | 
 13 | rats.model <- function()  {
 14 | 		for( i in 1 : N ) {
 15 | 			for( j in 1 : T ) {
 16 | 				Y[i , j] ~ dnorm(mu[i , j],tau.c)
 17 | 				mu[i , j] <- alpha[i] + beta[i] * (x[j] - xbar)
 18 | 			}
 19 | 			alpha[i] ~ dnorm(alpha.c,tau.alpha)
 20 | 			beta[i] ~ dnorm(beta.c,tau.beta)
 21 | 		}
 22 | 		tau.c ~ dgamma(0.001,0.001)
 23 | 		sigma <- 1 / sqrt(tau.c)
 24 | 		alpha.c ~ dnorm(0.0,1.0E-6)	 
 25 | 		sigma.alpha~ dunif(0,100)
 26 | 		sigma.beta~ dunif(0,100)
 27 | 		tau.alpha<-1/(sigma.alpha*sigma.alpha)
 28 | 		tau.beta<-1/(sigma.beta*sigma.beta)
 29 | 		beta.c ~ dnorm(0.0,1.0E-6)
 30 | 		alpha0 <- alpha.c - xbar * beta.c	
 31 | 	}
 32 | 
 33 | ## Copy the data from the WinBUGS code:
 34 | 
 35 | rats.data  <- list(x = c(8.0, 15.0, 22.0, 29.0, 36.0), xbar = 22, N = 30, T = 5,	
 36 | 		Y = structure(
 37 | 			.Data =   c(151, 199, 246, 283, 320,
 38 | 							 145, 199, 249, 293, 354,
 39 | 							 147, 214, 263, 312, 328,
 40 | 							 155, 200, 237, 272, 297,
 41 | 							 135, 188, 230, 280, 323,
 42 | 							 159, 210, 252, 298, 331,
 43 | 							 141, 189, 231, 275, 305,
 44 | 							 159, 201, 248, 297, 338,
 45 | 							 177, 236, 285, 350, 376,
 46 | 							 134, 182, 220, 260, 296,
 47 | 							 160, 208, 261, 313, 352,
 48 | 							 143, 188, 220, 273, 314,
 49 | 							 154, 200, 244, 289, 325,
 50 | 							 171, 221, 270, 326, 358,
 51 | 							 163, 216, 242, 281, 312,
 52 | 							 160, 207, 248, 288, 324,
 53 | 							 142, 187, 234, 280, 316,
 54 | 							 156, 203, 243, 283, 317,
 55 | 							 157, 212, 259, 307, 336,
 56 | 							 152, 203, 246, 286, 321,
 57 | 							 154, 205, 253, 298, 334,
 58 | 							 139, 190, 225, 267, 302,
 59 | 							 146, 191, 229, 272, 302,
 60 | 							 157, 211, 250, 285, 323,
 61 | 							 132, 185, 237, 286, 331,
 62 | 							 160, 207, 257, 303, 345,
 63 | 							 169, 216, 261, 295, 333,
 64 | 							 157, 205, 248, 289, 316,
 65 | 							 137, 180, 219, 258, 291,
 66 | 							 153, 200, 244, 286, 324),
 67 | 						.Dim = c(30,5)))
 68 | 						
 69 | ## Now define the vectors of the data matrix for JAGS:
 70 | 
 71 | Y <- rats.data$Y
 72 | T <- rats.data$T
 73 | x <-  rats.data$x
 74 | xbar <- rats.data$xbar
 75 | N <- rats.data$N
 76 | 
 77 | ## Read in the rats data for JAGS
 78 | 
 79 | rats.data  <- list("Y", "x", "T", "N", "xbar")
 80 | 
 81 | ## Name the JAGS parameters
 82 | 
 83 | rats.params <- c("tau.c", "sigma", "alpha.c", "sigma.alpha", "sigma.beta", "tau.alpha", "tau.beta", "beta.c", "alpha0")
 84 | 
 85 | ## Define the starting values for JAGS
 86 | 
 87 | rats.inits <- function(){
 88 | 	list("alpha"=c(250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250, 250), 
 89 | 		"beta"=c(6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6), 
 90 | 		"alpha.c"=c(150), "beta.c"=c(10), "tau.c"=c(1), "sigma.alpha"=c(1), "sigma.beta"=c(1))
 91 | }
 92 | 
 93 | ## Fit the model in JAGS, having previously copied the BUGS model in my working directory as "rats.model.jags"
 94 | 
 95 | ratsfit <- jags(data=rats.data, inits=rats.inits, rats.params, n.chains=2, n.iter=2000, n.burnin=1000, model.file=rats.model)
 96 | 
 97 | ## If we wanted to specify different initial values for the different chains, specify:
 98 | ## inits = c() ???
 99 | 
100 | ## Updating?
101 | ratsfit.upd <- update(ratsfit, n.iter=1000)
102 | ratsfit.upd <- autojags(ratsfit)
103 | 
104 | ## Diagnoses
105 | 
106 | print(ratsfit)
107 | 
108 | plot(ratsfit)
109 | 
110 | traceplot(ratsfit)
111 | ## if you want to save them:
112 | pdf("rats.trace.pdf")
113 | traceplot(ratsfit)
114 | dev.off()
115 | 
116 | ## Generate MCMC object for analysis
117 | ratsfit.mcmc <- as.mcmc(ratsfit)
118 | 
119 | summary(ratsfit.mcmc)
120 | 
121 | xyplot(ratsfit.mcmc)
122 | ## maybe better (you can use other Lattice options here as well):
123 | xyplot(ratsfit.mcmc, layout=c(2,6), aspect="fill")
124 | 
125 | densityplot(ratsfit.mcmc)
126 | ## maybe better:
127 | densityplot(ratsfit.mcmc, layout=c(2,6), aspect="fill") 
128 | 
129 | ## and more plots... all using the MCMC object and the CODA package:
130 | 
131 | ## Trace- and density in one plot
132 | pdf("ratsfit.mcmc.plot.pdf")
133 | plot(ratsfit.mcmc)
134 | dev.off()
135 | 
136 | ## Autocorrelation plot
137 | pdf("ratsfit.mcmc.autocorr.pdf")
138 | autocorr.plot(ratsfit.mcmc)
139 | dev.off()
140 | 
141 | ## Others (not working with this example - yet. Convergence problems?)
142 | 
143 | gelman.plot(ratsfit.mcmc)
144 | geweke.diag(ratsfit.mcmc)
145 | geweke.plot(ratsfit.mcmc)
146 | raftery.diag(ratsfit.mcmc)
147 | heidel.diag(ratsfit.mcmc)
148 | 
149 | ## If we want to examine the Coda files via Boa or Coda, use the function jags2:
150 | 
151 | ratsfit <- jags2(data=rats.data, inits=rats.inits, rats.params, n.iter=5000, model.file="rats.model.jags")
152 | 
153 | print(ratsfit)
154 | plot(ratsfit)
155 | ## etc.
156 | 
157 | ## This will create the following files in your working directory:
158 | /Users/johanneskarreth/R/CODAchain1.txt
159 | /Users/johanneskarreth/R/CODAchain2.txt
160 | /Users/johanneskarreth/R/CODAchain3.txt
161 | /Users/johanneskarreth/R/CODAindex.txt
162 | /Users/johanneskarreth/R/jagsdata.txt
163 | /Users/johanneskarreth/R/jagsinits1.txt
164 | /Users/johanneskarreth/R/jagsinits2.txt
165 | /Users/johanneskarreth/R/jagsinits3.txt
166 | /Users/johanneskarreth/R/jagsscript.txt
167 | 
168 | ## ... which can then be analyzed 
169 | library(boa)
170 | boa.menu()
171 | 
172 | ## remember to rename the index file's extension to .ind, and the chain files to .out
173 | 
174 | library(coda)
175 | codamenu()


--------------------------------------------------------------------------------
/regression.dotplot.R:
--------------------------------------------------------------------------------
 1 | ####################################################################
 2 | ## Coefficient dot plot of posterior distributions from JAGS/BUGS ##
 3 | ####################################################################
 4 | 
 5 | ## Johannes Karreth
 6 | ## ICPSR Summer Program 2016
 7 | 
 8 | ## if angell.fit is your R2jags/R2WinBUGS output object, and 
 9 | ## angellfit.mcmc is that object as mcmc object:
10 | angellfit.mcmc <- as.mcmc(angell.fit)
11 | 
12 | ## using the mcmcplots package:
13 | 
14 | library(mcmcplots)
15 | caterplot(angellfit.mcmc, parms = c("beta1", "beta2"), labels = c("Diversity", "Mobility"), val.lim = c(-0.27, 0.05))
16 | abline(v = 0)
17 | 
18 | ## using the arm package:
19 | 
20 | coef.vect <- angell.fit$BUGSoutput$summary[2:3, 1]
21 | sd.vect <- angell.fit$BUGSoutput$summary[2:3, 2]
22 | short.names <- rownames(angell.fit$BUGSoutput$summary[2:3,])
23 | long.names <- c("Diversity", "Mobility")
24 | 
25 | library(arm)
26 | coefplot(coef.vect, sd.vect, varnames = long.names, main = "", xlim = c(-0.3, 0.05))
27 | 
28 | ## using ggplot2 from scratch
29 | library(ggplot2)
30 | angellfit.mat <- as.matrix(angellfit.mcmc)
31 | angellfit.dat <- as.data.frame(angellfit.mat)
32 | coef.vect <- apply(angellfit.dat, 2, mean)
33 | lower.vect <- apply(angellfit.dat, 2, function(x) quantile(x, probs = c(0.025)))
34 | upper.vect <- apply(angellfit.dat, 2, function(x) quantile(x, probs = c(0.975)))
35 | long.names <- c("Intercept", "Diversity", "Mobility", "Deviance")
36 | plot.dat <- data.frame(coef.vect, lower.vect, upper.vect, long.names)[c(2,3), ]
37 | 
38 | p <- ggplot(data = plot.dat, aes(x = coef.vect, y = long.names)) + geom_point() + geom_segment(aes(x = lower.vect, xend = upper.vect, y = long.names, yend = long.names))
39 | p <- p + geom_vline(xintercept = 0, colour = "blue", linetype = 2) + xlab("Posterior estimates") + ylab("")
40 | p


--------------------------------------------------------------------------------
/regression.table.R:
--------------------------------------------------------------------------------
 1 | ############################################################################
 2 | ## Regression coefficient table of posterior distributions from JAGS/BUGS ##
 3 | ############################################################################
 4 | 
 5 | ## Johannes Karreth
 6 | ## ICPSR Summer Program 2016
 7 | 
 8 | ## if angell.fit is your R2jags/R2WinBUGS output object
 9 | 
10 | angell.fit$BUGSoutput$summary[, c(1, 2, 3, 7)]
11 | library(xtable)
12 | regtable1 <- xtable(angell.fit$BUGSoutput$summary[, c(1, 2, 3, 7)])
13 | print(regtable1, type = "latex")
14 | print(regtable1, type = "html")
15 | 
16 | ## Another option:
17 | # install.packages("devtools")
18 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
19 | regtable <- mcmctab(as.mcmc(angell.fit))
20 | 
21 | ## Use regtable() for the summary
22 | 
23 | ## pick out the parameters of interest (e.g., the coefficients and SDs)
24 | 
25 | regtable1 <- regtable[c(1:3), c(1:2)]
26 | 
27 | library(xtable)
28 | regtable1 <- xtable(regtable1)
29 | print(regtable1, type = "latex")
30 | print(regtable1, type = "html")
31 | 
32 | ## you can save the HTML table as a .doc file and open it with MS word. You might have to make some modifications, but the basic layout should be functionable.
33 | print(regtable1, type = "html", 
34 | 	file = "~/Desktop/angell_table.doc")
35 | 
36 | ## pick out other parameters of interest (here, the coefficients and CIs)
37 | 
38 | regtable2 <- regtable[c(1:3), c(1, 3, 4)]
39 | 
40 | xtable(regtable2)


--------------------------------------------------------------------------------
/results.rmarkdown.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Summarizing posteriors through tables in RMarkdown"
 3 | author: "Johannes Karreth"
 4 | date: "July 4, 2016"
 5 | output: pdf_document
 6 | ---
 7 | 
 8 | ## Simulate data and fit a model
 9 | 
10 | ```{r}
11 | n.sim <- 100; set.seed(123)
12 | x1 <- rnorm(n.sim, mean = 5, sd = 2)
13 | x2 <- rbinom(n.sim, size = 1, prob = 0.3)
14 | e <- rnorm(n.sim, mean = 0, sd = 1)
15 | b1 <- 1.2
16 | b2 <- -3.1
17 | a <- 1.5
18 | y <- a + b1 * x1 + b2 * x2 + e
19 | 
20 | sim.dat <- data.frame(y, x1, x2)
21 | 
22 | bayes.mod <- function()  {
23 | 
24 | for(i in 1:N){
25 | y[i] ~ dnorm(mu[i], tau)
26 | mu[i] <- alpha + beta1 * x1[i] + beta2 * x2[i]
27 | }
28 | 
29 | alpha ~ dnorm(0, .01)
30 | beta1 ~ dunif(-100, 100)
31 | beta2 ~ dunif(-100, 100)
32 | tau ~ dgamma(.01, .01)
33 | 
34 | }
35 | 
36 | sim.dat.jags  <- as.list(sim.dat)
37 | 
38 | sim.dat.jags$N  <- nrow(sim.dat)
39 | 
40 | bayes.mod.params <- c("alpha", "beta1", "beta2")
41 | 
42 | 
43 | inits1 <- list("alpha" = 0, "beta1" = 0, "beta2" = 0) 
44 | inits2 <- list("alpha" = 1, "beta1" = 1, "beta2" = 1)
45 | inits3 <- list("alpha" = -1, "beta1" = -1, "beta2" = -1)
46 | bayes.mod.inits <- list(inits1, inits2, inits3)
47 | 
48 | library(R2jags)
49 | set.seed(123) 
50 | 
51 | bayes.mod.fit <- jags(data = sim.dat.jags, inits = bayes.mod.inits, 
52 |   parameters.to.save = bayes.mod.params, n.chains = 3, n.iter = 9000, 
53 |   n.burnin = 1000, 
54 |   model.file = bayes.mod)
55 | ```
56 | 
57 | ## Create a summary table
58 | 
59 | ```{r}
60 | devtools::source_url("https://raw.githubusercontent.com/jkarreth/JKmisc/master/mcmctab.R")
61 | regtable <- mcmctab(as.mcmc(bayes.mod.fit))[-c(4), ]  # remove the fourth row
62 | ```
63 | 
64 | ## Option 1: the pander package
65 | 
66 | ```{r}
67 | library(pander)
68 | pander(regtable)
69 | ```
70 | 
71 | ## Option 2: the knitr package
72 | 
73 | ```{r}
74 | library(knitr)
75 | kable(regtable)
76 | ```


--------------------------------------------------------------------------------
/results.rmarkdown.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jkarreth/Bayes/6084383b894b4a78899b19bea0d7284fa57c2644/results.rmarkdown.pdf


--------------------------------------------------------------------------------
/rstan.R:
--------------------------------------------------------------------------------
 1 | ############################
 2 | ## Run Stan through rstan ##
 3 | ############################
 4 | 
 5 | # Johannes Karreth
 6 | # jkarreth@albany.edu
 7 | 
 8 | # The code below adapts the help file from jags() for use with stan()
 9 | # Adjust file paths to your system
10 | 
11 | # data
12 | y <- c(28.4,7.9,-2.8,6.8,-0.6,0.6,18.0,12.2)
13 | sigma <- c(14.9,10.2,16.3,11.0,9.4,11.4,10.4,17.6)
14 | 
15 | schools_data <- as.list(data.frame(y, sigma))
16 | schools_data$J <- 8
17 | 
18 | 
19 | schools_mod <- "
20 | data {
21 |       int<lower=0> J; // number of schools
22 |       real y[J];      // estimated treatment effects
23 |       real<lower=0> sigma[J]; // s.d. of effect estimates
24 |     }
25 | parameters {
26 |       real mu;
27 |       real<lower=0> tau;
28 |       vector[J] eta;
29 |     }
30 | transformed parameters {
31 |       vector[J] theta;
32 |       theta <- mu + tau * eta;
33 |     }
34 | model {
35 |       eta ~ normal(0, 1);
36 |       y ~ normal(theta, sigma);
37 | }
38 | 
39 | "
40 | 
41 | schools_fit <- stan(model_code = schools_mod, 
42 |                 data = schools_data,
43 |                 chains = 4, iter = 2000, warmup = 1000, thin = 1,
44 |                 init = "0", seed = 123, verbose = FALSE)
45 | 
46 | ## Same with Angell data & OLS
47 | 
48 | # Read the data in car library
49 | library(car)
50 | data(Angell)
51 | angell_dat <- Angell[, -4] ## take off the fourth column (remember, the order is (row, column))
52 | angell_dat <- as.list(angell_dat)
53 | angell_dat$N <- length(angell_dat$moral)
54 | 
55 | ## Model
56 | 
57 | angell_mod <- "
58 |   
59 | data{
60 |   int<lower = 0> N;
61 |   vector[N] moral;
62 |   vector[N] hetero;
63 |   vector[N] mobility;
64 | }
65 | 
66 | parameters{
67 |   vector[3] beta;
68 |   real<lower = 0> sigma;
69 | }
70 | 
71 | model{
72 |   moral ~ normal(beta[1] + beta[2] * hetero + beta[3] * mobility,
73 |                   sigma);
74 | }
75 | 
76 | "
77 | 
78 | ## Fit the model in rstan
79 | 
80 | angell_fit <- stan(model_code = angell_mod, 
81 |                     data = angell_dat,
82 |                     chains = 4, iter = 2000, warmup = 1000, thin = 1,
83 |                     init = "0", seed = 123, verbose = FALSE)


--------------------------------------------------------------------------------
/wvs.instructions.R:
--------------------------------------------------------------------------------
 1 | #################################################################
 2 | ## R2jags example using the WVS example - random effects model ##
 3 | #################################################################
 4 | 
 5 | ## Johannes Karreth
 6 | ## jkarreth@albany.edu
 7 | 
 8 | 
 9 | ## Required libraries
10 | library(foreign)
11 | library(R2jags)
12 | 
13 | ## Read the data
14 | 
15 | source("http://www.jkarreth.net/files/wvs.jags.txt")
16 | 
17 | wvs.dat <- list("N", "nation1", "trust1", "lifesat", "lr", "relig1", "proud1", "sex1", "age", "educ")
18 | 
19 | ## JAGS model: very simple, but can be extended as you see fit
20 | 
21 | 
22 | wvs.mod <- function(){
23 |  for(i in 1:N)
24 |   {
25 |    
26 |     
27 |      lifesat[i]~dnorm(mu[i],sigma2inv[nation1[i]])		## DV: life satisfaction; distributed normal. 
28 |                                                         ## BUT: variance is indexed by NATION, mean is indexed by individual. 
29 |                                                         ## Each country gets its own within-group variance..
30 |      mu[i]<-alpha[nation1[i]] +beta[nation1[i]]*proud1[i]    ## mu gets its own intercept by nation (country-specific intercepts)
31 | 														## If we want to get different slopes, do this: mu[i]<-alpha[nation1[i]] +beta[nation1[i]]*proud1[i]
32 |     }
33 | for(i in 1:4){
34 |   alpha[i]~dnorm(malpha,tau2inv)	## Note: nothing indexed here... IF this were [0;1], it'd be a fixed effects model...
35 |   beta[i]~dnorm(mbeta,tau3inv)
36 |   
37 | }			## Closing the forloop for the four countries
38 | mbeta~dnorm(0,1.0E-4)		## Mean for beta; note that scientific writing is permitted in model code. 
39 | malpha~dnorm(0,1.0E-4)		## Mbeta and Malpha are stochastic parent nodes. If those were to be modeled as a function of country-level covariates, we'd have #  additional mean functions for these!
40 | tau2inv~dgamma(.1,.1)		## Between-group variance
41 | tau2<-1/sqrt(tau2inv)
42 | tau3inv~dgamma(.1,.1)
43 | tau3<-1/sqrt(tau2inv)
44 | 
45 | 
46 | for(i in 1:4){
47 | sigma2inv[i]~dgamma(.1,.1)	## Prior for within-country variance
48 | sigma2[i]<-1/sqrt(sigma2inv[i])
49 | }
50 | }
51 | 
52 | ## Set wd
53 | 
54 | setwd("~/R/Bayes/wvs")
55 | 
56 | ## Name the JAGS parameters to be monitored
57 | 
58 | wvs.params <- c("alpha", "beta")
59 | 
60 | ## Define the starting values for JAGS
61 | 
62 | wvs.inits <- function(){
63 | 	list("alpha"=c(0, 0, 0, 0), "beta"=c(0, 0, 0, 0))
64 | }
65 | 
66 | ## Fit the model in JAGS
67 | 
68 | wvsfit <- jags(data=wvs.dat, inits=wvs.inits, wvs.params, n.chains=2, n.iter=500, n.burnin=100, model.file=wvs.mod)


--------------------------------------------------------------------------------