.
7 | License: MIT + file LICENSE
8 | Encoding: UTF-8
9 | LazyData: true
10 | Authors@R: person("Jeffrey", "Arnold", role = c("aut", "cre"),
11 | email = "jeffrey.arnold@gmail.com")
12 | RoxygenNote: 6.0.1
13 |
--------------------------------------------------------------------------------
/bayesjackman/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2018
2 | COPYRIGHT HOLDER: Jeffrey B. Arnold
3 |
--------------------------------------------------------------------------------
/bayesjackman/NAMESPACE:
--------------------------------------------------------------------------------
1 | exportPattern("^[[:alpha:]]+")
2 |
--------------------------------------------------------------------------------
/bayesjackman/NEWS.md:
--------------------------------------------------------------------------------
1 | # bayesjackman 0.0.1.9000
2 |
3 | * Added a `NEWS.md` file to track changes to the package.
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/bayesjackman/R/Untitled.R:
--------------------------------------------------------------------------------
1 | # Filter a knitr document so that only code-chunk contents remain;
2 | # all other lines are blanked, e.g. before passing the file to a linter.
3 | lint_filter <- function(ifile, encoding = "unknown") {
4 | x <- readLines(ifile, encoding = encoding, warn = FALSE)
5 | n <- length(x)
6 | if (n == 0)
7 | return(x)
8 | # detect the chunk syntax (e.g. Rmd, Rnw) from the file extension
9 | p <- knitr:::detect_pattern(x, tolower(knitr:::file_ext(ifile)))
10 | if (is.null(p))
11 | return(x)
12 | p <- knitr::all_patterns[[p]]
13 | p1 <- p$chunk.begin
14 | p2 <- p$chunk.end
15 | i1 <- grepl(p1, x)
16 | i2 <- knitr:::filter_chunk_end(i1, grepl(p2, x))
17 | # m[i] == 1 marks lines inside a chunk, m[i] == 2 lines outside;
18 | # unmarked lines inherit the state of the preceding line
19 | m <- numeric(n)
20 | m[i1] <- 1
21 | m[i2] <- 2
22 | if (m[1] == 0)
23 | m[1] <- 2
24 | for (i in seq_len(n - 1)) if (m[i + 1] == 0)
25 | m[i + 1] <- m[i]
26 | # blank out lines outside chunks and the chunk delimiters
27 | out <- x
28 | out[m == 2 | i1] <- ""
29 | # return inline code
30 | # x[m == 2] = stringr::str_replace_all(x[m == 2], p$inline.code,
31 | # "")
32 | out
33 | }
34 |
--------------------------------------------------------------------------------
/bayesjackman/R/bayesjackman-package.r:
--------------------------------------------------------------------------------
1 | #' bayesjackman.
2 | #'
3 | #' @name bayesjackman
4 | #' @docType package
5 | NULL
6 |
--------------------------------------------------------------------------------
/bayesjackman/bayesjackman.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: knitr
13 | LaTeX: XeLaTeX
14 |
15 | StripTrailingWhitespace: Yes
16 |
17 | BuildType: Package
18 | PackageUseDevtools: Yes
19 | PackageInstallArgs: --no-multiarch --with-keep.source
20 |
--------------------------------------------------------------------------------
/bayesjackman/data/PoliticalSophistication.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/PoliticalSophistication.rda
--------------------------------------------------------------------------------
/bayesjackman/data/ReaganApproval.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/ReaganApproval.rda
--------------------------------------------------------------------------------
/bayesjackman/data/a2z.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/a2z.rda
--------------------------------------------------------------------------------
/bayesjackman/data/corporatism.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/corporatism.rda
--------------------------------------------------------------------------------
/bayesjackman/data/engines.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/engines.rda
--------------------------------------------------------------------------------
/bayesjackman/data/resistant.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/resistant.rda
--------------------------------------------------------------------------------
/bayesjackman/data/st_louis_census.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/st_louis_census.rda
--------------------------------------------------------------------------------
/bayesjackman/data/turnout2005.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/bayesjackman/data/turnout2005.rda
--------------------------------------------------------------------------------
/bayesjackman/man/bayesjackman.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/bayesjackman-package.r
3 | \docType{package}
4 | \name{bayesjackman}
5 | \alias{bayesjackman}
6 | \alias{bayesjackman-package}
7 | \title{bayesjackman.}
8 | \description{
9 | bayesjackman.
10 | }
11 |
--------------------------------------------------------------------------------
/bimodal.Rmd:
--------------------------------------------------------------------------------
1 | # Bimodal: Extreme missingness in bivariate normal data {#bimodal}
2 |
3 | ```{r bimodal_setup,message=FALSE,cache=FALSE}
4 | library("rstan")
5 | library("tidyverse")
6 | library("stringr")
7 | ```
8 |
9 | Simple methods for dealing with missing data can run into trouble given pernicious patterns of missingness. A famous artificial data set designed to highlight this point was created by Gordon Murray to show how the EM algorithm can run into problems [@Murray1977a; @DempsterLairdRubin1977a].
10 |
11 | ```{r Bimodal}
12 | Bimodal <- tribble(
13 | ~ x1, ~ x2,
14 | 1, 1,
15 | 1, -1,
16 | -1, 1,
17 | -1, -1,
18 | 2, NA,
19 | 2, NA,
20 | -2, NA,
21 | -2, NA,
22 | NA, 2,
23 | NA, 2,
24 | NA, -2,
25 | NA, -2
26 | )
27 | ```
28 |
29 | ```{r}
30 | Bimodal
31 | ```
32 |
33 | Assume bivariate normality, and that the means of the two variables are both
34 | zero, but the variances and covariance are unknown. Inference about the
35 | correlation coefficient $r$ between these two variables is not trivial in
36 | this instance. The marginal complete-data likelihood for $r$ is not unimodal;
37 | it has a saddle point at zero and two local maxima close to -1 and 1. A
38 | Bayesian analysis (with uninformative priors) similarly recovers a bimodal
39 | posterior density for the correlation coefficient; e.g., [@Tanner1996a;
40 | @Congdon2007a].
41 |
42 | ```{r bimodal_mod,message=FALSE,results='hide',cache.extra=tools::md5sum("stan/bimodal.stan")}
43 | bimodal_mod <- stan_model("stan/bimodal.stan")
44 | ```
45 |
46 | ```{r echo=FALSE}
47 | bimodal_mod
48 | ```
49 |
50 | You can ignore the **rstan** warning,
51 |
52 | DIAGNOSTIC(S) FROM PARSER:
53 | Warning (non-fatal):
54 | Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
55 | If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
56 | Left-hand-side of sampling statement:
57 | X[i] ~ multi_normal(...)
58 |
59 | since the left-hand side only copies values into the transformed parameter
60 | (indexing is not a nonlinear transform), so no Jacobian adjustment is
61 | needed.
62 |
63 | ```{r bimodal_data}
64 | X_mat <- as.matrix(Bimodal)
65 | X <- X_mat %>%
66 | as_data_frame() %>%
67 | mutate(.row = row_number()) %>%
68 | gather(.col, value, -.row) %>%
69 | mutate(.col = as.integer(str_replace(.col, "x", "")))
70 |
71 | X_obs <- filter(X, !is.na(value))
72 | X_miss <- filter(X, is.na(value))
73 | bimodal_data <- within(list(), {
74 | N <- nrow(X_mat)
75 | x_obs <- X_obs$value
76 | x_obs_row <- X_obs$.row
77 | x_obs_col <- X_obs$.col
78 | N_obs <- nrow(X_obs)
79 | x_miss_row <- X_miss$.row
80 | x_miss_col <- X_miss$.col
81 | N_miss <- nrow(X_miss)
82 | df <- 100
83 | })
84 | ```
85 |
86 | ```{r bimodal_fit,message=FALSE,results='hide'}
87 | bimodal_fit <- sampling(bimodal_mod, data = bimodal_data,
88 | chains = 4)
89 | ```
90 |
91 | ```{r}
92 | bimodal_fit
93 | ```
94 |
95 | This example is derived from Simon Jackman, "Bimodal: Extreme missingness in
96 | bivariate normal data",
97 | [URL](https://web-beta.archive.org/web/20070724034055/http://jackman.stanford.edu:80/mcmc/bimodal.odc).
98 |
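99 | To see the bimodality directly, inspect the posterior draws of the correlation
100 | implied by the covariance matrix. A minimal sketch, assuming the model
101 | declares the covariance matrix as a parameter named `Sigma`:
102 |
103 | ```{r bimodal_rho}
104 | # correlation implied by each posterior draw of Sigma; its histogram
105 | # should show the bimodality described above
106 | Sigma <- rstan::extract(bimodal_fit, pars = "Sigma")$Sigma
107 | rho <- Sigma[, 1, 2] / sqrt(Sigma[, 1, 1] * Sigma[, 2, 2])
108 | ggplot(tibble(rho = rho), aes(x = rho)) +
109 | geom_histogram(binwidth = 0.05)
110 | ```
111 |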
--------------------------------------------------------------------------------
/cancer.Rmd:
--------------------------------------------------------------------------------
1 | # Cancer: difference in two binomial proportions {#cancer}
2 |
3 | ```{r cancer_setup,message=FALSE,cache=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | ```
7 |
8 | Two groups were chosen as random samples from subpopulations of lung-cancer patients and cancer-free individuals.[^cancer]
9 | The scientific question of interest is the difference in smoking habits between the two groups.
10 | The results of the survey are:
11 | ```{r cancer}
12 | cancer <- tribble(
13 | ~group, ~n, ~smokers,
14 | "Cancer patients", 86, 82,
15 | "Control group", 86, 72
16 | )
17 | ```
18 | ```{r echo=FALSE,results='asis'}
19 | cancer
20 | ```
21 |
22 | ## Two Sample Binomial Model
23 |
24 | In implementing this model, we have just two data points (cancer patients and
25 | control group) and a binomial sampling model, in which the population
26 | proportions of smokers in each group appear as parameters. Quantities of
27 | interest such as the difference in the population proportions and the log of
28 | the odds ratio are computed in the generated quantities section. Uniform priors
29 | on the population proportions are used in this example.
30 |
31 | $$
32 | \begin{aligned}[t]
33 | r_i &\sim \mathsf{Binomial}(n_i, \pi_i)
34 | \end{aligned}
35 | $$
36 | Additionally, the difference,
37 | $$
38 | \delta = \pi_1 - \pi_2 ,
39 | $$
40 | and the log-odds ratio,
41 | $$
42 | \lambda = \log\left(\frac{\pi_1}{1 - \pi_1}\right) - \log \left( \frac{\pi_2}{1 - \pi_2} \right) ,
43 | $$
44 | are computed in the `generated quantities` block.
45 |
46 | Uniform (Beta) priors are placed on the proportions,
47 | $$
48 | \begin{aligned}
49 | \pi_i &\sim \mathsf{Beta}(1, 1) .
50 | \end{aligned}
51 | $$
52 |
53 |
54 | ```{r}
55 | cancer_data <- list(
56 | r = cancer$smokers,
57 | n = cancer$n,
58 | # beta prior on pi
59 | p_a = rep(1, 2),
60 | p_b = rep(1, 2)
61 | )
62 | ```
63 |
64 | The Stan model for this is:
65 | ```{r cancer_mod1,results='hide',cache.extra=tools::md5sum("stan/cancer1.stan")}
66 | cancer_mod1 <- stan_model("stan/cancer1.stan")
67 | ```
68 | ```{r echo=FALSE,results='asis'}
69 | cancer_mod1
70 | ```
71 |
72 | Now estimate the model:
73 | ```{r cancer_fit1,results='hide'}
74 | cancer_fit1 <- sampling(cancer_mod1, cancer_data)
75 | ```
76 | ```{r}
77 | cancer_fit1
78 | ```
79 |
80 | ## Binomial Logit Model of the Difference
81 |
82 | An alternative parameterization directly models the difference in the population proportion.
83 |
84 | $$
85 | \begin{aligned}[t]
86 | r_i &\sim \mathsf{Binomial}(n_i, \pi_i) \\
87 | \pi_1 &= \frac{1}{1 + \exp(-(\alpha + \beta))} \\
88 | \pi_2 &= \frac{1}{1 + \exp(-\alpha)}
89 | \end{aligned}
90 | $$
91 | The parameters $\alpha$ and $\beta$ are given weakly informative priors on the log-odds scale,
92 | $$
93 | \begin{aligned}
94 | \alpha &\sim \mathsf{Normal}(0, 10)\\
95 | \beta &\sim \mathsf{Normal}(0, 2.5)
96 | \end{aligned}
97 | $$
98 |
99 | ```{r cancer_mod2,results='hide',cache.extra=tools::md5sum("stan/cancer2.stan")}
100 | cancer_mod2 <- stan_model("stan/cancer2.stan")
101 | ```
102 | ```{r echo=FALSE,results='asis'}
103 | cancer_mod2
104 | ```
105 |
106 | Re-use `r` and `n` values from `cancer_data`, but add the appropriate values for the prior distributions.
107 | ```{r cancer_data2}
108 | cancer_data2 <- within(cancer_data, {
109 | p_a <- p_b <- NULL
110 | a_loc <- b_loc <- 0
111 | a_scale <- 10
112 | b_scale <- 2.5
113 | })
114 | ```
115 |
116 | Sample from the model:
117 | ```{r cancer_fit2,results='hide'}
118 | cancer_fit2 <- sampling(cancer_mod2, cancer_data2)
119 | ```
120 | ```{r}
121 | cancer_fit2
122 | ```
123 |
124 | ## Questions
125 |
126 | 1. Express the binomial logit model of the difference as a regression.
127 | 1. What number of successes and failures is a `Beta(1,1)` prior equivalent to?
128 |
129 | [^cancer]: This example is derived from Simon Jackman,
130 | "[Cancer: difference in two binomial proportions](https://web-beta.archive.org/web/20070601000000*/http://jackman.stanford.edu:80/mcmc/cancer.odc)",
131 | *BUGS Examples,* 2007-07-24. This example comes from @JohnsonAlbert1999a, using data from @Dorn1954a.
132 |
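133 | A minimal sketch of how the derived quantities might be summarized, assuming
134 | the `generated quantities` block of `cancer1.stan` names the difference
135 | `delta` and the log-odds ratio `lambda` (the Stan file is not shown here, so
136 | those names are assumptions):
137 |
138 | ```{r cancer_derived, eval=FALSE}
139 | # posterior summaries of the difference in proportions and the log odds
140 | # ratio; `delta` and `lambda` are assumed parameter names
141 | summary(cancer_fit1, pars = c("delta", "lambda"))$summary
142 | ```
143 |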
--------------------------------------------------------------------------------
/corporatism.Rmd:
--------------------------------------------------------------------------------
1 | # Corporatism: Hierarchical model for economic growth {#corporatism}
2 |
3 | ```{r corporatism_startup,message=FALSE}
4 | library("rstan")
5 | library("tidyverse")
6 | ```
7 |
8 | The following program implements a regression model of economic growth among 16 OECD countries, 1971-1984 [@Western1998a; @AlvarezGarrettLange1991a].[^corporatism-src]
9 | The model is hierarchical in that it specifies country-specific coefficients for the following predictors: lagged growth, demand, import price movements, export price movements, leftist government and an intercept.
10 | The magnitudes of the country-specific coefficients are conditional on (time-invariant) extent of labor organization within each country; these regression relationships constitute the second level of the model.
11 |
12 | The data come from $N = 16$ countries and $T = 14$ years (1971-1984), with $K = 6$ covariates at the lowest ("micro") level of the hierarchy and $J = 2$ covariates (an intercept and the labor organization variable) at the second level.
13 |
14 | ```{r corporatism}
15 | data("corporatism", package = "bayesjackman")
16 | ```
17 |
18 | ```{r corporatism_country}
19 | corporatism_country <- corporatism %>%
20 | dplyr::select(country, labor.org) %>%
21 | distinct()
22 | ```
23 |
24 | ```{r corporatism_mod,results='hide'}
25 | corporatism_mod <- stan_model("stan/corporatism.stan")
26 | ```
27 |
28 | ```{r echo=FALSE,cache=FALSE,results='asis'}
29 | corporatism_mod
30 | ```
31 |
32 | [^corporatism-src]: Example derived from Simon Jackman, "[Corporatism: hierarchical or 'multi-level' model for economic growth in 16 OECD countries](https://web-beta.archive.org/web/20070724034043/http://jackman.stanford.edu/mcmc/corporatism.odc)", 2007-07-24.
33 |
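34 | As a concrete illustration of the second level of the hierarchy, the $J = 2$
35 | country-level design matrix can be built from `corporatism_country`. A minimal
36 | sketch, assuming the Stan model takes this matrix as the data item `U`:
37 |
38 | ```{r corporatism_U}
39 | # an intercept plus the labor-organization measure for each country
40 | U <- model.matrix(~ labor.org, data = corporatism_country)
41 | dim(U)
42 | ```
43 |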
--------------------------------------------------------------------------------
/docs/.nojekyll:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/.nojekyll
--------------------------------------------------------------------------------
/docs/SingleTruncation.md:
--------------------------------------------------------------------------------
1 |
2 | # Truncation: How does Stan deal with truncation?
3 |
4 | See @Stan2016a, Chapter 11 "Truncated or Censored Data" for more on how Stan handles truncation and censoring.
5 |
6 | ```r
7 | library("tidyverse")
8 | library("rstan")
9 | ```
10 |
11 |
12 | Assume we have the observations, $y = 1,...,9$, from a Normal population with unknown mean and variance, subject to the constraint that $y < 10$,
13 | $$
14 | \begin{aligned}[t]
15 | y &\sim \mathsf{Normal}(\mu, \sigma^2) I(-\infty, 10) .
16 | \end{aligned}
17 | $$
18 |
19 | Ignoring the constraint, the MLEs for the mean and variance are 5 and 6.67; with the constraint taken into account, each observation makes likelihood contribution
20 | $$
21 | f(y; \mu, \sigma^2) / \Phi((k - \mu)/\sigma) ,
22 | $$
23 | where $f$ is the normal density, $\Phi$ is the normal distribution function, $k$ is the truncation point (in this case, 10), and the MLEs of $\mu$ and $\sigma^2$ are 5.32 and 8.28.
24 |
25 | The posterior of this model is not well identified by the data, so the mean, $\mu$, and scale, $\sigma$, are given informative priors based on the data,
26 | $$
27 | \begin{aligned}[t]
28 | \mu &\sim \mathsf{Normal}(\bar{y}, s_y) ,\\
29 | \sigma &\sim \mathsf{HalfCauchy}(0, s_y) .
30 | \end{aligned}
31 | $$
32 | where $\bar{y}$ is the mean of $y$, and $s_y$ is the standard deviation of $y$.
33 |
34 |
35 | ```r
36 | truncation_mod <- stan_model("stan/SingleTruncation.stan")
37 | ```
38 |
39 | data {
40 | int N;
41 | vector[N] y;
42 | real U;
43 | real mu_mean;
44 | real mu_scale;
45 | real sigma_scale;
46 | }
47 | parameters {
48 | real mu;
49 | real sigma;
50 | }
51 | model {
52 | mu ~ normal(mu_mean, mu_scale);
53 | sigma ~ cauchy(0., sigma_scale);
54 | for (i in 1:N) {
55 | y[i] ~ normal(mu, sigma) T[, U];
56 | }
57 | }
58 |
59 |
60 |
61 | ```r
62 | truncation_data <- within(list(), {
63 | y <- 1:9
64 | N <- length(y)
65 | U <- 10
66 | mu_mean <- mean(y)
67 | mu_scale <- sd(y)
68 | sigma_scale <- sd(y)
69 | })
70 | ```
71 |
72 |
73 |
74 |
75 | ```r
76 | truncation_fit <- sampling(truncation_mod, data = truncation_data)
77 | ```
78 |
79 | ```r
80 | truncation_fit
81 | #> Inference for Stan model: SingleTruncation.
82 | #> 4 chains, each with iter=2000; warmup=1000; thin=1;
83 | #> post-warmup draws per chain=1000, total post-warmup draws=4000.
84 | #>
85 | #> mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
86 | #> mu 5.82 0.04 1.48 3.39 4.84 5.63 6.63 9.41 1201 1
87 | #> sigma 3.76 0.04 1.39 1.97 2.80 3.46 4.40 7.20 1250 1
88 | #> lp__ -13.54 0.03 1.08 -16.30 -13.99 -13.22 -12.75 -12.44 1258 1
89 | #>
90 | #> Samples were drawn using NUTS(diag_e) at Tue May 30 22:46:28 2017.
91 | #> For each parameter, n_eff is a crude measure of effective sample size,
92 | #> and Rhat is the potential scale reduction factor on split chains (at
93 | #> convergence, Rhat=1).
94 | ```
95 |
96 | We can compare these results to that of a model in which the truncation is not taken into account:
97 | $$
98 | \begin{aligned}[t]
99 | y_i &\sim \mathsf{Normal}(\mu, \sigma^2), \\
100 | \mu &\sim \mathsf{Normal}(\bar{y}, s_y) ,\\
101 | \sigma &\sim \mathsf{HalfCauchy}(0, s_y) .
102 | \end{aligned}
103 | $$
104 |
105 |
106 | ```r
107 | truncation_mod2 <- stan_model("stan/normal.stan")
108 | ```
109 |
110 | data {
111 | int N;
112 | vector[N] y;
113 | real mu_mean;
114 | real mu_scale;
115 | real sigma_scale;
116 | }
117 | parameters {
118 | real mu;
119 | real sigma;
120 | }
121 | model {
122 | mu ~ normal(mu_mean, mu_scale);
123 | sigma ~ cauchy(0., sigma_scale);
124 | y ~ normal(mu, sigma);
125 | }
126 |
127 |
128 |
129 | ```r
130 | truncation_fit2 <-
131 | sampling(truncation_mod2, data = truncation_data)
132 | ```
133 |
134 | ```r
135 | truncation_fit2
136 | #> Inference for Stan model: normal.
137 | #> 4 chains, each with iter=2000; warmup=1000; thin=1;
138 | #> post-warmup draws per chain=1000, total post-warmup draws=4000.
139 | #>
140 | #> mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
141 | #> mu 5.00 0.02 0.93 3.17 4.43 5.01 5.59 6.91 2193 1
142 | #> sigma 2.97 0.02 0.79 1.87 2.42 2.82 3.33 4.96 1836 1
143 | #> lp__ -13.77 0.03 1.05 -16.69 -14.16 -13.45 -13.03 -12.75 1265 1
144 | #>
145 | #> Samples were drawn using NUTS(diag_e) at Tue May 30 22:52:33 2017.
146 | #> For each parameter, n_eff is a crude measure of effective sample size,
147 | #> and Rhat is the potential scale reduction factor on split chains (at
148 | #> convergence, Rhat=1).
149 | ```
150 |
151 |
152 |
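153 | Comparing the two fits side by side shows the effect of modeling the
154 | truncation; a minimal sketch:
155 |
156 | ```r
157 | # accounting for truncation pulls the posterior mean of mu above the naive
158 | # estimate and inflates sigma
159 | rbind(truncated = summary(truncation_fit)$summary[c("mu", "sigma"), "mean"],
160 | naive = summary(truncation_fit2)$summary[c("mu", "sigma"), "mean"])
161 | ```
162 |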
--------------------------------------------------------------------------------
/docs/bimodal.md:
--------------------------------------------------------------------------------
1 |
2 | # Bimodal: Extreme missingness in bivariate normal data {#bimodal}
3 |
4 |
5 | ```r
6 | library("rstan")
7 | #> Loading required package: ggplot2
8 | #> Loading required package: StanHeaders
9 | #> rstan (Version 2.15.1, packaged: 2017-04-19 05:03:57 UTC, GitRev: 2e1f913d3ca3)
10 | #> For execution on a local, multicore CPU with excess RAM we recommend calling
11 | #> rstan_options(auto_write = TRUE)
12 | #> options(mc.cores = parallel::detectCores())
13 | library("tidyverse")
14 | #> Loading tidyverse: tibble
15 | #> Loading tidyverse: tidyr
16 | #> Loading tidyverse: readr
17 | #> Loading tidyverse: purrr
18 | #> Loading tidyverse: dplyr
19 | #> Conflicts with tidy packages ----------------------------------------------
20 | #> extract(): tidyr, rstan
21 | #> filter(): dplyr, stats
22 | #> lag(): dplyr, stats
23 | library("stringr")
24 | ```
25 |
26 |
27 | Simple methods for dealing with missing data can run into trouble given pernicious patterns of missingness. A famous artificial data set designed to highlight this point was created by Gordon Murray to show how the EM algorithm can run into problems [@Murray1977a; @DempsterLairdRubin1977a].
28 |
29 | ```
30 | x1: 1 1 -1 -1 2 2 -2 -2 * * * *
31 | x2: 1 -1 1 -1 * * * * 2 2 -2 -2
32 | ```
33 |
34 | Assume bivariate normality, and that the means of the two variables are both zero, but the variances and covariance are unknown. Inference about the correlation coefficient $r$ between these two variables is not trivial in this instance. The marginal complete-data likelihood for $r$ is not unimodal; it has a saddle point at zero and two local maxima close to -1 and 1. A Bayesian analysis (with uninformative priors) similarly recovers a bimodal posterior density for the correlation coefficient; e.g.,
35 | [@Tanner1996a; @Congdon2007a].
36 |
37 |
38 | ```r
39 | bimodal_mod <- stan_model("stan/bimodal.stan")
40 | ```
41 |
42 | data {
43 | int N;
44 | int N_obs;
45 | int N_miss;
46 | vector[N_obs] x_obs;
47 | int x_obs_idx[N_obs, 2];
48 | int x_miss_idx[N_miss, 2];
49 | vector[2] mu;
50 | }
51 | parameters {
52 | cov_matrix[2] Sigma;
53 | vector[N_miss] x_miss;
54 | }
55 | transformed parameters {
56 | // using an array of vectors is more convenient when sampling
57 | // multi_normal than using an matrix
58 | vector[2] X[N];
59 | for (i in 1:N_obs) {
60 | X[x_obs_idx[i, 1], x_obs_idx[i, 2]] = x_obs[i];
61 | }
62 | for (i in 1:N_miss) {
63 | X[x_miss_idx[i, 1], x_miss_idx[i, 2]] = x_miss[i];
64 | }
65 | }
66 | model{
67 | for (i in 1:N) {
68 | X[i] ~ multi_normal(mu, Sigma);
69 | }
70 | }
71 |
72 |
73 | You can ignore the **rstan** warning,
74 | ```
75 | DIAGNOSTIC(S) FROM PARSER:
76 | Warning (non-fatal):
77 | Left-hand side of sampling statement (~) may contain a non-linear transform of a parameter or local variable.
78 | If it does, you need to include a target += statement with the log absolute determinant of the Jacobian of the transform.
79 | Left-hand-side of sampling statement:
80 | X[i] ~ multi_normal(...)
81 | ```
82 | since the left-hand side only copies values into the transformed parameter
83 | (indexing is not a nonlinear transform), so no Jacobian adjustment is
84 | needed.
85 |
86 |
87 | ```r
88 | X_mat <- matrix(c(1, 1, -1, -1, 2, 2, -2, -2, NA, NA, NA, NA,
89 | 1, -1, 1, -1, NA, NA, NA, NA, 2, 2, -2, -2), ncol = 2)
90 | X <- X_mat %>%
91 | as_data_frame() %>%
92 | mutate(.row = row_number()) %>%
93 | gather(.col, value, -.row) %>%
94 | mutate(.col = as.integer(str_replace(.col, "V", "")))
95 |
96 | X_obs <- filter(X, !is.na(value))
97 | X_miss <- filter(X, is.na(value))
98 |
99 | bimodal_data <- within(list(), {
100 | # number of rows of the original data matrix, not of the long data frame X
101 | N <- nrow(X_mat)
102 | x_obs <- X_obs$value
103 | x_obs_idx <- as.matrix(X_obs[ , c(".row", ".col")])
104 | N_obs <- nrow(X_obs)
105 | x_miss_idx <- as.matrix(X_miss[ , c(".row", ".col")])
106 | N_miss <- nrow(X_miss)
107 | # the model assumes both means are fixed at zero
108 | mu <- rep(0, 2)
109 | })
110 | ```
111 |
112 |
113 | ```r
114 | bimodal_fit <- sampling(bimodal_mod, data = bimodal_data,
115 | chains = 1)
116 | ```
117 |
--------------------------------------------------------------------------------
/docs/campaign_files/figure-html/campaign_plot_xi-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/campaign_files/figure-html/campaign_plot_xi-1.png
--------------------------------------------------------------------------------
/docs/corporatism.md:
--------------------------------------------------------------------------------
1 |
2 | # Corporatism: Hierarchical model for economic growth {#corporatism}
3 |
4 |
5 | ```r
6 | library("rstan")
7 | library("tidyverse")
8 | ```
9 |
10 | The following program implements a regression model of economic growth among 16 OECD countries, 1971-1984 [@Western1998a; @AlvarezGarrettLange1991a].[^corporatism-src]
11 | The model is hierarchical in that it specifies country-specific coefficients for the following predictors: lagged growth, demand, import price movements, export price movements, leftist government and an intercept.
12 | The magnitudes of the country-specific coefficients are conditional on (time-invariant) extent of labor organization within each country; these regression relationships constitute the second level of the model.
13 |
14 | The data come from $N = 16$ countries and $T = 14$ years (1971-1984), with $K = 6$ covariates at the lowest ("micro") level of the hierarchy and $J = 2$ covariates (an intercept and the labor organization variable) at the second level.
15 |
16 |
17 | ```r
18 | data("corporatism", package = "bayesjackman")
19 | ```
20 |
21 |
22 | ```r
23 | corporatism_country <- corporatism %>%
24 | dplyr::select(country, labor.org) %>%
25 | distinct()
26 | ```
27 |
28 |
29 | ```r
30 | corporatism_mod <- stan_model("stan/corporatism.stan")
31 | ```
32 |
33 |
34 | data {
35 | // number of observations
36 | int N;
37 | // response variable
38 | vector[N] y;
39 | // number of predictors in the regression
40 | int K;
41 | // design matrix of country-year obs
42 | matrix[N, K] X;
43 | // number of countries
44 | int n_country;
45 | // countries for each observation
46 | int country[N];
47 | // design matrix of country-variables
48 | int J;
49 | matrix[n_country, J] U;
50 | // priors
51 | // mean and scale of normal prior on beta
52 | vector[K] beta_mean;
53 | vector[K] beta_scale;
54 | // mean and scale of normal prior on gamma
55 | real gamma_mean;
56 | real gamma_scale;
57 | // scale for half-Cauchy prior on tau
58 | real tau_scale;
59 | }
60 | parameters {
61 | // obs. errors.
62 | real sigma;
63 | // country-specific terms
64 | vector[n_country] gamma;
65 | vector[J] delta;
66 | // regression coefficients
67 | vector[K] beta[n_country];
68 | // scale on country priors
69 | real tau;
70 | }
71 | transformed parameters {
72 | vector[N] mu;
73 | vector[n_country] alpha;
74 | alpha = gamma + U * delta;
75 | for (i in 1:N) {
76 | mu[i] = alpha[country[i]] + X[i] * beta[country[i]];
77 | }
78 | }
79 | model {
80 | gamma ~ normal(gamma_mean, gamma_scale);
81 | tau ~ cauchy(0., tau_scale);
82 | for (i in 1:n_country) {
83 | beta[i] ~ normal(beta_mean, beta_scale);
84 | }
85 | alpha ~ normal(gamma, tau);
86 | y ~ normal(mu, sigma);
87 | }
88 | generated quantities {
89 | }
90 |
91 |
92 | [^corporatism-src]: Example derived from Simon Jackman, "[Corporatism: hierarchical or 'multi-level' model for economic growth in 16 OECD countries](https://web-beta.archive.org/web/20070724034043/http://jackman.stanford.edu/mcmc/corporatism.odc)", 2007-07-24.
93 |
--------------------------------------------------------------------------------
/docs/florida.md:
--------------------------------------------------------------------------------
1 |
2 | # Florida: Learning About an Unknown Proportion from Survey Data {#florida}
3 |
4 |
5 | ```r
6 | library("tidyverse")
7 | library("rstan")
8 | ```
9 |
10 | In this example, beliefs about an unknown proportion are updated from new survey data.
11 | The particular example uses survey data to update beliefs about support for Bush in Florida during the 2000 presidential election campaign [@Jackman2004a].[^florida-src]
12 |
13 |
14 | ```r
15 | florida_mod <- stan_model("stan/florida.stan")
16 | ```
17 |
18 | data {
19 | real y;
20 | real y_sd;
21 | real mu_mean;
22 | real mu_sd;
23 | }
24 | parameters {
25 | real mu;
26 | }
27 | model {
28 | mu ~ normal(mu_mean, mu_sd);
29 | y ~ normal(mu, y_sd);
30 | }
31 |
32 |
33 | The prior polls had a mean of 49.1% in support for Bush, with a standard deviation of 2.2%.
34 | The new poll shows 55% support for Bush, with a standard deviation of 2.2%.
35 |
36 | ```r
37 | florida_data <- list(
38 | mu_mean = 49.1,
39 | mu_sd = 2.2,
40 | y_sd = 2.2,
41 | y = 55
42 | )
43 | ```
44 |
45 |
46 | ```r
47 | florida_fit <- sampling(florida_mod, data = florida_data)
48 | ```
49 |
50 | ```r
51 | florida_fit
52 | #> Inference for Stan model: florida.
53 | #> 4 chains, each with iter=2000; warmup=1000; thin=1;
54 | #> post-warmup draws per chain=1000, total post-warmup draws=4000.
55 | #>
56 | #> mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat
57 | #> mu 52.0 0.04 1.57 48.88 50.98 52.06 53.08 55.1 1449 1
58 | #> lp__ -2.3 0.02 0.72 -4.41 -2.46 -2.02 -1.85 -1.8 2047 1
59 | #>
60 | #> Samples were drawn using NUTS(diag_e) at Fri Apr 20 00:55:02 2018.
61 | #> For each parameter, n_eff is a crude measure of effective sample size,
62 | #> and Rhat is the potential scale reduction factor on split chains (at
63 | #> convergence, Rhat=1).
64 | ```
65 |
66 | After observing the new poll, the mean for the posterior is 52, with a 95% credible interval of 48.9--55.1.
67 |
68 | [^florida-src]: This example is derived from Simon Jackman, "Florida," *BUGS Examples,* 2007-07-24, [URL](https://web-beta.archive.org/web/20070724034219/http://jackman.stanford.edu/mcmc/florida.zip).
69 |
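70 | Because this is a conjugate normal model, the posterior can also be checked
71 | analytically; a minimal sketch:
72 |
73 | ```r
74 | # precision-weighted average of the prior mean and the poll: with equal
75 | # prior and sampling standard deviations, the posterior mean is their simple
76 | # average and the standard deviation shrinks by a factor of sqrt(2)
77 | w <- 1 / c(2.2, 2.2)^2
78 | sum(w * c(49.1, 55)) / sum(w) # ~52.05, matching the posterior mean above
79 | sqrt(1 / sum(w)) # ~1.56, matching the posterior sd
80 | ```
81 |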
--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
1 |
2 | ---
3 | title: "Simon Jackman's Bayesian Model Examples in Stan"
4 | author: "Jeffrey B. Arnold"
5 | date: "2018-05-07"
6 | site: "bookdown::bookdown_site"
7 | output:
8 | bookdown::gitbook: default
9 | documentclass: book
10 | bibliography:
11 | - "bayes.bib"
12 | biblio-style: apalike
13 | link-citations: yes
14 | ---
15 |
16 | # Preface {-}
17 |
18 | This work contains the Bayesian model examples written by Simon Jackman and previously available on his website.
19 | These were originally written in WinBUGS or JAGS.
20 | I have translated these examples into Stan and revised or edited them as appropriate.
21 |
22 | This work is licensed under the [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/)
23 |
24 | 1. [Undervote](undervote): difference of two independent proportions; racial differences in self-reported undervoting
25 | 1. [Cancer](cancer): difference of two independent proportions; differences in rates of lung cancer by smoking
26 | 1. [Florida](florida): learning about an unknown proportion from survey data; using survey data to update beliefs about support for Bush in Florida in the 2000 presidential election campaign
27 | 1. [Turnout](turnout2005): logit/probit models for binary response; voter turnout as a function of covariates
28 | 1. [Co-Sponsor](cosponsor): computing auxiliary quantities from MCMC output, such as residuals, goodness of fit; logit model of legislative co-sponsorship
29 | 1. [Reagan](reagan): linear regression with AR(1) disturbances; monthly presidential approval ratings for Ronald Reagan
30 | 1. [Political Sophistication](sophistication): generalized latent variable modeling (item-response modeling with a mix of binary and ordinal responses); assessing levels of political knowledge among survey respondents in France
31 | 1. [Legislators](legislators): generalized latent variable modeling (two-parameter item-response model); estimating legislative ideal points from roll call data
32 | 1. [Judges](judges): item response modeling; estimating ideological locations of Supreme Court justices via analysis of decisions
33 | 1. [Resistant](resistant): outlier-resistant regression via the t density; votes in U.S. Congressional elections, 1956-1994; incumbency advantage.
34 | 1. [House of Commons](uk92): analysis of compositional data; vote shares for candidates to the U.K. House of Commons
35 | 1. [Campaign](campaign): tracking a latent variable over time; support for candidates over the course of an election campaign, as revealed by polling from different survey houses.
36 | 1. [Aspirin](aspirin): meta-analysis via hierarchical modeling of treatment effects; combining numerous experimental studies of effect of aspirin on surviving myocardial infarction (heart attack)
37 | 1. [Corporatism](corporatism): hierarchical linear regression model, normal errors; joint impact of left-wing governments and strength of trade unions in structuring the determinants of economic growth
38 | 1. [Bimodal](bimodal): severe pattern of missingness in bivariate normal data; bimodal density over correlation coefficient
39 | 1. [Unidentified](unidentified): the consequences of over-parameterization; contrived example from Carlin and Louis
40 | 1. [Engines](engines): modeling truncated data; time to failure, engines being bench-tested at different operating temperatures
41 | 1. [Truncated](truncated): Example of sampling from a truncated normal distribution.
42 | 1. [Generalized Beetles](genbeetles): Generalizing link functions for binomial GLMs.
43 | 1. [Negative Binomial](negbin): Example of a negative binomial regression of homicides
44 |
45 | ## Dependencies {-}
46 |
47 | The R packages, Stan models, and datasets needed to run the code examples can be installed with
48 |
49 | ```r
50 | # install.packages("devtools")
51 | devtools::install_github("jrnold/jackman-bayes", subdir = "bayesjackman")
52 | ```
53 |
54 | ## Colophon {-}
55 |
56 |
57 | ```r
58 | sessionInfo()
59 | #> R version 3.4.4 (2018-03-15)
60 | #> Platform: x86_64-apple-darwin15.6.0 (64-bit)
61 | #> Running under: macOS High Sierra 10.13.3
62 | #>
63 | #> Matrix products: default
64 | #> BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
65 | #> LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
66 | #>
67 | #> locale:
68 | #> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
69 | #>
70 | #> attached base packages:
71 | #> [1] methods stats graphics grDevices utils datasets base
72 | #>
73 | #> loaded via a namespace (and not attached):
74 | #> [1] Rcpp_0.12.16 knitr_1.20 magrittr_1.5
75 | #> [4] munsell_0.4.3 colorspace_1.3-2 rlang_0.2.0
76 | #> [7] stringr_1.3.0 plyr_1.8.4 tools_3.4.4
77 | #> [10] parallel_3.4.4 grid_3.4.4 gtable_0.2.0
78 | #> [13] xfun_0.1 htmltools_0.3.6 StanHeaders_2.17.2
79 | #> [16] lazyeval_0.2.1 rprojroot_1.3-2 digest_0.6.15
80 | #> [19] tibble_1.4.2 rstan_2.17.3 bookdown_0.7.7
81 | #> [22] gridExtra_2.3 ggplot2_2.2.1 inline_0.3.14
82 | #> [25] evaluate_0.10.1 rmarkdown_1.9 stringi_1.1.7
83 | #> [28] pillar_1.2.1 compiler_3.4.4 scales_0.5.0
84 | #> [31] backports_1.1.2 stats4_3.4.4
85 | ```
86 |
--------------------------------------------------------------------------------
/docs/judges.md:
--------------------------------------------------------------------------------
1 |
2 | # Judges: estimating the ideological locations of Supreme Court justices {#judges}
3 |
4 |
5 | ```r
6 | library("pscl")
7 | library("tidyverse")
8 | library("rstan")
9 | ```
10 |
11 | This program implements an ideal-point model (similar to the legislators
12 | example), estimating both the locations of the justices on a latent ideological
13 | dimension, and two parameters specific to each case (corresponding to the item
14 | difficulty and item discrimination parameters of a two-parameter IRT
15 | model).[^judges-src] The data consist of the decisions of Justices Rehnquist,
16 | Stevens, O'Connor, Scalia, Kennedy, Souter, Thomas, Ginsburg, and Breyer, in that
17 | order, $i = 1, \dots , 9$. The decisions are coded 1 for votes with the
18 | majority, 0 for votes against the majority, and `NA` for abstentions.
19 |
20 | In these models, the only observed data are votes, and the analyst wants to
21 | model those votes as a function of legislator-specific ($\theta_i$) and
22 | vote-specific ($\alpha_j$, $\beta_j$) parameters. The vote of legislator $i$ on
23 | roll call $j$ ($y_{i,j}$) is a function of the legislator's ideal point
24 | ($\theta_i$), the vote's difficulty ($\alpha_j$), and its discrimination ($\beta_j$):
25 | $$
26 | \begin{aligned}[t]
27 | y_{i,j} &\sim \mathsf{Bernoulli}(\pi_{i,j}) \\
28 | \pi_{i,j} &= \frac{1}{1 + \exp(-\mu_{i,j})} \\
29 | \mu_{i,j} &= \beta_j \theta_i - \alpha_j
30 | \end{aligned}
31 | $$
32 |
33 | $$
34 | \begin{aligned}[t]
35 | \beta_j &\sim \mathsf{Normal}(0, 2.5) \\
36 | \alpha_j &\sim \mathsf{Normal}(0, 5) \\
37 | \theta_i &\sim \mathsf{Normal}(0, 1) \\
38 | \end{aligned}
39 | $$
40 |
41 |
42 | ```r
43 | data("sc9497", package = "pscl")
44 | ```
45 | To simplify the analysis, the outcomes will be aggregated to "Yes", "No", and missing values (which are dropped from the data passed to Stan).
46 |
47 | ```r
48 | sc9497_vote_data <- tibble(vote = colnames(sc9497$votes)) %>%
49 | mutate(.vote_id = row_number())
50 |
51 | sc9497_legis_data <- as.data.frame(sc9497$legis.names) %>%
52 | rownames_to_column("judge") %>%
53 | mutate(.judge_id = row_number())
54 |
55 | sc9497_votes <- sc9497$votes %>%
56 | as.data.frame() %>%
57 | rownames_to_column("judge") %>%
58 | gather(vote, yea, -judge) %>%
59 | filter(!is.na(yea)) %>%
60 | inner_join(dplyr::select(sc9497_vote_data, vote, .vote_id), by = "vote") %>%
61 | inner_join(dplyr::select(sc9497_legis_data, judge, .judge_id), by = "judge")
62 | ```
63 |
64 |
65 | ```r
66 | # mod_ideal_point <- stan_model("ideal_point.stan")
67 | ```
68 |
69 | ```r
70 | # mod_ideal_point
71 | ```
72 |
73 | [^judges-src]: This example is derived from Simon Jackman, "Judges: estimating the ideological locations of Supreme Court justices", *BUGS Examples*, 2007-07-24, [URL](https://web-beta.archive.org/web/20070724034049/http://jackman.stanford.edu:80/mcmc/judges.odc).
74 |
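75 | The link between the parameters and a vote probability can be made concrete
76 | with a small helper; a sketch of the inverse-logit calculation above (the
77 | parameter values are made up for illustration):
78 |
79 | ```r
80 | # Pr(vote with the majority) for ideal point theta, case difficulty alpha,
81 | # and case discrimination beta
82 | irt_prob <- function(theta, alpha, beta) plogis(beta * theta - alpha)
83 | irt_prob(theta = 1, alpha = 0.5, beta = 2) # ~0.82
84 | ```
85 |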
--------------------------------------------------------------------------------
/docs/legislators_files/figure-html/legislator_plot_1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/legislators_files/figure-html/legislator_plot_1-1.png
--------------------------------------------------------------------------------
/docs/legislators_files/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/legislators_files/figure-html/unnamed-chunk-5-1.png
--------------------------------------------------------------------------------
/docs/legislators_files/figure-html/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/legislators_files/figure-html/unnamed-chunk-6-1.png
--------------------------------------------------------------------------------
/docs/legislators_files/figure-html/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/legislators_files/figure-html/unnamed-chunk-7-1.png
--------------------------------------------------------------------------------
/docs/libs/gitbook-2.6.7/css/fontawesome/fontawesome-webfont.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/libs/gitbook-2.6.7/css/fontawesome/fontawesome-webfont.ttf
--------------------------------------------------------------------------------
/docs/libs/gitbook-2.6.7/css/plugin-bookdown.css:
--------------------------------------------------------------------------------
1 | .book .book-header h1 {
2 | padding-left: 20px;
3 | padding-right: 20px;
4 | }
5 | .book .book-header.fixed {
6 | position: fixed;
7 | right: 0;
8 | top: 0;
9 | left: 0;
10 | border-bottom: 1px solid rgba(0,0,0,.07);
11 | }
12 | span.search-highlight {
13 | background-color: #ffff88;
14 | }
15 | @media (min-width: 600px) {
16 | .book.with-summary .book-header.fixed {
17 | left: 300px;
18 | }
19 | }
20 | @media (max-width: 1240px) {
21 | .book .book-body.fixed {
22 | top: 50px;
23 | }
24 | .book .book-body.fixed .body-inner {
25 | top: auto;
26 | }
27 | }
28 | @media (max-width: 600px) {
29 | .book.with-summary .book-header.fixed {
30 | left: calc(100% - 60px);
31 | min-width: 300px;
32 | }
33 | .book.with-summary .book-body {
34 | transform: none;
35 | left: calc(100% - 60px);
36 | min-width: 300px;
37 | }
38 | .book .book-body.fixed {
39 | top: 0;
40 | }
41 | }
42 |
43 | .book .book-body.fixed .body-inner {
44 | top: 50px;
45 | }
46 | .book .book-body .page-wrapper .page-inner section.normal sub, .book .book-body .page-wrapper .page-inner section.normal sup {
47 | font-size: 85%;
48 | }
49 |
50 | @media print {
51 | .book .book-summary, .book .book-body .book-header, .fa {
52 | display: none !important;
53 | }
54 | .book .book-body.fixed {
55 | left: 0px;
56 | }
57 | .book .book-body,.book .book-body .body-inner, .book.with-summary {
58 | overflow: visible !important;
59 | }
60 | }
61 | .kable_wrapper {
62 | border-spacing: 20px 0;
63 | border-collapse: separate;
64 | border: none;
65 | margin: auto;
66 | }
67 | .kable_wrapper > tbody > tr > td {
68 | vertical-align: top;
69 | }
70 | .book .book-body .page-wrapper .page-inner section.normal table tr.header {
71 | border-top-width: 2px;
72 | }
73 | .book .book-body .page-wrapper .page-inner section.normal table tr:last-child td {
74 | border-bottom-width: 2px;
75 | }
76 | .book .book-body .page-wrapper .page-inner section.normal table td, .book .book-body .page-wrapper .page-inner section.normal table th {
77 | border-left: none;
78 | border-right: none;
79 | }
80 | .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr, .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr > td {
81 | border-top: none;
82 | }
83 | .book .book-body .page-wrapper .page-inner section.normal table.kable_wrapper > tbody > tr:last-child > td {
84 | border-bottom: none;
85 | }
86 |
87 | div.theorem, div.lemma, div.corollary, div.proposition {
88 | font-style: italic;
89 | }
90 | span.theorem, span.lemma, span.corollary, span.proposition {
91 | font-style: normal;
92 | }
93 | div.proof:after {
94 | content: "\25a2";
95 | float: right;
96 | }
97 | .header-section-number {
98 | padding-right: .5em;
99 | }
100 |
--------------------------------------------------------------------------------
/docs/libs/gitbook-2.6.7/css/plugin-search.css:
--------------------------------------------------------------------------------
1 | .book .book-summary .book-search {
2 | padding: 6px;
3 | background: transparent;
4 | position: absolute;
5 | top: -50px;
6 | left: 0px;
7 | right: 0px;
8 | transition: top 0.5s ease;
9 | }
10 | .book .book-summary .book-search input,
11 | .book .book-summary .book-search input:focus,
12 | .book .book-summary .book-search input:hover {
13 | width: 100%;
14 | background: transparent;
15 | border: 1px solid #ccc;
16 | box-shadow: none;
17 | outline: none;
18 | line-height: 22px;
19 | padding: 7px 4px;
20 | color: inherit;
21 | box-sizing: border-box;
22 | }
23 | .book.with-search .book-summary .book-search {
24 | top: 0px;
25 | }
26 | .book.with-search .book-summary ul.summary {
27 | top: 50px;
28 | }
29 |
--------------------------------------------------------------------------------
/docs/libs/gitbook-2.6.7/js/jquery.highlight.js:
--------------------------------------------------------------------------------
1 | gitbook.require(["jQuery"], function(jQuery) {
2 |
3 | /*
4 | * jQuery Highlight plugin
5 | *
6 | * Based on highlight v3 by Johann Burkard
7 | * http://johannburkard.de/blog/programming/javascript/highlight-javascript-text-higlighting-jquery-plugin.html
8 | *
9 | * Code a little bit refactored and cleaned (in my humble opinion).
10 | * Most important changes:
11 | * - has an option to highlight only entire words (wordsOnly - false by default),
12 | * - has an option to be case sensitive (caseSensitive - false by default)
13 | * - highlight element tag and class names can be specified in options
14 | *
15 | * Copyright (c) 2009 Bartek Szopka
16 | *
17 | * Licensed under MIT license.
18 | *
19 | */
20 |
21 | jQuery.extend({
22 | highlight: function (node, re, nodeName, className) {
23 | if (node.nodeType === 3) {
24 | var match = node.data.match(re);
25 | if (match) {
26 | var highlight = document.createElement(nodeName || 'span');
27 | highlight.className = className || 'highlight';
28 | var wordNode = node.splitText(match.index);
29 | wordNode.splitText(match[0].length);
30 | var wordClone = wordNode.cloneNode(true);
31 | highlight.appendChild(wordClone);
32 | wordNode.parentNode.replaceChild(highlight, wordNode);
33 | return 1; //skip added node in parent
34 | }
35 | } else if ((node.nodeType === 1 && node.childNodes) && // only element nodes that have children
36 | !/(script|style)/i.test(node.tagName) && // ignore script and style nodes
37 | !(node.tagName === nodeName.toUpperCase() && node.className === className)) { // skip if already highlighted
38 | for (var i = 0; i < node.childNodes.length; i++) {
39 | i += jQuery.highlight(node.childNodes[i], re, nodeName, className);
40 | }
41 | }
42 | return 0;
43 | }
44 | });
45 |
46 | jQuery.fn.unhighlight = function (options) {
47 | var settings = { className: 'highlight', element: 'span' };
48 | jQuery.extend(settings, options);
49 |
50 | return this.find(settings.element + "." + settings.className).each(function () {
51 | var parent = this.parentNode;
52 | parent.replaceChild(this.firstChild, this);
53 | parent.normalize();
54 | }).end();
55 | };
56 |
57 | jQuery.fn.highlight = function (words, options) {
58 | var settings = { className: 'highlight', element: 'span', caseSensitive: false, wordsOnly: false };
59 | jQuery.extend(settings, options);
60 |
61 | if (words.constructor === String) {
62 | words = [words];
63 | }
64 | words = jQuery.grep(words, function(word, i){
65 | return word !== '';
66 | });
67 | words = jQuery.map(words, function(word, i) {
68 | return word.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
69 | });
70 | if (words.length === 0) { return this; }
71 |
72 | var flag = settings.caseSensitive ? "" : "i";
73 | var pattern = "(" + words.join("|") + ")";
74 | if (settings.wordsOnly) {
75 | pattern = "\\b" + pattern + "\\b";
76 | }
77 | var re = new RegExp(pattern, flag);
78 |
79 | return this.each(function () {
80 | jQuery.highlight(this, re, settings.element, settings.className);
81 | });
82 | };
83 |
84 | });
85 |
--------------------------------------------------------------------------------
/docs/libs/gitbook-2.6.7/js/plugin-fontsettings.js:
--------------------------------------------------------------------------------
1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) {
2 | var fontState;
3 |
4 | var THEMES = {
5 | "white": 0,
6 | "sepia": 1,
7 | "night": 2
8 | };
9 |
10 | var FAMILY = {
11 | "serif": 0,
12 | "sans": 1
13 | };
14 |
15 | // Save current font settings
16 | function saveFontSettings() {
17 | gitbook.storage.set("fontState", fontState);
18 | update();
19 | }
20 |
21 | // Increase font size
22 | function enlargeFontSize(e) {
23 | e.preventDefault();
24 | if (fontState.size >= 4) return;
25 |
26 | fontState.size++;
27 | saveFontSettings();
28 | };
29 |
30 | // Decrease font size
31 | function reduceFontSize(e) {
32 | e.preventDefault();
33 | if (fontState.size <= 0) return;
34 |
35 | fontState.size--;
36 | saveFontSettings();
37 | };
38 |
39 | // Change font family
40 | function changeFontFamily(index, e) {
41 | e.preventDefault();
42 |
43 | fontState.family = index;
44 | saveFontSettings();
45 | };
46 |
47 | // Change type of color
48 | function changeColorTheme(index, e) {
49 | e.preventDefault();
50 |
51 | var $book = $(".book");
52 |
53 | if (fontState.theme !== 0)
54 | $book.removeClass("color-theme-"+fontState.theme);
55 |
56 | fontState.theme = index;
57 | if (fontState.theme !== 0)
58 | $book.addClass("color-theme-"+fontState.theme);
59 |
60 | saveFontSettings();
61 | };
62 |
63 | function update() {
64 | var $book = gitbook.state.$book;
65 |
66 | $(".font-settings .font-family-list li").removeClass("active");
67 | $(".font-settings .font-family-list li:nth-child("+(fontState.family+1)+")").addClass("active");
68 |
69 | $book[0].className = $book[0].className.replace(/\bfont-\S+/g, '');
70 | $book.addClass("font-size-"+fontState.size);
71 | $book.addClass("font-family-"+fontState.family);
72 |
73 | if(fontState.theme !== 0) {
74 | $book[0].className = $book[0].className.replace(/\bcolor-theme-\S+/g, '');
75 | $book.addClass("color-theme-"+fontState.theme);
76 | }
77 | };
78 |
79 | function init(config) {
80 | var $bookBody, $book;
81 |
82 | //Find DOM elements.
83 | $book = gitbook.state.$book;
84 | $bookBody = $book.find(".book-body");
85 |
86 | // Instantiate font state object
87 | fontState = gitbook.storage.get("fontState", {
88 | size: config.size || 2,
89 | family: FAMILY[config.family || "sans"],
90 | theme: THEMES[config.theme || "white"]
91 | });
92 |
93 | update();
94 | };
95 |
96 |
97 | gitbook.events.bind("start", function(e, config) {
98 | var opts = config.fontsettings;
99 |
100 | // Create buttons in toolbar
101 | gitbook.toolbar.createButton({
102 | icon: 'fa fa-font',
103 | label: 'Font Settings',
104 | className: 'font-settings',
105 | dropdown: [
106 | [
107 | {
108 | text: 'A',
109 | className: 'font-reduce',
110 | onClick: reduceFontSize
111 | },
112 | {
113 | text: 'A',
114 | className: 'font-enlarge',
115 | onClick: enlargeFontSize
116 | }
117 | ],
118 | [
119 | {
120 | text: 'Serif',
121 | onClick: _.partial(changeFontFamily, 0)
122 | },
123 | {
124 | text: 'Sans',
125 | onClick: _.partial(changeFontFamily, 1)
126 | }
127 | ],
128 | [
129 | {
130 | text: 'White',
131 | onClick: _.partial(changeColorTheme, 0)
132 | },
133 | {
134 | text: 'Sepia',
135 | onClick: _.partial(changeColorTheme, 1)
136 | },
137 | {
138 | text: 'Night',
139 | onClick: _.partial(changeColorTheme, 2)
140 | }
141 | ]
142 | ]
143 | });
144 |
145 |
146 | // Init current settings
147 | init(opts);
148 | });
149 | });
150 |
151 |
152 |
--------------------------------------------------------------------------------
/docs/libs/gitbook-2.6.7/js/plugin-sharing.js:
--------------------------------------------------------------------------------
1 | gitbook.require(["gitbook", "lodash", "jQuery"], function(gitbook, _, $) {
2 | var SITES = {
3 | 'github': {
4 | 'label': 'Github',
5 | 'icon': 'fa fa-github',
6 | 'onClick': function(e) {
7 | e.preventDefault();
8 | var repo = $('meta[name="github-repo"]').attr('content');
9 | if (typeof repo === 'undefined') throw("Github repo not defined");
10 | window.open("https://github.com/"+repo);
11 | }
12 | },
13 | 'facebook': {
14 | 'label': 'Facebook',
15 | 'icon': 'fa fa-facebook',
16 | 'onClick': function(e) {
17 | e.preventDefault();
18 | window.open("http://www.facebook.com/sharer/sharer.php?s=100&p[url]="+encodeURIComponent(location.href));
19 | }
20 | },
21 | 'twitter': {
22 | 'label': 'Twitter',
23 | 'icon': 'fa fa-twitter',
24 | 'onClick': function(e) {
25 | e.preventDefault();
26 | window.open("http://twitter.com/home?status="+encodeURIComponent(document.title+" "+location.href));
27 | }
28 | },
29 | 'google': {
30 | 'label': 'Google+',
31 | 'icon': 'fa fa-google-plus',
32 | 'onClick': function(e) {
33 | e.preventDefault();
34 | window.open("https://plus.google.com/share?url="+encodeURIComponent(location.href));
35 | }
36 | },
37 | 'linkedin': {
38 | 'label': 'LinkedIn',
39 | 'icon': 'fa fa-linkedin',
40 | 'onClick': function(e) {
41 | e.preventDefault();
42 | window.open("https://www.linkedin.com/shareArticle?mini=true&url="+encodeURIComponent(location.href)+"&title="+encodeURIComponent(document.title));
43 | }
44 | },
45 | 'weibo': {
46 | 'label': 'Weibo',
47 | 'icon': 'fa fa-weibo',
48 | 'onClick': function(e) {
49 | e.preventDefault();
50 | window.open("http://service.weibo.com/share/share.php?content=utf-8&url="+encodeURIComponent(location.href)+"&title="+encodeURIComponent(document.title));
51 | }
52 | },
53 | 'instapaper': {
54 | 'label': 'Instapaper',
55 | 'icon': 'fa fa-instapaper',
56 | 'onClick': function(e) {
57 | e.preventDefault();
58 | window.open("http://www.instapaper.com/text?u="+encodeURIComponent(location.href));
59 | }
60 | },
61 | 'vk': {
62 | 'label': 'VK',
63 | 'icon': 'fa fa-vk',
64 | 'onClick': function(e) {
65 | e.preventDefault();
66 | window.open("http://vkontakte.ru/share.php?url="+encodeURIComponent(location.href));
67 | }
68 | }
69 | };
70 |
71 |
72 |
73 | gitbook.events.bind("start", function(e, config) {
74 | var opts = config.sharing;
75 | if (!opts) return;
76 |
77 | // Create dropdown menu
78 | var menu = _.chain(opts.all)
79 | .map(function(id) {
80 | var site = SITES[id];
81 |
82 | return {
83 | text: site.label,
84 | onClick: site.onClick
85 | };
86 | })
87 | .compact()
88 | .value();
89 |
90 | // Create main button with dropdown
91 | if (menu.length > 0) {
92 | gitbook.toolbar.createButton({
93 | icon: 'fa fa-share-alt',
94 | label: 'Share',
95 | position: 'right',
96 | dropdown: [menu]
97 | });
98 | }
99 |
100 | // Direct actions to share
101 | _.each(SITES, function(site, sideId) {
102 | if (!opts[sideId]) return;
103 |
104 | gitbook.toolbar.createButton({
105 | icon: site.icon,
106 | label: site.text,
107 | position: 'right',
108 | onClick: site.onClick
109 | });
110 | });
111 | });
112 | });
113 |
--------------------------------------------------------------------------------
/docs/negbin.md:
--------------------------------------------------------------------------------
1 |
2 | # Negative Binomial: Estimating Homicides in Census Tracts {#negbin}
3 |
4 |
5 | ```r
6 | library("tidyverse")
7 | library("rstan")
8 | library("rstanarm")
9 | ```
10 |
11 | The data are from the 1990 United States Census for the city of St. Louis,
12 | Missouri for Census Tracts, and from records of the St. Louis City Metropolitan
13 | Police Department for the years 1980 through 1994. For each of the $N = 111$
14 | Census Tracts with a nonzero population, an observation includes
15 |
16 | - the median household income in 1990
17 | - the percentage unemployed (base of labor force)
18 | - a count of the number of homicide incidents.
19 |
20 | The number of homicides in this 15-year period totals 2,815. The average size
21 | of a Census Tract is 3,571 with a range of 249--8,791. Income has been rescaled
22 | by dividing by 1,000, which produces a range similar to that of percentage
23 | unemployed and standard deviations that are very close. Tract homicide counts
24 | range from 0 through 99 with a median of 16 (the mean is 25.4). An enhanced set
25 | of linear predictors does better than this two-predictor example.
26 |
27 | $$
28 | \begin{aligned}[t]
29 | y_i &\sim \mathsf{NegBinomial2}(\mu_i,\phi) \\
30 | \mu_i &= \exp(\eta_i) \\
31 | \eta_i &= \alpha + x_i \beta
32 | \end{aligned}
33 | $$
34 | The negative binomial distribution is parameterized so that $\mu \in \mathbb{R}^+$ is the location parameter, and $\phi \in \mathbb{R}^+$ is the reciprocal overdispersion parameter, such that the mean and variance of a random variable $Y$ distributed negative binomial are
35 | $$
36 | \begin{aligned}[t]
37 | E[Y] &= \mu , \\
38 | V[Y] &= \mu + \frac{\mu^2}{\phi} .
39 | \end{aligned}
40 | $$
41 | As $\phi \to \infty$, the negative binomial approaches the Poisson distribution.
42 |
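To see this parameterization concretely, here is a quick simulation check (a sketch, not part of the original example); R's `rnbinom()` matches it, with `mu` playing the role of $\mu$ and `size` the role of $\phi$:

```r
# check E[Y] = mu and V[Y] = mu + mu^2 / phi by simulation
mu <- 25
phi <- 2
y <- rnbinom(1e6, mu = mu, size = phi)
mean(y)   # close to mu = 25
var(y)    # close to mu + mu^2 / phi = 337.5
```
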
43 | The parameters are given weakly informative priors,
44 | $$
45 | \begin{aligned}[t]
46 | \alpha &\sim \mathsf{Normal}(0, 10), \\
47 | \beta_k &\sim \mathsf{Normal}(0, 2.5), \\
48 | \phi^{-1} &\sim \mathsf{HalfCauchy}(0, 5).
49 | \end{aligned}
50 | $$
51 |
52 |
53 | ```r
54 | negbin_mod <- stan_model("stan/negbin.stan")
55 | ```
56 |
57 | data {
58 | int N;
59 | int y[N];
60 | int K;
61 | matrix[N, K] X;
62 | // priors
63 | real alpha_mean;
64 | real alpha_scale;
65 | vector[K] beta_mean;
66 | vector[K] beta_scale;
67 | real reciprocal_phi_scale;
68 | }
69 | parameters {
70 | real alpha;
71 | vector[K] beta;
72 | real<lower = 0.> reciprocal_phi;
73 | }
74 | transformed parameters {
75 | vector[N] eta;
76 | real phi;
77 | eta = alpha + X * beta;
78 | phi = 1. / reciprocal_phi;
79 | }
80 | model {
81 | reciprocal_phi ~ cauchy(0., reciprocal_phi_scale);
82 | alpha ~ normal(alpha_mean, alpha_scale);
83 | beta ~ normal(beta_mean, beta_scale);
84 | y ~ neg_binomial_2_log(eta, phi);
85 | }
86 | generated quantities {
87 | vector[N] mu;
88 | vector[N] log_lik;
89 | vector[N] y_rep;
90 | mu = exp(eta);
91 | for (i in 1:N) {
92 | log_lik[i] = neg_binomial_2_log_lpmf(y[i] | eta[i], phi);
93 | y_rep[i] = neg_binomial_2_rng(mu[i], phi);
94 | }
95 | }
96 |
97 |
98 |
99 | ```r
100 | data("st_louis_census", package = "bayesjackman")
101 | negbin_data <- within(list(), {
102 | y <- st_louis_census$i8094
103 | N <- length(y)
104 | X <- model.matrix(~ 0 + pcunemp9 + incrs, data = st_louis_census) %>% scale()
105 | K <- ncol(X)
106 | beta_mean <- rep(0, K)
107 | beta_scale <- rep(2.5, K)
108 | alpha_mean <- 0
109 | alpha_scale <- 10
110 | reciprocal_phi_scale <- 5
111 | })
112 | ```
113 |
114 |
115 | ```r
116 | negbin_fit <- sampling(negbin_mod, data = negbin_data)
117 | ```
118 |
119 | ```r
120 | summary(negbin_fit, par = c("alpha", "beta", "phi"))$summary
121 | #> mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff
122 | #> alpha 2.926 0.00114 0.0719 2.787 2.878 2.926 2.973 3.064 4000
123 | #> beta[1] 0.691 0.00197 0.1122 0.471 0.615 0.689 0.766 0.912 3245
124 | #> beta[2] -0.349 0.00171 0.1007 -0.551 -0.415 -0.348 -0.280 -0.154 3481
125 | #> phi 1.968 0.00488 0.3088 1.424 1.751 1.949 2.166 2.639 4000
126 | #> Rhat
127 | #> alpha 1
128 | #> beta[1] 1
129 | #> beta[2] 1
130 | #> phi 1
131 | ```
132 |
133 | We could also fit the model using the **rstanarm** function `stan_glm.nb` (or `stan_glm`):
134 |
135 | ```r
136 | negbin_fit2 <- stan_glm.nb(i8094 ~ pcunemp9 + incrs, data = st_louis_census)
137 | ```
138 |
139 | ```r
140 | negbin_fit2
141 | #> stan_glm.nb
142 | #> family: neg_binomial_2 [log]
143 | #> formula: i8094 ~ pcunemp9 + incrs
144 | #> observations: 111
145 | #> predictors: 3
146 | #> ------
147 | #> Median MAD_SD
148 | #> (Intercept) 2.8 0.4
149 | #> pcunemp9 0.1 0.0
150 | #> incrs -0.1 0.0
151 | #> reciprocal_dispersion 1.9 0.3
152 | #>
153 | #> Sample avg. posterior predictive distribution of y:
154 | #> Median MAD_SD
155 | #> mean_PPD 32.4 5.6
156 | #>
157 | #> ------
158 | #> For info on the priors used see help('prior_summary.stanreg').
159 | ```
160 |
161 | Example derived from Simon Jackman, "negative binomial using the ones trick with log link", 2005-10-27, [URL](https://web-beta.archive.org/web/20051027082311/http://jackman.stanford.edu:80/mcmc/negbineg.odc).
162 |
--------------------------------------------------------------------------------
/docs/placeholder.html:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/placeholder.html
--------------------------------------------------------------------------------
/docs/references.md:
--------------------------------------------------------------------------------
1 |
2 | # References {-}
3 |
--------------------------------------------------------------------------------
/docs/truncated_files/figure-html/truncate_plot_density_mu-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/truncated_files/figure-html/truncate_plot_density_mu-1.png
--------------------------------------------------------------------------------
/docs/truncated_files/figure-html/truncate_plot_density_sigma-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/docs/truncated_files/figure-html/truncate_plot_density_sigma-1.png
--------------------------------------------------------------------------------
/engines.Rmd:
--------------------------------------------------------------------------------
1 | # Engines: right-censored failure times
2 |
3 | ```{r engines_setup,message=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | ```
7 |
8 | ## Data
9 |
10 | The data are 40 engines tested at various operating temperatures, with the failure time recorded if the engine failed, or the last time of the observation period if it had not [@Tanner1996a].[^engines-src]
11 | Of the 40 engines, 23 did not fail during their observation periods.
12 |
13 | ```{r engines}
14 | data("engines", package = "bayesjackman")
15 | glimpse(engines)
16 | ```
17 |
18 | ## Model
19 |
20 | Let $y^*_i$ be the failure time of engine $i$.
21 | The failure times are modeled as a regression with normal errors,
22 | $$
23 | \begin{aligned}[t]
24 | y^*_i &\sim \mathsf{Normal}(\mu_i, \sigma) , \\
25 | \mu_i &= \alpha + \beta x_i .
26 | \end{aligned}
27 | $$
28 | However, the failure times are not always observed.
29 | In some cases, only the last observation time is known, meaning that all that is known is $y^*_i > y_i$.
30 | Let $L$ be the set of censored observations.
31 | $$
32 | \begin{aligned}[t]
33 | y_i &\sim \mathsf{Normal}(\mu_i, \sigma) & i \notin L, \\
34 | y^*_i &\sim \mathsf{Normal}(\mu_i, \sigma) U(y_i, \infty) & i \in L, \\
35 | \mu_i &= \alpha + \beta x_i .
36 | \end{aligned}
37 | $$
38 |
39 | $$
40 | \begin{aligned}[t]
41 | \log L(y_1, \dots, y_N | \alpha, \beta, \sigma) &= \sum_{i \notin L} \log \mathsf{Normal}(y_i; \mu_i, \sigma) \\
42 | &\quad + \sum_{i \in L} \log \int_{y_i}^{\infty} \mathsf{Normal}(y^*; \mu_i, \sigma) d\,y^* ,
43 | \end{aligned}
44 | $$
45 | where
46 | $$
47 | \mu_i = \alpha + \beta x_i .
48 | $$
49 |
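The censored term is just a normal upper-tail probability. As a quick illustration (the values of $\mu$, $\sigma$, and the censoring time below are made up, not quantities from the data), R computes it on the log scale with `pnorm()`, which is exactly what Stan's `normal_lccdf` does:

```{r engines_lccdf_example}
# log Pr(y* > y_cens) for one censored engine; mirrors normal_lccdf
mu <- 5
sigma <- 2
y_cens <- 6
pnorm(y_cens, mean = mu, sd = sigma, lower.tail = FALSE, log.p = TRUE)
```
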
50 | ```{r mod_engines,results='hide',cache.extra=tools::md5sum("stan/engines.stan")}
51 | mod_engines <- stan_model("stan/engines.stan")
52 | ```
53 |
54 | ```{r results='asis'}
55 | mod_engines
56 | ```
57 |
58 | ## Estimation
59 |
60 | For the input data to the Stan model, the observed and censored observations have to be provided as separate vectors.
61 |
62 | ```{r }
63 | X <- scale(engines$x)
64 |
65 | engines_data <- within(list(), {
66 | N <- nrow(engines)
67 | # observed obs
68 | y_obs <- engines$y[!engines$censored]
69 | N_obs <- length(y_obs)
70 | X_obs <- X[!engines$censored, , drop = FALSE]
71 | K <- ncol(X_obs)
72 | # censored obs
73 | y_cens <- engines$y[engines$censored]
74 | N_cens <- length(y_cens)
75 | X_cens <- X[engines$censored, , drop = FALSE]
76 | # priors
77 |   # use the mean and sd of y to roughly scale the weakly informative
78 |   # priors -- these don't need to be exact
79 | alpha_loc <- mean(engines$y)
80 | alpha_scale <- 10 * sd(engines$y)
81 | beta_loc <- array(0)
82 | beta_scale <- array(2.5 * sd(engines$y))
83 | sigma_scale <- 5 * sd(y_obs)
84 | })
85 | ```
86 |
87 | ```{r}
88 | sampling(mod_engines, data = engines_data,
89 | chains = 1, init = list(list(alpha = mean(engines$y))))
90 | ```
91 |
92 | [^engines-src]: This example is derived from Simon Jackman, "Engines: right-censored failure times - the I(,) construct contrasted with other approaches", 2007-07-24,
93 | [URL](https://web-beta.archive.org/web/20070724034205/http://jackman.stanford.edu:80/mcmc/engines.odc)
94 |
--------------------------------------------------------------------------------
/florida.Rmd:
--------------------------------------------------------------------------------
1 | # Florida: Learning About an Unknown Proportion from Survey Data {#florida}
2 |
3 | ```{r florida_setup,message=FALSE,cache=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | ```
7 |
8 | In this example, beliefs about an unknown proportion are updated from new survey data.
9 | The particular example uses survey data to update beliefs about support for Bush in Florida during the 2000 presidential election campaign [@Jackman2004a].[^florida-src]
10 |
11 | ```{r florida_mod,results='hide',cache.extra=tools::md5sum("stan/florida.stan")}
12 | florida_mod <- stan_model("stan/florida.stan")
13 | ```
14 | ```{r echo=FALSE,cache=FALSE,results='asis'}
15 | florida_mod
16 | ```
17 |
18 | The prior polls had a mean of 49.1% in support for Bush, with a standard deviation of 2.2%.
19 | The new poll shows 55% support for Bush, with a standard deviation of 2.2%.
20 | ```{r florida_data}
21 | florida_data <- list(
22 | mu_mean = 49.1,
23 | mu_sd = 2.2,
24 | y_sd = 2.2,
25 | y = 55
26 | )
27 | ```
28 |
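Since the normal prior and normal likelihood are conjugate, the posterior is also available in closed form, which gives a check on the sampler: precisions add, and the posterior mean is the precision-weighted average of the prior mean and the new poll.

```{r florida_conjugate_check}
# analytic normal-normal update as a check on the MCMC output
prior_prec <- 1 / florida_data$mu_sd ^ 2
data_prec <- 1 / florida_data$y_sd ^ 2
post_var <- 1 / (prior_prec + data_prec)
post_mean <- post_var * (prior_prec * florida_data$mu_mean + data_prec * florida_data$y)
c(mean = post_mean, sd = sqrt(post_var))
```
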
29 | ```{r florida_fit,results='hide'}
30 | florida_fit <- sampling(florida_mod, data = florida_data)
31 | ```
32 | ```{r}
33 | florida_fit
34 | ```
35 | ```{r include=FALSE}
36 | post_mean <- round(summary(florida_fit)$summary["mu", "mean"], 1)
37 | post_2.5 <- round(summary(florida_fit)$summary["mu", "2.5%"], 1)
38 | post_97.5 <- round(summary(florida_fit)$summary["mu", "97.5%"], 1)
39 | ```
40 | After observing the new poll, the mean for the posterior is `r post_mean`, with a 95% credible interval of `r post_2.5`--`r post_97.5`.
41 |
42 | [^florida-src]: This example is derived from Simon Jackman, "Florida," *BUGS Examples,* 2007-07-24, [URL](https://web-beta.archive.org/web/20070724034219/http://jackman.stanford.edu/mcmc/florida.zip).
43 |
--------------------------------------------------------------------------------
/index.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Simon Jackman's Bayesian Model Examples in Stan"
3 | author: "Jeffrey B. Arnold"
4 | date: "`r Sys.Date()`"
5 | site: "bookdown::bookdown_site"
6 | output:
7 | bookdown::gitbook: default
8 | documentclass: book
9 | bibliography:
10 | - "bayes.bib"
11 | biblio-style: apalike
12 | link-citations: yes
13 | ---
14 |
15 | # Preface {-}
16 |
17 | This work contains the Bayesian model examples written by Simon Jackman and previously available on his website.
18 | These were originally written in WinBUGS or JAGS.
19 | I have translated these examples into Stan and revised or edited them as appropriate.
20 |
21 | This work is licensed under the [Creative Commons Attribution 4.0 International License](http://creativecommons.org/licenses/by/4.0/).
22 |
23 | 1. [Undervote](undervote): difference of two independent proportions; racial differences in self-reported undervoting
24 | 1. [Cancer](cancer): difference of two independent proportions; differences in rates of lung cancer by smoking
25 | 1. [Florida](florida): learning about an unknown proportion from survey data; using survey data to update beliefs about support for Bush in Florida in the 2000 presidential election campaign
26 | 1. [Turnout](turnout2005): logit/probit models for binary response; voter turnout as a function of covariates
27 | 1. [Co-Sponsor](cosponsor): computing auxiliary quantities from MCMC output, such as residuals, goodness of fit; logit model of legislative co-sponsorship
28 | 1. [Reagan](reagan): linear regression with AR(1) disturbances; monthly presidential approval ratings for Ronald Reagan
29 | 1. [Political Sophistication](sophistication): generalized latent variable modeling (item-response modeling with a mix of binary and ordinal responses); assessing levels of political knowledge among survey respondents in France
30 | 1. [Legislators](legislators): generalized latent variable modeling (two-parameter item-response model); estimating legislative ideal points from roll call data
31 | 1. [Judges](judges): item response modeling; estimating ideological locations of Supreme Court justices via analysis of decisions
32 | 1. [Resistant](resistant): outlier-resistant regression via the t density; votes in U.S. Congressional elections, 1956-1994; incumbency advantage.
33 | 1. [House of Commons](uk92): analysis of compositional data; vote shares for candidates to the U.K. House of Commons
34 | 1. [Campaign](campaign): tracking a latent variable over time; support for candidates over the course of an election campaign, as revealed by polling from different survey houses.
35 | 1. [Aspirin](aspirin): meta-analysis via hierarchical modeling of treatment effects; combining numerous experimental studies of effect of aspirin on surviving myocardial infarction (heart attack)
36 | 1. [Corporatism](corporatism): hierarchical linear regression model, normal errors; joint impact of left-wing governments and strength of trade unions in structuring the determinants of economic growth
37 | 1. [Bimodal](bimodal): severe pattern of missingness in bivariate normal data; bimodal density over correlation coefficient
38 | 1. [Unidentified](unidentified): the consequences of over-parameterization; contrived example from Carlin and Louis
39 | 1. [Engines](engines): modeling censored data; time to failure, engines being bench-tested at different operating temperatures
40 | 1. [Truncated](truncated): Example of sampling from a truncated normal distribution.
41 | 1. [Generalized Beetles](genbeetles): Generalizing link functions for binomial GLMs.
42 | 1. [Negative Binomial](negbin): Example of a negative binomial regression of homicides
43 |
44 | ## Dependencies {-}
45 |
46 | The R packages, Stan models, and datasets needed to run the code examples can be installed with
47 | ```{r eval=FALSE}
48 | # install.packages("devtools")
49 | devtools::install_github("jrnold/jackman-bayes", subdir = "bayesjackman")
50 | ```
51 |
52 | ## Colophon {-}
53 |
54 | ```{r}
55 | sessionInfo()
56 | ```
57 |
--------------------------------------------------------------------------------
/jackman-bayes.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: No
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: knitr
13 | LaTeX: XeLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
22 | QuitChildProcessesOnExit: Yes
23 |
--------------------------------------------------------------------------------
/judges.Rmd:
--------------------------------------------------------------------------------
1 | # Judges: estimating the ideological locations of Supreme Court justices {#judges}
2 |
3 | ```{r judges_setup,message=FALSE,cache=FALSE}
4 | library("pscl")
5 | library("tidyverse")
6 | library("rstan")
7 | ```
8 |
9 | This program implements an ideal-point model (similar to the legislators
10 | example), estimating both the locations of the justices on a latent ideological
11 | dimension, and two parameters specific to each case (corresponding to the item
12 | difficulty and item discrimination parameters of a two-parameter IRT
13 | model).[^judges-src] The data consist of the decisions of Justices Rehnquist,
14 | Stevens, O'Connor, Scalia, Kennedy, Souter, Thomas, Ginsburg, and Breyer, in
15 | that order, $i = 1, \dots, 9$. The decisions are coded 1 for votes with the
16 | majority, 0 for votes against the majority, and `NA` for abstentions.
17 |
18 | In these models, the only observed data are votes, and the analyst wants to
19 | model those votes as a function of legislator-specific ($\theta_i$) and
20 | vote-specific ($\alpha_j$, $\beta_j$) parameters. The vote of legislator $i$ on
21 | roll-call $j$ ($y_{i,j}$) is a function of the legislator's ideal point ($\theta_i$),
22 | the vote's difficulty ($\alpha_j$), and the vote's discrimination ($\beta_j$):
23 | $$
24 | \begin{aligned}[t]
25 | y_{i,j} &\sim \mathsf{Bernoulli}(\pi_{i,j}) \\
26 | \pi_{i,j} &= \frac{1}{1 + \exp(-\mu_{i,j})} \\
27 | \mu_{i,j} &= \beta_j \theta_i - \alpha_j
28 | \end{aligned}
29 | $$
30 |
31 | $$
32 | \begin{aligned}[t]
33 | \beta_j &\sim \mathsf{Normal}(0, 2.5) \\
34 | \alpha_j &\sim \mathsf{Normal}(0, 5) \\
35 | \theta_i &\sim \mathsf{Normal}(0, 1) \\
36 | \end{aligned}
37 | $$
38 |
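For intuition about the scale of these parameters, the implied probability of voting with the majority is the inverse logit of $\beta_j \theta_i - \alpha_j$; a small illustration with made-up parameter values (not estimates):

```{r judges_irt_example}
# P(vote with majority) for an ideal point theta on an item (alpha, beta)
theta <- 1.5
alpha <- 0.5
beta <- 1
plogis(beta * theta - alpha)  # about 0.73
```
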
39 | ```{r sc9497}
40 | data("sc9497", package = "pscl")
41 | ```
42 | To simplify the analysis, the outcomes will be aggregated to "Yes", "No", and missing values (which are dropped from the analysis).
43 | ```{r}
44 | sc9497_vote_data <- tibble(vote = colnames(sc9497$votes)) %>%
45 | mutate(.vote_id = row_number())
46 |
47 | sc9497_legis_data <- as.data.frame(sc9497$legis.names) %>%
48 | rownames_to_column("judge") %>%
49 | mutate(.judge_id = row_number())
50 |
51 | sc9497_votes <- sc9497$votes %>%
52 | as.data.frame() %>%
53 | rownames_to_column("judge") %>%
54 | gather(vote, yea, -judge) %>%
55 | filter(!is.na(yea)) %>%
56 | inner_join(dplyr::select(sc9497_vote_data, vote, .vote_id), by = "vote") %>%
57 | inner_join(dplyr::select(sc9497_legis_data, judge, .judge_id), by = "judge")
58 | ```
59 |
60 | ```{r message=FALSE}
61 | # mod_ideal_point <- stan_model("ideal_point.stan")
62 | ```
63 | ```{r results='asis'}
64 | # mod_ideal_point
65 | ```
66 |
67 | [^judges-src]: This example is derived from Simon Jackman, "Judges: estimating the ideological locations of Supreme Court justices", *BUGS Examples*, 2007-07-24, [URL](https://web-beta.archive.org/web/20070724034049/http://jackman.stanford.edu:80/mcmc/judges.odc).
68 |
--------------------------------------------------------------------------------
/multivarmissing.Rmd:
--------------------------------------------------------------------------------
1 | # Multivariate Missing Data {#multivarmissing}
2 |
3 | $$
4 | \DeclareMathOperator{\diag}{diag}
5 | $$
6 | ```{r multivarmissing_setup,message=FALSE}
7 | library("tidyverse")
8 | library("rstan")
9 | ```
10 |
11 | This example shows how to impute missing data. See @Stan2016a, Chapter 10 "Missing Data & Partially Known Parameters" for more discussion.[^multivarmissing-src]
12 |
13 | Consider a data set of 10 observations on 3 variables.
14 | Only one of the variables, $z$, is completely observed.
15 | The other two variables, $x$ and $y$, have a non-overlapping pattern of missing data.
16 |
17 | ```{r multivarmissing}
18 | multivarmissing <- tribble(
19 | ~x, ~y, ~z,
20 | 1, NA, NA,
21 | 2, NA, 4,
22 | 3, NA, 3,
23 | 4, NA, 5,
24 | 5, NA, 7,
25 | NA, 7, 9,
26 | NA, 8, 8,
27 | NA, 9, 11,
28 | NA, 8, 10,
29 | NA, 9, 8)
30 | ```
31 |
32 | The missing elements of $x$ and $y$ are parameters, and the observed elements of $x$, $y$, and $z$ are data.
33 | These are combined in the `transformed parameters` block and modeled.
34 |
35 | ## Separate Regressions
36 |
37 | We use $z$ to predict $x$,
38 | and $z$ and $x$ (both observed and imputed) to impute $y$.
39 |
40 | $$
41 | \begin{aligned}[t]
42 | x_i &\sim \mathsf{Normal}(\mu_{x,i}, \sigma_x) \\
43 | \mu_{x,i} &= \gamma_1 + \gamma_2 z_i \\
44 | y_i &\sim \mathsf{Normal}(\mu_{y,i}, \sigma_y) \\
45 | \mu_{y,i} &= \beta_1 + \beta_2 x_i + \beta_3 z_i \\
46 | z_i &\sim \mathsf{Normal}(\mu_z, \sigma_z)
47 | \end{aligned}
48 | $$
49 |
50 | The parameters are given weakly informative priors:
51 | $$
52 | \begin{aligned}[t]
53 | \sigma_x,\sigma_y,\sigma_z &\sim \mathsf{HalfCauchy}(0, 5) \\
54 | \gamma_1, \beta_1 &\sim \mathsf{Normal}(0, 10) \\
55 | \gamma_2, \beta_2, \beta_3 &\sim \mathsf{Normal}(0, 2.5)
56 | \end{aligned}
57 | $$
58 | Note that this assumes that $x$, $y$, and $z$ are standardized to have zero mean and unit variance.
59 |
60 | ```{r data_multivarmissing}
61 | data_multivarmissing <- within(list(), {
62 | N <- nrow(multivarmissing)
63 | x_obs <- multivarmissing$x[!is.na(multivarmissing$x)] %>%
64 | scale() %>% as.numeric()
65 | x_obs_idx <- array(which(!is.na(multivarmissing$x)))
66 | N_x_obs <- length(x_obs_idx)
67 | x_miss_idx <- array(which(is.na(multivarmissing$x)))
68 | N_x_miss <- length(x_miss_idx)
69 | y_obs <- multivarmissing$y[!is.na(multivarmissing$y)] %>%
70 | scale() %>% as.numeric()
71 | y_obs_idx <- array(which(!is.na(multivarmissing$y)))
72 | N_y_obs <- length(y_obs_idx)
73 | y_miss_idx <- array(which(is.na(multivarmissing$y)))
74 | N_y_miss <- length(y_miss_idx)
75 | z_obs <- multivarmissing$z[!is.na(multivarmissing$z)] %>%
76 | scale() %>% as.numeric()
77 | z_obs_idx <- array(which(!is.na(multivarmissing$z)))
78 | N_z_obs <- length(z_obs_idx)
79 | z_miss_idx <- array(which(is.na(multivarmissing$z)))
80 | N_z_miss <- length(z_miss_idx)
81 | alpha_loc <- 0
82 | alpha_scale <- 10
83 | beta_loc <- rep(0, 3)
84 | beta_scale <- c(10, 2.5, 2.5)
85 | gamma_loc <- rep(0, 2)
86 | gamma_scale <- c(10, 2.5)
87 | sigma_x_scale <- 5
88 | sigma_y_scale <- 5
89 | sigma_z_scale <- 5
90 | })
91 | ```
92 |
93 | ```{r mod_multivarmissing,cache.extra=tools::md5sum("stan/multivarmissing.stan"),message=FALSE,warning=FALSE}
94 | mod_multivarmissing <- stan_model("stan/multivarmissing.stan")
95 | ```
96 |
97 | ```{r}
98 | mod_multivarmissing
99 | ```
100 |
101 | ```{r fit_multivarmissing,results='hide'}
102 | fit_multivarmissing <-
103 | sampling(mod_multivarmissing, data = data_multivarmissing)
104 | ```
105 |
106 | ```{r}
107 | fit_multivarmissing
108 | ```
109 |
110 | ## Multivariate Normal
111 |
112 | Alternatively, $x$, $y$, and $z$ could be modeled as coming from a multivariate normal distribution.
113 | $$
114 | \begin{bmatrix}
115 | x_i \\
116 | y_i \\
117 | z_i
118 | \end{bmatrix} \sim
119 | \mathsf{Normal}(\mu, \Sigma)
120 | $$
121 | where $\mu$ and $\Sigma$ are given weakly informative priors,
122 | $$
123 | \begin{aligned}[t]
124 | \mu_k &\sim \mathsf{Normal}(0, 10) & k \in \{1, 2, 3\}, \\
125 | \Sigma &= \diag(\sigma) \, R \, \diag(\sigma), \\
126 | \sigma &\sim \mathsf{HalfCauchy}(0, 5), \\
127 | R &\sim \mathsf{LkjCorr}(2) .
128 | \end{aligned}
129 | $$
130 |
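The scale-times-correlation decomposition of $\Sigma$ is easy to verify directly in R (illustrative numbers, unrelated to the data):

```{r sigma_decomposition_check}
# Sigma = diag(sigma) %*% R %*% diag(sigma) recovers R as its correlation matrix
sigma <- c(1, 2, 3)
R <- diag(3)
R[lower.tri(R)] <- R[upper.tri(R)] <- 0.5
Sigma <- diag(sigma) %*% R %*% diag(sigma)
all.equal(cov2cor(Sigma), R)
```
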
131 | ```{r data_multivarmissing2}
132 | data_multivarmissing2 <- within(list(), {
133 |   X <- scale(as.matrix(multivarmissing)); N <- nrow(X); K <- ncol(X)
134 |   # observed and missing cells of X in (row, column, value) form
135 |   obs <- which(!is.na(X), arr.ind = TRUE); miss <- which(is.na(X), arr.ind = TRUE)
136 |   N_obs <- nrow(obs); X_obs <- X[obs]; X_obs_row <- obs[, 1]; X_obs_col <- obs[, 2]
137 |   N_miss <- nrow(miss); X_miss_row <- miss[, 1]; X_miss_col <- miss[, 2]
138 |   mu_loc <- rep(0, K); mu_scale <- rep(10, K); Sigma_scale_scale <- rep(5, K); Sigma_corr_L_eta <- 2
139 |   rm(X, obs, miss)
140 | ```
141 |
142 | ```{r mod_multivarmissing2,cache.extra=tools::md5sum("stan/multivarmissing2.stan")}
143 | mod_multivarmissing2 <- stan_model("stan/multivarmissing2.stan")
144 | ```
145 |
146 | ```{r}
147 | mod_multivarmissing2
148 | ```
149 |
150 | ```{r fit_multivarmissing2,results='hide'}
151 | fit_multivarmissing2 <-
152 |   sampling(mod_multivarmissing2, data = data_multivarmissing2)
153 | ```
154 |
155 | ```{r}
156 | fit_multivarmissing2
157 | ```
158 |
159 | [^multivarmissing-src]: This example is derived from Simon Jackman, "[Multivariate Missing Data](https://web-beta.archive.org/web/20020618183148/http://jackman.stanford.edu:80/mcmc/multivarmissing.odc)", 2002-06-18.
160 |
--------------------------------------------------------------------------------
/negbin.Rmd:
--------------------------------------------------------------------------------
1 | # Negative Binomial: Estimating Homicides in Census Tracts {#negbin}
2 |
3 | ```{r negbin_setup,message=FALSE,cache=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | library("rstanarm")
7 | ```
8 |
9 | The data come from the 1990 United States Census for Census Tracts in the city
10 | of St. Louis, Missouri, and from records of the St. Louis City Metropolitan
11 | Police Department for the years 1980 through 1994. For each of the N = 111
12 | Census Tracts with a nonzero population, an observation includes
13 | 
14 | - the median household income in 1990,
15 | - the percentage unemployed (base: the labor force), and
16 | - a count of the number of homicide incidents.
17 | 
18 | The number of homicides in this 15-year period totals 2,815. The average
19 | population of a Census Tract is 3,571, with a range of 249--8,791. Income has
20 | been rescaled by dividing by 1,000, which produces a range similar to that of
21 | percentage unemployed and standard deviations that are very close. Tract
22 | homicide counts range from 0 through 99 with a median of 16 (the mean is just
23 | over 25). An enhanced set of linear predictors does better than this two-predictor example.
24 |
25 | $$
26 | \begin{aligned}[t]
27 | y_i &\sim \mathsf{NegBinomial2}(\mu_i, \phi) \\
28 | \mu_i &= \exp(\eta_i) \\
29 | \eta_i &= \alpha + x_i \beta
30 | \end{aligned}
31 | $$
32 | The negative binomial distribution is parameterized so that $\mu \in \mathbb{R}^+$ is the location parameter and $\phi \in \mathbb{R}^+$ is the reciprocal overdispersion parameter, such that the mean and variance of a random variable $Y$ with a negative binomial distribution are
33 | $$
34 | \begin{aligned}[t]
35 | E[Y] &= \mu , \\
36 | V[Y] &= \mu + \frac{\mu^2}{\phi} .
37 | \end{aligned}
38 | $$
39 | As $\phi \to \infty$, the negative binomial approaches the Poisson distribution.
40 |
41 | The parameters are given weakly informative priors,
42 | $$
43 | \begin{aligned}[t]
44 | \alpha &\sim \mathsf{Normal}(0, 10), \\
45 | \beta_k &\sim \mathsf{Normal}(0, 2.5), \\
46 | \phi^{-1} &\sim \mathsf{HalfCauchy}(0, 5).
47 | \end{aligned}
48 | $$
49 |
50 | ```{r negbin_mod,results='hide',cache.extra=tools::md5sum("stan/negbin.stan")}
51 | negbin_mod <- stan_model("stan/negbin.stan")
52 | ```
53 | ```{r echo=FALSE,results='asis',cache=FALSE}
54 | negbin_mod
55 | ```
56 |
57 | ```{r negbin_data}
58 | data("st_louis_census", package = "bayesjackman")
59 | negbin_data <- within(list(), {
60 | y <- st_louis_census$i8094
61 | N <- length(y)
62 | X <- model.matrix(~ 0 + pcunemp9 + incrs, data = st_louis_census) %>% scale()
63 | K <- ncol(X)
64 | beta_mean <- rep(0, K)
65 | beta_scale <- rep(2.5, K)
66 | alpha_mean <- 0
67 | alpha_scale <- 10
68 | reciprocal_phi_scale <- 5
69 | })
70 | ```
71 |
72 | ```{r negbin_fit,results='hide'}
73 | negbin_fit <- sampling(negbin_mod, data = negbin_data)
74 | ```
75 | ```{r}
76 | summary(negbin_fit, par = c("alpha", "beta", "phi"))$summary
77 | ```
78 |
79 | We could also fit the model using the **rstanarm** function `stan_glm.nb` (or `stan_glm`):
80 | ```{r negbin_fit2}
81 | negbin_fit2 <- stan_glm.nb(i8094 ~ pcunemp9 + incrs, data = st_louis_census)
82 | ```
83 | ```{r}
84 | negbin_fit2
85 | ```
86 |
87 | Example derived from Simon Jackman, "negative binomial using the ones trick with log link", 2005-10-27, [URL](https://web-beta.archive.org/web/20051027082311/http://jackman.stanford.edu:80/mcmc/negbineg.odc).
88 |
--------------------------------------------------------------------------------
/reagan.Rmd:
--------------------------------------------------------------------------------
1 | # Reagan: linear regression with AR(1) disturbances {#reagan}
2 |
3 | ```{r reagan_setup,message=FALSE,cache=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | ```
7 |
8 | Ninety-six monthly observations on presidential job approval ratings for Ronald Reagan are modeled via linear regression, with a correction for first-order serial correlation among the disturbances.[^reagan]
9 | Note the marginal model for the first observation, and the conditioning on the lagged observation for months 2 through 96.
10 | A uniform prior over the stationary (-1,1) interval is employed for the residual AR(1) parameter.
11 |
12 | $$
13 | \begin{aligned}[t]
14 | y_i &= \mu_i + \epsilon_i , & \epsilon_i &= \theta \epsilon_{i - 1} + \nu_i , \\
15 | \mu_i &= \alpha + x_i' \beta , \\
16 | \nu_i &\sim \mathsf{Normal}(0, \sigma^2) ,
17 | \end{aligned}
18 | $$
19 | for $i \in 1, \dots, N$.
20 | Weakly informative priors for each parameter are used,
21 | $$
22 | \begin{aligned}[t]
23 | \alpha &\sim \mathsf{Normal}(0, 10), \\
24 | \beta_k &\sim \mathsf{Normal}(0, 2.5), & k \in 1, \dots, K, \\
25 | \sigma &\sim \mathsf{HalfCauchy}(0, 5), \\
26 | \theta &= 2 \theta^{*} - 1 , \\
27 | \theta^{*} &\sim \mathsf{Beta}(1, 1) .
28 | \end{aligned}
29 | $$
30 |
31 | ```{r ReaganApproval}
32 | data("ReaganApproval", package = "bayesjackman")
33 | ReaganApproval
34 | ```
35 |
36 | ```{r reagan_data}
37 | reagan_data <- within(list(), {
38 | y <- ReaganApproval$app
39 | N <- length(y)
40 | X <- model.matrix(~ 0 + infl + unemp, data = ReaganApproval) %>% scale()
41 | K <- ncol(X)
42 | alpha_loc <- 0
43 | alpha_scale <- 10
44 | beta_loc <- rep(0, K)
45 | beta_scale <- rep(2.5 * sd(y), K)
46 | sigma_scale <- 5 * sd(y)
47 | theta_a <- 1
48 | theta_b <- 1
49 | })
50 | ```
51 |
52 | ```{r mod_regar1,cache.extra=tools::md5sum("stan/regar1.stan")}
53 | mod_regar1 <- stan_model("stan/regar1.stan")
54 | ```
55 | ```{r}
56 | mod_regar1
57 | ```
58 |
59 | ```{r reagan_fit,results='hide'}
60 | reagan_fit <- sampling(mod_regar1, data = reagan_data)
61 | ```
62 |
63 | ```{r reagan_fit_summary}
64 | summary(reagan_fit, par = c("alpha", "beta", "theta", "sigma"))$summary
65 | ```
66 |
67 | ## Cochrane-Orcutt/Prais-Winsten
68 |
69 | An AR(1) error model can also be estimated using [Prais-Winsten](https://en.wikipedia.org/wiki/Prais%E2%80%93Winsten_estimation) estimation:
70 | $$
71 | \begin{aligned}[t]
72 | y_1 &\sim \mathsf{Normal}\left(\alpha + x_1' \beta, \frac{\sigma ^ 2}{1 - \theta ^ 2} \right), \\
73 | y_i &\sim \mathsf{Normal}\left(\theta y_{i - 1} + \alpha (1 - \theta) + \beta (X_i - \theta X_{i - 1}), \sigma ^ 2 \right) & i = 2, \dots, N
74 | \end{aligned}
75 | $$
76 |
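For a fixed $\theta$, the Prais-Winsten step is just a quasi-differencing of the data, with the first observation rescaled so its variance matches the stationary distribution; a minimal sketch for the response alone, with an assumed $\theta$ (not an estimate; the same transform applies to the columns of $X$):

```{r pw_transform_sketch}
# quasi-difference y for a given AR(1) parameter theta
theta <- 0.8
y <- ReaganApproval$app
y_star <- c(sqrt(1 - theta ^ 2) * y[1], y[-1] - theta * y[-length(y)])
head(y_star)
```
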
77 | ```{r mod_pw,cache.extra=tools::md5sum("stan/pw.stan")}
78 | mod_pw <- stan_model("stan/pw.stan")
79 | ```
80 | ```{r}
81 | mod_pw
82 | ```
83 |
84 | ```{r reagan_fit2,results='hide'}
85 | reagan_fit2 <- sampling(mod_pw, data = reagan_data)
86 | ```
87 |
88 | ```{r reagan_fit_summary2}
89 | summary(reagan_fit2, par = c("alpha", "beta", "theta", "sigma"))$summary
90 | ```
91 |
92 | [^reagan]: Example derived from Simon Jackman, "Reagan: linear regression with AR(1) disturbances," *BUGS Examples,* 2007-07-24, [URL](https://web-beta.archive.org/web/20070724034151/http://jackman.stanford.edu:80/mcmc/reagan.odc).
93 |
--------------------------------------------------------------------------------
/references.Rmd:
--------------------------------------------------------------------------------
1 | # References {-}
2 |
--------------------------------------------------------------------------------
/stan/aspirin.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | vector[N] s;
5 | real mu_loc;
6 | real mu_scale;
7 | real tau_scale;
8 | real tau_df;
9 | }
10 | parameters {
11 | vector[N] theta;
12 | real mu;
13 |   real<lower = 0.> tau;
14 | }
15 | model {
16 | mu ~ normal(mu_loc, mu_scale);
17 | tau ~ student_t(tau_df, 0., tau_scale);
18 | theta ~ normal(mu, tau);
19 | y ~ normal(theta, s);
20 | }
21 | generated quantities {
22 | vector[N] shrinkage;
23 | {
24 | real tau2;
25 | tau2 = pow(tau, 2.);
26 | for (i in 1:N) {
27 | real v;
28 | v = pow(s[i], 2);
29 | shrinkage[i] = v / (v + tau2);
30 | }
31 | }
32 | }
33 |
--------------------------------------------------------------------------------
/stan/aspirin2.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | vector[N] s;
5 | real mu_loc;
6 | real mu_scale;
7 | real tau_scale;
8 | real tau_df;
9 | }
10 | parameters {
11 | vector[N] theta_raw;
12 | real mu;
13 |   real<lower = 0.> tau;
14 | }
15 | transformed parameters {
16 | vector[N] theta;
17 | theta = tau * theta_raw + mu;
18 | }
19 | model {
20 | mu ~ normal(mu_loc, mu_scale);
21 | tau ~ student_t(tau_df, 0., tau_scale);
22 | theta_raw ~ normal(0., 1.);
23 | y ~ normal(theta, s);
24 | }
25 | generated quantities {
26 | vector[N] shrinkage;
27 | {
28 | real tau2;
29 | tau2 = pow(tau, 2.);
30 | for (i in 1:N) {
31 | real v;
32 | v = pow(s[i], 2);
33 | shrinkage[i] = v / (v + tau2);
34 | }
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/stan/bimodal.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of obs
3 | int N;
4 | int N_obs;
5 | int N_miss;
6 | vector[N_obs] x_obs;
7 | int x_obs_row[N_obs];
8 | int x_obs_col[N_obs];
9 | int x_miss_row[N_miss];
10 | int x_miss_col[N_miss];
11 | real df;
12 | }
13 | parameters {
14 | vector[2] mu;
15 | cov_matrix[2] Sigma;
16 | vector[N_miss] x_miss;
17 | }
18 | transformed parameters {
19 | // using an array of vectors is more convenient when sampling
20 |   // multi_normal than using a matrix
21 | vector[2] X[N];
22 | for (i in 1:N_obs) {
23 | X[x_obs_row[i], x_obs_col[i]] = x_obs[i];
24 | }
25 | for (i in 1:N_miss) {
26 | X[x_miss_row[i], x_miss_col[i]] = x_miss[i];
27 | }
28 | }
29 | model{
30 | for (i in 1:N) {
31 | X[i] ~ multi_student_t(df, mu, Sigma);
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/stan/campaign.stan:
--------------------------------------------------------------------------------
1 | // polling model
2 | data {
3 | int N;
4 | int T;
5 | vector[N] y;
6 | vector[N] s;
7 | int time[N];
8 | int H;
9 | int house[N];
10 | // initial and final values
11 | real xi_init;
12 | real xi_final;
13 | real delta_loc;
14 | real zeta_scale;
15 | real tau_scale;
16 | }
17 | parameters {
18 | vector[T - 1] omega;
19 |   real<lower = 0.> tau;
20 | vector[H] delta_raw;
21 | real zeta;
22 | }
23 | transformed parameters {
24 | vector[H] delta;
25 | vector[T - 1] xi;
26 | vector[N] mu;
27 | // this is necessary. If not centered the model is unidentified
28 | delta = (delta_raw - mean(delta_raw)) / sd(delta_raw) * zeta;
29 | xi[1] = xi_init;
30 | for (i in 2:(T - 1)) {
31 | xi[i] = xi[i - 1] + tau * omega[i - 1];
32 | }
33 | for (i in 1:N) {
34 | mu[i] = xi[time[i]] + delta[house[i]];
35 | }
36 | }
37 | model {
38 | // house effects
39 | delta_raw ~ normal(0., 1.);
40 | zeta ~ normal(0., zeta_scale);
41 | // latent state innovations
42 | omega ~ normal(0., 1.);
43 | // scale of innovations
44 | tau ~ cauchy(0, tau_scale);
45 | // final known effect
46 | xi_final ~ normal(xi[T - 1], tau);
47 | // daily polls
48 | y ~ normal(mu, s);
49 | }
50 |
--------------------------------------------------------------------------------
/stan/campaign2.stan:
--------------------------------------------------------------------------------
1 | // polling model
2 | data {
3 | int N;
4 | int T;
5 | vector[N] y;
6 | vector[N] s;
7 | int time[N];
8 | int H;
9 | int house[N];
10 | // initial and final values
11 | vector[2] xi_init;
12 | vector[2] xi_final;
13 | real delta_loc;
14 | real zeta_scale;
15 | vector[2] tau_scale;
16 | }
17 | parameters {
18 | vector[2] omega[T - 1];
19 |   vector<lower = 0.>[2] tau;
20 | vector[H] delta_raw;
21 | real zeta;
22 | }
23 | transformed parameters {
24 | vector[H] delta;
25 | vector[2] xi[T - 1];
26 | vector[N] mu;
27 | // this is necessary. If not centered the model is unidentified
28 | delta = (delta_raw - mean(delta_raw)) / sd(delta_raw) * zeta;
29 | xi[1] = xi_init;
30 | for (i in 2:(T - 1)) {
31 | // slope needs to be defined before the original data
32 | xi[i, 2] = xi[i - 1, 2] + tau[2] * omega[i - 1, 2];
33 | xi[i, 1] = xi[i - 1, 1] + xi[i, 2] + tau[1] * omega[i - 1, 1];
34 | }
35 | for (i in 1:N) {
36 | mu[i] = xi[time[i], 1] + delta[house[i]];
37 | }
38 | }
39 | model {
40 | // house effects
41 | delta_raw ~ normal(0., 1.);
42 | zeta ~ normal(0., zeta_scale);
43 | // latent state innovations
44 | for (i in 1:size(omega)) {
45 | omega[i] ~ normal(0., 1.);
46 | }
47 | // scale of innovations
48 | tau ~ normal(0, tau_scale);
49 | // final known effect
50 | xi_final ~ normal(xi[T - 1], tau);
51 | // daily polls
52 | y ~ normal(mu, s);
53 | }
54 |
--------------------------------------------------------------------------------
/stan/cancer1.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int r[2];
3 | int n[2];
4 | // param for beta prior on p
5 | vector[2] p_a;
6 | vector[2] p_b;
7 | }
8 | parameters {
9 | vector[2] p;
10 | }
11 | model {
12 | p ~ beta(p_a, p_b);
13 | r ~ binomial(n, p);
14 | }
15 | generated quantities {
16 | real delta;
17 | int delta_up;
18 | real lambda;
19 | int lambda_up;
20 |
21 | delta = p[1] - p[2];
22 | delta_up = delta > 0;
23 | lambda = logit(p[1]) - logit(p[2]);
24 | lambda_up = lambda > 0;
25 |
26 | }
27 |
--------------------------------------------------------------------------------
/stan/cancer2.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int r[2];
3 | int n[2];
4 | // param for beta prior on p
5 | real a_loc;
6 | real a_scale;
7 | real b_loc;
8 | real b_scale;
9 | }
10 | parameters {
11 | real a;
12 | real b;
13 | }
14 | transformed parameters {
15 | vector[2] p;
16 | p[1] = inv_logit(a + b);
17 | p[2] = inv_logit(a);
18 | }
19 | model {
20 | a ~ normal(a_loc, a_scale);
21 |   b ~ normal(b_loc, b_scale);
22 | r ~ binomial(n, p);
23 | }
24 | generated quantities {
25 | real delta;
26 | int delta_up;
27 | real lambda;
28 | int lambda_up;
29 |
30 | delta = p[1] - p[2];
31 | delta_up = delta > 0;
32 | lambda = logit(p[1]) - logit(p[2]);
33 | lambda_up = lambda > 0;
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/stan/corporatism.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of observations
3 | int N;
4 | // response variable
5 | vector[N] y;
6 | // number of predictors in the regression
7 | int K;
8 | // design matrix of country-year obs
9 | matrix[N, K] X;
10 | // number of countries
11 | int n_country;
12 | // countries for each observation
13 | int country[N];
14 | // design matrix of country-variables
15 | int J;
16 | matrix[n_country, J] U;
17 | // priors
18 | // mean and scale of normal prior on beta
19 | vector[K] beta_mean;
20 | vector[K] beta_scale;
21 | // mean and scale of normal prior on gamma
22 | real gamma_mean;
23 | real gamma_scale;
24 | // scale for half-Cauchy prior on tau
25 | real tau_scale;
26 | }
27 | parameters {
28 | // obs. errors.
29 |   real<lower = 0.> sigma;
30 | // country-specific terms
31 | vector[n_country] gamma;
32 | vector[J] delta;
33 | // regression coefficients
34 | vector[K] beta[n_country];
35 | // scale on country priors
36 |   real<lower = 0.> tau;
37 | }
38 | transformed parameters {
39 | vector[N] mu;
40 | vector[n_country] alpha;
41 | alpha = gamma + U * delta;
42 | for (i in 1:N) {
43 | mu[i] = alpha[country[i]] + X[i] * beta[country[i]];
44 | }
45 | }
46 | model {
47 | gamma ~ normal(gamma_mean, gamma_scale);
48 | tau ~ cauchy(0., tau_scale);
49 |   for (i in 1:n_country) {
50 |     beta[i] ~ normal(beta_mean, beta_scale);
51 |   }
52 | alpha ~ normal(gamma, tau);
53 | y ~ normal(mu, sigma);
54 | }
55 | generated quantities {
56 | }
57 |
--------------------------------------------------------------------------------
/stan/engines.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of observations
3 | int N;
4 | // observed data
5 | int N_obs;
6 | vector[N_obs] y_obs;
7 | // censored data
8 | int N_cens;
9 | vector[N_cens] y_cens;
10 | // covariates
11 | int K;
12 | matrix[N_obs, K] X_obs;
13 | matrix[N_cens, K] X_cens;
14 | // priors
15 | real alpha_loc;
16 | real alpha_scale;
17 | vector[K] beta_loc;
18 | vector[K] beta_scale;
19 | real sigma_scale;
20 | }
21 | parameters {
22 | real alpha;
23 | vector[K] beta;
24 |   real<lower = 0.> sigma;
25 | }
26 | transformed parameters {
27 | vector[N_obs] mu_obs;
28 | vector[N_cens] mu_cens;
29 | mu_obs = alpha + X_obs * beta;
30 | mu_cens = alpha + X_cens * beta;
31 | }
32 | model {
33 | sigma ~ cauchy(0, sigma_scale);
34 | alpha ~ normal(alpha_loc, alpha_scale);
35 | beta ~ normal(beta_loc, beta_scale);
36 | y_obs ~ normal(mu_obs, sigma);
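  // censored: each censored engine contributes log Pr(y* > y_cens), the normal log CCDF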
37 | target += normal_lccdf(y_cens | mu_cens, sigma);
38 | }
39 |
--------------------------------------------------------------------------------
/stan/florida.stan:
--------------------------------------------------------------------------------
1 | data {
2 | real y;
3 | real y_sd;
4 | real mu_mean;
5 | real mu_sd;
6 | }
7 | parameters {
8 | real mu;
9 | }
10 | model {
11 | mu ~ normal(mu_mean, mu_sd);
12 | y ~ normal(mu, y_sd);
13 | }
14 |
--------------------------------------------------------------------------------
/stan/genbeetles.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | int r[N];
4 | int n[N];
5 | vector[N] x;
6 | }
7 | parameters {
8 | real alpha;
9 | real beta;
10 |   real<lower = 0.> nu;
11 | }
12 | transformed parameters {
13 | vector[N] mu;
14 | for (i in 1:N) {
15 | mu[i] = pow(inv_logit(alpha + beta * x[i]), nu) ;
16 | }
17 | }
18 | model {
19 | alpha ~ normal(0., 10.);
20 | beta ~ normal(0., 2.5);
21 | nu ~ gamma(0.25, 0.25);
22 | r ~ binomial(n, mu);
23 | }
24 | generated quantities {
25 |   // probability at which the marginal effect is maximized
26 | real pdot;
27 | pdot = pow(inv_logit(nu), nu);
28 | }
29 |
--------------------------------------------------------------------------------
/stan/ideal_point_1.stan:
--------------------------------------------------------------------------------
1 | // ideal point model
2 | // identification:
3 | // - xi ~ hierarchical
4 | // - except fixed senators
5 | data {
6 | // number of individuals
7 | int N;
8 | // number of items
9 | int K;
10 | // observed votes
11 | int Y_obs;
12 | int y_idx_leg[Y_obs];
13 | int y_idx_vote[Y_obs];
14 | int y[Y_obs];
15 | // priors
16 | // on items
17 | real alpha_loc;
18 | real alpha_scale;
19 | real beta_loc;
20 | real beta_scale;
21 | // on legislators
22 | int N_xi_obs;
23 | int idx_xi_obs[N_xi_obs];
24 | vector[N_xi_obs] xi_obs;
25 | int N_xi_param;
26 | int idx_xi_param[N_xi_param];
27 | // prior on ideal points
28 | real zeta_loc;
29 | real zeta_scale;
30 | real tau_scale;
31 | }
32 | parameters {
33 | // item difficulties
34 | vector[K] alpha;
35 | // item discrimination
36 | vector[K] beta;
37 | // unknown ideal points
38 | vector[N_xi_param] xi_param;
39 | // hyperpriors
40 |   real<lower = 0.> tau;
41 | real zeta;
42 | }
43 | transformed parameters {
44 | // create xi from observed and parameter ideal points
45 | vector[Y_obs] mu;
46 | vector[N] xi;
47 | xi[idx_xi_param] = xi_param;
48 | xi[idx_xi_obs] = xi_obs;
49 | for (i in 1:Y_obs) {
50 | mu[i] = alpha[y_idx_vote[i]] + beta[y_idx_vote[i]] * xi[y_idx_leg[i]];
51 | }
52 | }
53 | model {
54 | alpha ~ normal(alpha_loc, alpha_scale);
55 | beta ~ normal(beta_loc, beta_scale);
56 | xi_param ~ normal(zeta, tau);
57 | xi_obs ~ normal(zeta, tau);
58 | zeta ~ normal(zeta_loc, zeta_scale);
59 | tau ~ cauchy(0., tau_scale);
60 | y ~ bernoulli_logit(mu);
61 | }
62 | generated quantities {
63 | vector[Y_obs] log_lik;
64 | for (i in 1:Y_obs) {
65 | log_lik[i] = bernoulli_logit_lpmf(y[i] | mu[i]);
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/stan/ideal_point_2.stan:
--------------------------------------------------------------------------------
1 | // ideal point model
2 | //
3 | // identification:
4 | // - ideal points ~ normal(0, 1)
5 | // - signs of item discrimination using skew normal
6 | data {
7 | // number of individuals
8 | int N;
9 | // number of items
10 | int K;
11 | // observed votes
12 | int Y_obs;
13 | int y_idx_leg[Y_obs];
14 | int y_idx_vote[Y_obs];
15 | int y[Y_obs];
16 | // priors
17 | // on items
18 | real alpha_loc;
19 | real alpha_scale;
20 | vector[K] beta_loc;
21 | vector[K] beta_scale;
22 | vector[K] beta_skew;
23 | }
24 | parameters {
25 | // item difficulties
26 | vector[K] alpha;
27 | // item discrimination
28 | vector[K] beta;
29 | // unknown ideal points
30 | vector[N] xi_raw;
31 | }
32 | transformed parameters {
33 | // create xi from observed and parameter ideal points
34 | vector[Y_obs] mu;
35 | vector[N] xi;
36 | xi = (xi_raw - mean(xi_raw)) ./ sd(xi_raw);
37 | for (i in 1:Y_obs) {
38 | mu[i] = alpha[y_idx_vote[i]] + beta[y_idx_vote[i]] * xi[y_idx_leg[i]];
39 | }
40 | }
41 | model {
42 | alpha ~ normal(alpha_loc, alpha_scale);
43 | beta ~ skew_normal(beta_loc, beta_scale, beta_skew);
44 | // soft center ideal points
45 | // in transformed block enforce hard-centering
46 | xi_raw ~ normal(0., 1.);
47 | y ~ bernoulli_logit(mu);
48 | }
49 | generated quantities {
50 | vector[Y_obs] log_lik;
51 | for (i in 1:Y_obs) {
52 | log_lik[i] = bernoulli_logit_lpmf(y[i] | mu[i]);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/stan/ideal_point_3.stan:
--------------------------------------------------------------------------------
1 | // ideal point model
2 | // identification:
3 | // - ideal points ~ normal(0, 1)
4 | // - signs of ideal points using skew normal
5 | data {
6 | // number of individuals
7 | int N;
8 | // number of items
9 | int K;
10 | // observed votes
11 | int Y_obs;
12 | int y_idx_leg[Y_obs];
13 | int y_idx_vote[Y_obs];
14 | int y[Y_obs];
15 | // priors
16 | // on items
17 | real alpha_loc;
18 | real alpha_scale;
19 | real beta_loc;
20 | real beta_scale;
21 | // on ideal points
22 | vector[N] xi_skew;
23 | }
24 | parameters {
25 | // item difficulties
26 | vector[K] alpha;
27 | // item discrimination
28 | vector[K] beta;
29 | // unknown ideal points
30 | vector[N] xi_raw;
31 | }
32 | transformed parameters {
33 | // create xi from observed and parameter ideal points
34 | vector[Y_obs] mu;
35 | vector[N] xi;
36 |
37 | xi = (xi_raw - mean(xi_raw)) ./ sd(xi_raw);
38 | for (i in 1:Y_obs) {
39 | mu[i] = alpha[y_idx_vote[i]] + beta[y_idx_vote[i]] * xi[y_idx_leg[i]];
40 | }
41 | }
42 | model {
43 | alpha ~ normal(alpha_loc, alpha_scale);
44 | beta ~ normal(beta_loc, beta_scale);
45 | // soft center ideal points
46 | // in transformed block enforce hard-centering
47 | xi_raw ~ skew_normal(0., 1., xi_skew);
48 | y ~ bernoulli_logit(mu);
49 | }
50 | generated quantities {
51 | vector[Y_obs] log_lik;
52 |
53 | for (i in 1:Y_obs) {
54 | log_lik[i] = bernoulli_logit_lpmf(y[i] | mu[i]);
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/stan/ideal_point_4.stan:
--------------------------------------------------------------------------------
1 | // ideal point model
2 | // identification:
3 | // - ideal points ~ normal(0, 1)
4 | // - signs of item discrimination using bounds
5 | data {
6 | // number of individuals
7 | int N;
8 | // number of items
9 | int K;
10 | // observed votes
11 | int Y_obs;
12 | int y_idx_leg[Y_obs];
13 | int y_idx_vote[Y_obs];
14 | int y[Y_obs];
15 | // priors
16 | // on items
17 | real alpha_loc;
18 | real alpha_scale;
19 | vector[K] beta_loc;
20 | vector[K] beta_scale;
21 | int K_beta_pos;
22 | int beta_idx_pos[K_beta_pos];
23 | int K_beta_neg;
24 | int beta_idx_neg[K_beta_neg];
25 | int K_beta_unc;
26 | int beta_idx_unc[K_beta_unc];
27 | }
28 | parameters {
29 | // item difficulties
30 | vector[K] alpha;
31 | // item discrimination
32 | vector[K_beta_pos] beta_pos;
33 | vector[K_beta_neg] beta_neg;
34 | vector[K_beta_unc] beta_unc;
35 | // unknown ideal points
36 | vector[N] xi_raw;
37 | }
38 | transformed parameters {
39 | // create xi from observed and parameter ideal points
40 | vector[Y_obs] mu;
41 | vector[N] xi;
42 | vector[K] beta;
43 |
44 | beta[beta_idx_neg] = beta_neg;
45 | beta[beta_idx_pos] = beta_pos;
46 | beta[beta_idx_unc] = beta_unc;
47 |   xi = (xi_raw - mean(xi_raw)) / sd(xi_raw);
48 | for (i in 1:Y_obs) {
49 | mu[i] = alpha[y_idx_vote[i]] + beta[y_idx_vote[i]] * xi[y_idx_leg[i]];
50 | }
51 |
52 | }
53 | model {
54 | alpha ~ normal(alpha_loc, alpha_scale);
55 | beta_neg ~ normal(beta_loc, beta_scale);
56 | beta_pos ~ normal(beta_loc, beta_scale);
57 | beta_unc ~ normal(beta_loc, beta_scale);
58 | xi_raw ~ normal(0., 1.);
59 | y ~ bernoulli_logit(mu);
60 | }
61 | generated quantities {
62 | vector[Y_obs] log_lik;
63 |
64 | for (i in 1:Y_obs) {
65 | log_lik[i] = bernoulli_logit_lpmf(y[i] | mu[i]);
66 | }
67 | }
68 |
--------------------------------------------------------------------------------
/stan/ideal_point_5.stan:
--------------------------------------------------------------------------------
1 | // ideal point model
2 | // identification:
3 | // - xi ~ normal(0, 1)
4 | // - signs of xi
5 | data {
6 | // number of individuals
7 | int N;
8 | // number of items
9 | int K;
10 | // observed votes
11 | int Y_obs;
12 | int y_idx_leg[Y_obs];
13 | int y_idx_vote[Y_obs];
14 | int y[Y_obs];
15 | // priors
16 | // on items
17 | real alpha_loc;
18 | real alpha_scale;
19 | vector[K] beta_loc;
20 | vector[K] beta_scale;
21 | int N_xi_pos;
22 | int xi_idx_pos[N_xi_pos];
23 | int N_xi_neg;
24 | int xi_idx_neg[N_xi_neg];
25 | int N_xi_unc;
26 | int xi_idx_unc[N_xi_unc];
27 | }
28 | parameters {
29 | // item difficulties
30 | vector[K] alpha;
31 | // item discrimination
32 | vector[K] beta;
33 | // unknown ideal points
34 | vector[N_xi_pos] xi_pos;
35 | vector[N_xi_neg] xi_neg;
36 | vector[N_xi_unc] xi_unc;
37 | }
38 | transformed parameters {
39 | // create xi from observed and parameter ideal points
40 | vector[Y_obs] mu;
41 | vector[N] xi;
42 | xi[xi_idx_neg] = xi_neg;
43 | xi[xi_idx_pos] = xi_pos;
44 | xi[xi_idx_unc] = xi_unc;
45 | for (i in 1:Y_obs) {
46 | mu[i] = alpha[y_idx_vote[i]] + beta[y_idx_vote[i]] * xi[y_idx_leg[i]];
47 | }
48 | }
49 | model {
50 | alpha ~ normal(alpha_loc, alpha_scale);
51 | beta ~ normal(beta_loc, beta_scale);
52 | xi_neg ~ normal(0., 1.);
53 | xi_pos ~ normal(0., 1.);
54 | xi_unc ~ normal(0., 1.);
55 | y ~ bernoulli_logit(mu);
56 | }
57 | generated quantities {
58 | vector[Y_obs] log_lik;
59 | for (i in 1:Y_obs) {
60 | log_lik[i] = bernoulli_logit_lpmf(y[i] | mu[i]);
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/stan/judges.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of items
3 | int K;
4 | // number of individuals
5 | int N;
6 | // observed votes
7 | int Y_obs;
8 | int y_idx_leg[Y_obs];
9 | int y_idx_vote[Y_obs];
10 | int y[Y_obs];
11 | // ideal points
12 | vector[N] xi_loc;
13 | vector[N] xi_scale;
14 | vector[N] xi_skew;
15 | // priors
16 | vector[K] alpha_loc;
17 | vector[K] alpha_scale;
18 | vector[K] beta_loc;
19 | vector[K] beta_scale;
20 | }
21 | parameters {
22 | // item difficulties
23 | vector[K] alpha;
24 | // item cutpoints
25 | vector[K] beta;
26 | // unknown ideal points
27 | vector[N] xi;
28 | }
29 | transformed parameters {
30 | // create xi from observed and parameter ideal points
31 | vector[Y_obs] mu;
32 | for (i in 1:Y_obs) {
33 | mu[i] = beta[y_idx_vote[i]] * xi[y_idx_leg[i]] - alpha[y_idx_vote[i]];
34 | }
35 | }
36 | model {
37 | alpha ~ normal(alpha_loc, alpha_scale);
38 | beta ~ normal(beta_loc, beta_scale);
39 | xi ~ skew_normal(xi_loc, xi_scale, xi_skew);
40 | y ~ binomial_logit(1, mu);
41 | }
42 | generated quantities {
43 | vector[Y_obs] log_lik;
44 | for (i in 1:Y_obs) {
45 | log_lik[i] = binomial_logit_lpmf(y[i] | 1, mu[i]);
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/stan/logit.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // response
3 | int N;
4 | int y[N];
5 | // covariates
6 | int K;
7 | matrix[N, K] X;
8 | // priors
9 | real alpha_loc;
10 | real alpha_scale;
11 | vector[K] beta_loc;
12 | vector[K] beta_scale;
13 | }
14 | parameters {
15 | real alpha;
16 | vector[K] beta;
17 | }
18 | transformed parameters {
19 | // linear predictor
20 | vector[N] eta;
21 | eta = alpha + X * beta;
22 | }
23 | model {
24 | alpha ~ normal(alpha_loc, alpha_scale);
25 | beta ~ normal(beta_loc, beta_scale);
26 | // y ~ bernoulli(inv_logit(eta));
27 | // this is faster and more numerically stable
28 | y ~ bernoulli_logit(eta);
29 | }
30 | generated quantities {
31 | // log-likelihood of each obs
32 | vector[N] log_lik;
33 | // probability
34 | vector[N] mu;
35 | for (i in 1:N) {
36 | mu[i] = inv_logit(eta[i]);
37 | log_lik[i] = bernoulli_logit_lpmf(y[i] | eta[i]);
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/stan/logit2.stan:
--------------------------------------------------------------------------------
1 | functions {
2 | real pct_correct_pred(int[] y, vector mu) {
3 | real out;
4 | int N;
5 | N = num_elements(mu);
6 | out = 0.;
7 | for (i in 1:N) {
8 | if (y[i]) {
9 | out = out + int_step(mu[i] >= 0.5);
10 | } else {
11 | out = out + int_step(mu[i] < 0.5);
12 | }
13 | }
14 | out = out / N;
15 | return out;
16 | }
17 | real expected_pct_correct_pred(int[] y, vector mu) {
18 | real out;
19 | int N;
20 | N = num_elements(mu);
21 | out = 0.;
22 | for (i in 1:N) {
23 | if (y[i]) {
24 | out = out + mu[i];
25 | } else {
26 | out = out + (1. - mu[i]);
27 | }
28 | }
29 | out = out / N;
30 | return out;
31 | }
32 | }
33 | data {
34 | // response
35 | int N;
36 | int y[N];
37 | // covariates
38 | int K;
39 | matrix[N, K] X;
40 | // priors
41 | real alpha_loc;
42 | real alpha_scale;
43 | vector[K] beta_loc;
44 | vector[K] beta_scale;
45 | }
46 | parameters {
47 | real alpha;
48 | vector[K] beta;
49 | }
50 | transformed parameters {
51 | // linear predictor
52 | vector[N] eta;
53 | eta = alpha + X * beta;
54 | }
55 | model {
56 | alpha ~ normal(alpha_loc, alpha_scale);
57 | beta ~ normal(beta_loc, beta_scale);
58 | // y ~ bernoulli(inv_logit(eta));
59 | // this is faster and more numerically stable
60 | y ~ bernoulli_logit(eta);
61 | }
62 | generated quantities {
63 | // log-likelihood of each obs
64 | vector[N] log_lik;
65 | // probability
66 | vector[N] mu;
67 | // simulated outcomes
68 | int y_rep[N];
69 | // percent correctly predicted
70 | real PCP;
71 | real ePCP;
72 | for (i in 1:N) {
73 | mu[i] = inv_logit(eta[i]);
74 | log_lik[i] = bernoulli_logit_lpmf(y[i] | eta[i]);
75 | y_rep[i] = bernoulli_rng(mu[i]);
76 | }
77 | PCP = pct_correct_pred(y, mu);
78 | ePCP = expected_pct_correct_pred(y, mu);
79 | }
80 |
--------------------------------------------------------------------------------
/stan/mnl.stan:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/stan/mnl.stan
--------------------------------------------------------------------------------
/stan/multivarmissing.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | // X
4 | int N_x_obs;
5 | int N_x_miss;
6 | int x_obs_idx[N_x_obs];
7 | vector[N_x_obs] x_obs;
8 | int x_miss_idx[N_x_miss];
9 | // Y
10 | int N_y_obs;
11 | int N_y_miss;
12 | int y_obs_idx[N_y_obs];
13 | vector[N_y_obs] y_obs;
14 | int y_miss_idx[N_y_miss];
15 | // Z
16 | int N_z_obs;
17 | int N_z_miss;
18 | int z_obs_idx[N_z_obs];
19 | vector[N_z_obs] z_obs;
20 | int z_miss_idx[N_z_miss];
21 | // priors
22 | real sigma_x_scale;
23 | real sigma_z_scale;
24 | real sigma_y_scale;
25 | real alpha_loc;
26 | real alpha_scale;
27 | vector[2] gamma_loc;
28 | vector[2] gamma_scale;
29 | vector[3] beta_loc;
30 | vector[3] beta_scale;
31 | }
32 | parameters {
33 | vector[2] gamma;
34 | vector[3] beta;
35 | real alpha;
36 |   real<lower = 0.> sigma_x;
37 |   real<lower = 0.> sigma_y;
38 |   real<lower = 0.> sigma_z;
39 | // missing observations
40 | vector[N_x_miss] x_miss;
41 | vector[N_y_miss] y_miss;
42 | vector[N_z_miss] z_miss;
43 | }
44 | transformed parameters {
45 | vector[N] x;
46 | vector[N] y;
47 | vector[N] z;
48 | x[x_miss_idx] = x_miss;
49 | x[x_obs_idx] = x_obs;
50 | y[y_miss_idx] = y_miss;
51 | y[y_obs_idx] = y_obs;
52 | z[z_miss_idx] = z_miss;
53 | z[z_obs_idx] = z_obs;
54 | }
55 | model {
56 | x ~ normal(gamma[1] + gamma[2] * z, sigma_x);
57 | y ~ normal(beta[1] + beta[2] * x + beta[3] * z, sigma_y);
58 | z ~ normal(alpha, sigma_z);
59 | alpha ~ normal(alpha_loc, alpha_scale);
60 | gamma ~ normal(gamma_loc, gamma_scale);
61 | beta ~ normal(beta_loc, beta_scale);
62 | sigma_x ~ cauchy(0., sigma_x_scale);
63 | sigma_y ~ cauchy(0., sigma_y_scale);
64 | sigma_z ~ cauchy(0., sigma_z_scale);
65 | }
66 |
--------------------------------------------------------------------------------
/stan/multivarmissing2.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of obs
3 | int N;
4 | // number of variables
5 | int K;
6 | // X
7 | int N_obs;
8 | vector[N_obs] X_obs;
9 | int X_obs_row[N_obs];
10 | int X_obs_col[N_obs];
11 | int N_miss;
12 | int X_miss_row[N_miss];
13 | int X_miss_col[N_miss];
14 | // priors
15 | vector[K] Sigma_scale_scale;
16 | real Sigma_corr_L_eta;
17 | vector[K] mu_loc;
18 | vector[K] mu_scale;
19 | }
20 | parameters {
21 | vector[K] mu;
22 | vector<lower = 0.>[K] Sigma_scale;
23 | cholesky_factor_corr[K] Sigma_corr_L;
24 | vector[N_miss] X_miss;
25 | }
26 | transformed parameters {
27 | vector[K] X[N];
28 | for (i in 1:N_obs) {
29 | X[X_obs_row[i], X_obs_col[i]] = X_obs[i];
30 | }
31 | for (i in 1:N_miss) {
32 | X[X_miss_row[i], X_miss_col[i]] = X_miss[i];
33 | }
34 | }
35 | model {
36 | Sigma_corr_L ~ lkj_corr_cholesky(Sigma_corr_L_eta);
37 | Sigma_scale ~ cauchy(0., Sigma_scale_scale);
38 | for (i in 1:N) {
39 | X[i] ~ multi_normal_cholesky(mu, diag_pre_multiply(Sigma_scale, Sigma_corr_L));
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/stan/negbin.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | int y[N];
4 | int K;
5 | matrix[N, K] X;
6 | // priors
7 | real alpha_mean;
8 | real alpha_scale;
9 | vector[K] beta_mean;
10 | vector[K] beta_scale;
11 | real reciprocal_phi_scale;
12 | }
13 | parameters {
14 | real alpha;
15 | vector[K] beta;
16 | real<lower = 0.> reciprocal_phi;
17 | }
18 | transformed parameters {
19 | vector[N] eta;
20 | real phi;
21 | eta = alpha + X * beta;
22 | phi = 1. / reciprocal_phi;
23 | }
24 | model {
25 | reciprocal_phi ~ cauchy(0., reciprocal_phi_scale);
26 | alpha ~ normal(alpha_mean, alpha_scale);
27 | beta ~ normal(beta_mean, beta_scale);
28 | y ~ neg_binomial_2_log(eta, phi);
29 | }
30 | generated quantities {
31 | vector[N] mu;
32 | vector[N] log_lik;
33 | vector[N] y_rep;
34 | mu = exp(eta);
35 | for (i in 1:N) {
36 | log_lik[i] = neg_binomial_2_log_lpmf(y[i] | eta[i], phi);
37 | y_rep[i] = neg_binomial_2_rng(mu[i], phi);
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/stan/normal.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | real mu_mean;
5 | real mu_scale;
6 | real sigma_scale;
7 | }
8 | parameters {
9 | real mu;
10 | real<lower = 0.> sigma;
11 | }
12 | model {
13 | mu ~ normal(mu_mean, mu_scale);
14 | sigma ~ cauchy(0., sigma_scale);
15 | y ~ normal(mu, sigma);
16 | }
17 |
--------------------------------------------------------------------------------
/stan/orderedlogit.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of observations
3 | int N;
4 | // number of response categories
5 | int K;
6 | // response
7 | int y[N];
8 | // regression design matrix
9 | int D;
10 | matrix[N, D] X;
11 | }
12 | parameters {
13 | // ordered logistic distribution cutpoints
14 | ordered[K - 1] gamma;
15 | // intercept and coefficients in regression
16 | real alpha;
17 | vector[D] beta;
18 | }
19 | transformed parameters {
20 | // linear predictor in logit scale;
21 | vector[N] eta;
22 | eta = alpha + X * beta;
23 | }
24 | model {
25 | for (i in 1:N) {
26 | y[i] ~ ordered_logistic(eta[i], gamma);
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/stan/probit.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // response
3 | int N;
4 | int y[N];
5 | // covariates
6 | int K;
7 | matrix[N, K] X;
8 | // priors
9 | real alpha_mean;
10 | real alpha_scale;
11 | vector[K] beta_mean;
12 | vector[K] beta_scale;
13 | }
14 | parameters {
15 | real alpha;
16 | vector[K] beta;
17 | }
18 | transformed parameters {
19 | // linear predictor
20 | vector[N] eta;
21 | vector[N] mu;
22 | eta = alpha + X * beta;
23 | // mu = Phi(eta);
24 | // Phi_approx is faster
25 | mu = Phi_approx(eta);
26 | }
27 | model {
28 | alpha ~ normal(alpha_mean, alpha_scale);
29 | beta ~ normal(beta_mean, beta_scale);
30 | y ~ bernoulli(mu);
31 | }
32 | generated quantities {
33 | // log-likelihood of each obs
34 | vector[N] log_lik;
35 | for (i in 1:N) {
36 | log_lik[i] = bernoulli_lpmf(y[i] | mu[i]);
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/stan/pw.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of observations
3 | // need at least two to estimate
4 | int N;
5 | // response
6 | vector[N] y;
7 | // regression design matrix
8 | int K;
9 | matrix[N, K] X;
10 | // priors
11 | real alpha_loc;
12 | real alpha_scale;
13 | vector[K] beta_loc;
14 | vector[K] beta_scale;
15 | real sigma_scale;
16 | real theta_a;
17 | real theta_b;
18 | }
19 | parameters {
20 | // regression coefficients
21 | real alpha;
22 | vector[K] beta;
23 | // error scale
24 | real<lower = 0.> sigma;
25 | // lag coefficients
26 | real<lower = 0., upper = 1.> theta_raw;
27 | }
28 | transformed parameters {
29 | // observation means
30 | vector[N] mu;
31 | // lag coefficient;
32 | real theta;
33 | // convert range of theta from (0, 1) to (-1, 1)
34 | theta = (2. * theta_raw - 1.);
35 | // regression
36 | mu[1] = alpha + dot_product(beta, X[1, ]);
37 | mu[2:N] = theta * y[1:(N - 1)] + alpha * (1 - theta) + (X[2:N, ] - theta * X[1:(N - 1), ]) * beta;
38 | }
39 | model {
40 | alpha ~ cauchy(alpha_loc, alpha_scale);
41 | beta ~ cauchy(beta_loc, beta_scale);
42 | theta_raw ~ beta(theta_a, theta_b);
43 | sigma ~ cauchy(0, sigma_scale);
44 | y[1] ~ normal(mu[1], sigma / sqrt(1 - theta ^ 2));
45 | y[2:N] ~ normal(mu[2:N], sigma);
46 | }
47 |
--------------------------------------------------------------------------------
/stan/regar1.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of observations
3 | // need at least two to estimate
4 | int N;
5 | // response
6 | vector[N] y;
7 | // regression design matrix
8 | int K;
9 | matrix[N, K] X;
10 | // priors
11 | real alpha_loc;
12 | real alpha_scale;
13 | vector[K] beta_loc;
14 | vector[K] beta_scale;
15 | real sigma_scale;
16 | real theta_a;
17 | real theta_b;
18 | }
19 | parameters {
20 | // regression coefficients
21 | real alpha;
22 | vector[K] beta;
23 | // error scale
24 | real<lower = 0.> sigma;
25 | // lag coefficients
26 | real<lower = 0., upper = 1.> theta_raw;
27 | }
28 | transformed parameters {
29 | // observation means
30 | vector[N] mu;
31 | // error terms
32 | vector[N] epsilon;
33 | // lag coefficient;
34 | real theta;
35 | // convert range of theta from (0, 1) to (-1, 1)
36 | theta = (2. * theta_raw - 1.);
37 | // regression
38 | mu = alpha + X * beta;
39 | // construct errors
40 | epsilon[1] = y[1] - mu[1];
41 | for (i in 2:N) {
42 | epsilon[i] = y[i] - mu[i] - theta * epsilon[i - 1];
43 | }
44 | }
45 | model {
46 | alpha ~ cauchy(alpha_loc, alpha_scale);
47 | beta ~ cauchy(beta_loc, beta_scale);
48 | theta_raw ~ beta(theta_a, theta_b);
49 | sigma ~ cauchy(0, sigma_scale);
50 | for (i in 2:N) {
51 | y[i] ~ normal(mu[i] + theta * epsilon[i - 1], sigma);
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/stan/resistant.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | int K;
5 | matrix[N, K] X;
6 | int Y;
7 | int year[N];
8 | // priors
9 | real sigma_scale;
10 | vector[K] beta_loc;
11 | vector[K] beta_scale;
12 | real alpha_loc;
13 | real alpha_scale;
14 | }
15 | parameters {
16 | vector[Y] alpha;
17 | vector[K] beta;
18 | real<lower = 0.> nu;
19 | real<lower = 0.> sigma;
21 | }
22 | transformed parameters {
23 | vector[N] mu;
24 | for (i in 1:N) {
25 | mu[i] = alpha[year[i]] + X[i] * beta;
26 | }
27 | }
28 | model{
29 | // priors for error variance
30 | sigma ~ cauchy(0., sigma_scale);
31 | // priors for year intercepts
32 | alpha ~ normal(alpha_loc, alpha_scale);
33 | // priors for the regression coefficients
34 | beta ~ normal(beta_loc, beta_scale);
35 | // degrees of freedom
36 | nu ~ gamma(2, 0.1);
37 | // likelihood
38 | y ~ student_t(nu, mu, sigma);
39 | }
40 | generated quantities {
41 | real delta;
42 | delta = beta[3] + beta[4];
43 | }
44 |
--------------------------------------------------------------------------------
/stan/resistant2.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | int K;
5 | matrix[N, K] X;
6 | int Y;
7 | int year[N];
8 | // priors
9 | real sigma_scale;
10 | vector[K] beta_loc;
11 | vector[K] beta_scale;
12 | real alpha_loc;
13 | real alpha_scale;
14 | }
15 | parameters {
16 | vector[Y] alpha;
17 | vector[K] beta;
18 | real<lower = 2.> nu;
19 | real<lower = 0.> sigma_raw;
21 | }
22 | transformed parameters {
23 | vector[N] mu;
24 | real sigma;
25 | for (i in 1:N) {
26 | mu[i] = alpha[year[i]] + X[i] * beta;
27 | }
28 | // parameterization so that sigma_raw is the standard deviation of the Student-t errors
29 | sigma = sigma_raw * sqrt((nu - 2) / nu);
30 | }
31 | model{
32 | // priors for the standard deviation
33 | sigma_raw ~ cauchy(0., sigma_scale);
34 | // priors for year intercepts
35 | alpha ~ normal(alpha_loc, alpha_scale);
36 | // priors for the regression coefficients
37 | beta ~ normal(beta_loc, beta_scale);
38 | // degrees of freedom
39 | nu ~ gamma(2, 0.1);
40 | // likelihood
41 | y ~ student_t(nu, mu, sigma);
42 | }
43 | generated quantities {
44 | real delta;
45 | delta = beta[3] + beta[4];
46 | }
47 |
--------------------------------------------------------------------------------
/stan/sophistication.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // number of respondents
3 | int N;
4 | // number of items
5 | int K;
6 | // binary responses
7 | int y_bern[K, N];
8 | // interviewer overall rating
9 | vector[N] y_norm;
10 | // interviewers
11 | int J;
12 | int interviewer[N];
13 | // priors
14 | real alpha_loc;
15 | real alpha_scale;
16 | real beta_loc;
17 | real beta_scale;
18 | real gamma_scale;
19 | real sigma_scale;
20 | real tau_scale;
21 | real delta_loc;
22 | real delta_scale;
23 | }
24 | parameters {
25 | // respondent latent score
26 | vector[N] xi_raw;
27 | // item discrimination
28 | vector[K] beta;
29 | // item difficulty
30 | vector[K] alpha;
31 | // coefficient in interviewer rating
32 | real gamma;
33 | // error in interviewer rating
34 | real<lower = 0.> sigma;
35 | // interviewer random effects
36 | vector[J] nu;
37 | // location of interviewer random effects
38 | real delta;
39 | // scale of interviewer random effects
40 | real<lower = 0.> tau;
41 | }
42 | transformed parameters {
43 | // interviewer rating
44 | vector[N] theta;
45 | // abilities
46 | vector[N] xi;
47 | xi = (xi_raw - mean(xi_raw));
48 | // respondent latent score
49 | for (i in 1:N) {
50 | theta[i] = gamma * xi[i] + nu[interviewer[i]];
51 | }
52 | }
53 | model {
54 | // priors
55 | xi_raw ~ normal(0., 1.);
56 | beta ~ normal(beta_loc, beta_scale);
57 | alpha ~ normal(alpha_loc, alpha_scale);
58 | gamma ~ normal(0., gamma_scale);
59 | sigma ~ cauchy(0., sigma_scale);
60 | tau ~ cauchy(0., tau_scale);
61 | delta ~ normal(delta_loc, delta_scale);
62 | // binary responses
63 | for (k in 1:K) {
64 | y_bern[k] ~ bernoulli_logit(beta[k] * xi - alpha[k]);
65 | }
66 | // interviewer random effects
67 | nu ~ normal(delta, tau);
68 | // interviewer score
69 | y_norm ~ normal(theta, sigma);
70 | }
71 |
--------------------------------------------------------------------------------
/stan/truncated.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | real U;
5 | real mu_mean;
6 | real mu_scale;
7 | real sigma_scale;
8 | }
9 | parameters {
10 | real mu;
11 | real<lower = 0.> sigma;
12 | }
13 | model {
14 | mu ~ normal(mu_mean, mu_scale);
15 | sigma ~ cauchy(0., sigma_scale);
16 | for (i in 1:N) {
17 | y[i] ~ normal(mu, sigma) T[, U];
18 | }
19 | }
20 |
--------------------------------------------------------------------------------
/stan/uk92.stan:
--------------------------------------------------------------------------------
1 | data {
2 | // multivariate outcome
3 | int N;
4 | int K;
5 | vector[K] y[N];
6 | // covariates
7 | int P;
8 | vector[P] X[N];
9 | // prior
10 | vector[K] alpha_loc;
11 | vector[K] alpha_scale;
12 | vector[P] beta_loc[K];
13 | vector[P] beta_scale[K];
14 | real Sigma_corr_shape;
15 | real Sigma_scale_scale;
16 | }
17 | parameters {
18 | // regression intercept
19 | vector[K] alpha;
20 | // regression coefficients
21 | vector[P] beta[K];
22 | // Cholesky factor of the correlation matrix
23 | cholesky_factor_corr[K] Sigma_corr_L;
24 | vector<lower = 0.>[K] Sigma_scale;
25 | // student-T degrees of freedom
26 | real<lower = 0.> nu;
27 | }
28 | transformed parameters {
29 | vector[K] mu[N];
30 | matrix[K, K] Sigma;
31 | // covariance matrix
32 | Sigma = crossprod(diag_pre_multiply(Sigma_scale, Sigma_corr_L));
33 | for (i in 1:N) {
34 | for (k in 1:K) {
35 | mu[i, k] = alpha[k] + dot_product(X[i], beta[k]);
36 | }
37 | }
38 | }
39 | model {
40 | for (k in 1:K) {
41 | alpha[k] ~ normal(alpha_loc[k], alpha_scale[k]);
42 | beta[k] ~ normal(beta_loc[k], beta_scale[k]);
43 | }
44 | nu ~ gamma(2, 0.1);
45 | Sigma_scale ~ cauchy(0., Sigma_scale_scale);
46 | Sigma_corr_L ~ lkj_corr_cholesky(Sigma_corr_shape);
47 | y ~ multi_student_t(nu, mu, Sigma);
48 | }
49 |
--------------------------------------------------------------------------------
/stan/undervote.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int n[4];
3 | int y[4];
4 | vector[4] pi_a;
5 | vector[4] pi_b;
6 | }
7 | parameters {
8 | vector<lower = 0., upper = 1.>[4] pi;
9 | }
10 | model {
11 | y ~ binomial(n, pi);
12 | pi ~ beta(pi_a, pi_b);
13 | }
14 | generated quantities {
15 | vector[2] delta;
16 | int good[2];
17 | delta[1] = pi[2] - pi[1];
18 | delta[2] = pi[4] - pi[3];
19 | good[1] = int_step(delta[1]);
20 | good[2] = int_step(delta[2]);
21 | }
22 |
--------------------------------------------------------------------------------
/stan/unidentified.stan:
--------------------------------------------------------------------------------
1 | data {
2 | real y;
3 | vector[2] theta_mean;
4 | vector[2] theta_scale;
5 | }
6 | parameters {
7 | vector[2] theta;
8 | }
9 | transformed parameters {
10 | real mu;
11 | mu = sum(theta);
12 | }
13 | model {
14 | y ~ normal(mu, 1.);
15 | theta ~ normal(theta_mean, theta_scale);
16 | }
17 |
--------------------------------------------------------------------------------
/truncated.Rmd:
--------------------------------------------------------------------------------
1 | # Truncation: How does Stan deal with truncation?
2 |
3 | ```{r truncated_setup,message=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | ```
7 |
8 | Suppose we observed $y = (1, \dots, 9)$,[^truncated-source]
9 | ```{r}
10 | y <- 1:9
11 | ```
12 | These observations are drawn from a normally distributed population with unknown mean ($\mu$) and variance ($\sigma^2$), truncated so that $y_i < 10$,
13 | $$
14 | \begin{aligned}[t]
15 | y_i &\sim \mathsf{Normal}(\mu, \sigma^2) I(-\infty, 10) .
16 | \end{aligned}
17 | $$
18 |
19 | With the truncation taken into account, the log-likelihood is
20 | $$
21 | \log L(y; \mu, \sigma) = \sum_{i = 1}^n \left( \log \phi(y_i; \mu, \sigma^2) - \log \Phi(10; \mu, \sigma^2) \right) ,
22 | $$
23 | where $\phi$ is the normal distribution PDF, and $\Phi$ is the normal distribution CDF.
24 |
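To make the adjustment concrete, here is a minimal R sketch (not part of the original example; `loglik_trunc` is a name introduced here) that evaluates this log-likelihood directly:
```{r}
# Truncated-normal log-likelihood: the normal log-density of each observation
# minus the log of the normalizing constant Phi(U; mu, sigma).
loglik_trunc <- function(mu, sigma, y, U = 10) {
  sum(dnorm(y, mu, sigma, log = TRUE) - pnorm(U, mu, sigma, log.p = TRUE))
}
loglik_trunc(mu = 5, sigma = sd(1:9), y = 1:9)
```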
25 | The posterior of this model is not well identified by the data, so the mean, $\mu$, and scale, $\sigma$, are given informative priors based on the data,
26 | $$
27 | \begin{aligned}[t]
28 | \mu &\sim \mathsf{Normal}(\bar{y}, s_y) ,\\
29 | \sigma &\sim \mathsf{HalfCauchy}(0, s_y) .
30 | \end{aligned}
31 | $$
32 | where $\bar{y}$ is the mean of $y$, and $s_y$ is the standard deviation of $y$. Alternatively, we could have standardized the data prior to estimation.
33 |
34 | ## Stan Model
35 |
36 | See @Stan2016a, Chapter 11 "Truncated or Censored Data" for more on how Stan handles truncation and censoring.
37 | In Stan, the `T` operator in a sampling statement,
38 | ```
39 | y ~ distribution(...) T[lower, upper];
40 | ```
41 | is used to adjust the log-posterior contribution for truncation.
42 |
43 | ```{r truncate_mod,results='hide'}
44 | truncate_mod <- stan_model("stan/truncated.stan")
45 | ```
46 | ```{r echo=FALSE,results='asis',cache=FALSE}
47 | truncate_mod
48 | ```
49 |
50 | ## Estimation
51 |
52 | ```{r truncate_data}
53 | truncate_data <- within(list(), {
54 | y <- y
55 | N <- length(y)
56 | U <- 10
57 | mu_mean <- mean(y)
58 | mu_scale <- sd(y)
59 | sigma_scale <- sd(y)
60 | })
61 | ```
62 |
63 | ```{r truncate_fit1,results='hide',message=FALSE}
64 | truncate_fit1 <- sampling(truncate_mod, data = truncate_data)
65 | ```
66 | ```{r}
67 | truncate_fit1
68 | ```
69 |
70 | We can compare these results to that of a model in which the truncation is not taken into account:
71 | $$
72 | \begin{aligned}[t]
73 | y_i &\sim \mathsf{Normal}(\mu, \sigma^2), \\
74 | \mu &\sim \mathsf{Normal}(\bar{y}, s_y) ,\\
75 | \sigma &\sim \mathsf{HalfCauchy}(0, s_y) .
76 | \end{aligned}
77 | $$
78 |
79 | ```{r truncate_mod2,results='hide'}
80 | truncate_mod2 <- stan_model("stan/normal.stan")
81 | ```
82 | ```{r echo=FALSE,results='asis',cache=FALSE}
83 | truncate_mod2
84 | ```
85 |
86 | ```{r truncate_fit2,results='hide'}
87 | truncate_fit2 <-
88 | sampling(truncate_mod2, data = truncate_data)
89 | ```
90 | ```{r}
91 | truncate_fit2
92 | ```
93 |
94 | We can compare the densities for $\mu$ and $\sigma$ in each of these models.
95 | ```{r truncted_plot_density}
96 | plot_density <- function(par) {
97 | bind_rows(
98 | tibble(value = rstan::extract(truncate_fit1, par = par)[[1]],
99 | model = "truncated"),
100 | tibble(value = rstan::extract(truncate_fit2, par = par)[[1]],
101 | model = "non-truncated")
102 | ) %>%
103 | ggplot(aes(x = value, colour = model, fill = model)) +
104 | geom_density(alpha = 0.3) +
105 | labs(x = eval(bquote(expression(.(as.name(par)))))) +
106 | theme(legend.position = "bottom")
107 | }
108 | ```
109 | ```{r truncate_plot_density_mu,fig.cap="Posterior density of $\\mu$ when estimated with and without truncation"}
110 | plot_density("mu")
111 | ```
112 | ```{r truncate_plot_density_sigma,fig.cap="Posterior density of $\\sigma$ when estimated with and without truncation"}
113 | plot_density("sigma")
114 | ```
115 |
116 | ## Questions
117 |
118 | 1. How are the densities of $\mu$ and $\sigma$ different under the two models? Why are they different?
119 | 1. Re-estimate the model with improper uniform priors for $\mu$ and $\sigma$. How do the posterior distributions change?
120 | 1. Suppose that the truncation points are unknown. Write a Stan model and estimate. See @Stan2016a, Section 11.2 "Unknown Truncation Points" for how to write such a model. How important is the prior you place on the truncation points?
121 |
122 | [^truncated-source]: This example is derived from Simon Jackman. "Truncation: How does WinBUGS deal with truncation?" *BUGS Examples*, 2007-07-24,
123 | [URL](https://web-beta.archive.org/web/20070724034035/http://jackman.stanford.edu:80/mcmc/SingleTruncation.odc).
124 |
--------------------------------------------------------------------------------
/turnout.Rmd:
--------------------------------------------------------------------------------
1 | # Turnout: logit/probit models for binary data {#turnout}
2 |
3 | ```{r turnout_setup,message=FALSE,cache=FALSE}
4 | library("tidyverse")
5 | library("rstanarm")
6 | library("rstan")
7 | ```
8 |
9 | ## Data
10 |
11 | The data comprise the first 2,000 (of 15,000+) observations in the 1992 [American National Election Studies](http://www.electionstudies.org/) (ANES).
12 | These data are included in the **Zelig** package as `turnout` and analyzed in @KingTomzWittenberg2000a.
13 | ```{r turnout}
14 | data("turnout", package = "Zelig")
15 | glimpse(turnout)
16 | ```
17 | We will model voting turnout as a function of covariates (age, education, income, race).
18 | ```{r turnout_formula}
19 | turnout_formula <- vote ~ poly(age, 2) + educate + income + race
20 | ```
21 |
22 | ## Logit Model
23 |
24 | Let $y_i \in \{0, 1\}$ be the decision to vote by respondent $i$ for $i \in 1, \dots, n$,
25 | $$
26 | \begin{aligned}[t]
27 | y_i &\sim \mathsf{Bernoulli}(\pi_i) , \\
28 | \pi_i &= \frac{1}{1 + e^{-\eta_i}} , \\
29 | \eta_i &= \alpha + x_i \beta,
30 | \end{aligned}
31 | $$
32 | where $x_i$ is a vector of covariates.
33 | The regression parameters, $\alpha$ and $\beta$, are given weakly informative priors on the logit scale,
34 | $$
35 | \begin{aligned}[t]
36 | \alpha &\sim \mathsf{Normal}(0, 10) , \\
37 | \beta_k &\sim \mathsf{Normal}(0, 2.5) .
38 | \end{aligned}
39 | $$
40 |
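As a quick prior-predictive sketch (not in the original), pushing draws of $\alpha$ through the inverse logit shows what a $\mathsf{Normal}(0, 10)$ prior implies on the probability scale:
```{r}
# Implied prior on pi = inv_logit(alpha) for alpha ~ Normal(0, 10);
# plogis() is the inverse-logit function in base R.
set.seed(1234)
summary(plogis(rnorm(1e4, mean = 0, sd = 10)))
```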
41 | The logit model in Stan is
42 | ```{r turnout_mod_logit,results='hide',cache.extra=tools::md5sum("stan/logit.stan")}
43 | turnout_mod_logit <- stan_model("stan/logit.stan")
44 | ```
45 | ```{r results='asis',echo=FALSE}
46 | turnout_mod_logit
47 | ```
48 |
49 | ```{r turnout_data}
50 | turnout_data <- within(list(), {
51 | N <- nrow(turnout)
52 | X <- scale(model.matrix(update(turnout_formula, . ~ 0 + .),
53 | data = turnout))
54 | K <- ncol(X)
55 | y <- turnout$vote
56 | alpha_loc <- 0
57 | alpha_scale <- 10
58 | beta_loc <- rep(0, K)
59 | beta_scale <- rep(2.5, K)
60 | })
61 | ```
62 |
63 | ```{r turnout_fit_logit,results='hide'}
64 | turnout_fit_logit <- sampling(turnout_mod_logit, data = turnout_data)
65 | ```
66 | ```{r}
67 | turnout_fit_logit
68 | ```
69 |
70 | ## Probit Model
71 |
72 | The only difference between the logit and probit models is the link function.
73 | The probit model uses the normal distribution CDF function instead of the inverse logit.
74 | $$
75 | \begin{aligned}[t]
76 | \pi_i &= \Phi(\eta_i) ,
77 | \end{aligned}
78 | $$
79 | where $\Phi$ is the standard normal distribution CDF.
80 | The priors for the probit model are adjusted by a factor of 1.6, corresponding to `dnorm(0) / dlogis(0)`,
81 | $$
82 | \begin{aligned}[t]
83 | \alpha &\sim \mathsf{Normal}(0, 16), \\
84 | \beta_k &\sim \mathsf{Normal}(0, 4) .
85 | \end{aligned}
86 | $$
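The factor of 1.6 can be checked directly in R:
```{r}
# Ratio of the standard normal to the standard logistic density at zero.
dnorm(0) / dlogis(0)
```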
87 | ```{r}
88 | turnout_data <- within(turnout_data, {
89 | alpha_scale <- 16
90 | beta_scale <- rep(4, K)
91 | })
92 | ```
93 |
94 | The probit model in Stan is
95 | ```{r turnout_mod_probit,results='hide',cache.extra=tools::md5sum("stan/probit.stan")}
96 | turnout_mod_probit <- stan_model("stan/probit.stan")
97 | ```
98 | ```{r results='asis',echo=FALSE}
99 | turnout_mod_probit
100 | ```
101 |
102 | Fit the model.
103 | ```{r turnout_fit_probit,results='hide'}
104 | turnout_fit_probit <-
105 | sampling(turnout_mod_probit,
106 | data = turnout_data)
107 | ```
108 | ```{r}
109 | turnout_fit_probit
110 | ```
111 |
112 | ## rstanarm
113 |
114 | These models can also be estimated with the **rstanarm** function `stan_glm`.
115 | ```{r turnout_fit_logit2,results='hide'}
116 | turnout_fit_logit2 <-
117 | stan_glm(turnout_formula,
118 | family = binomial(),
119 | data = turnout)
120 | ```
121 | ```{r}
122 | turnout_fit_logit2
123 | ```
124 |
125 | For the probit model, the prior scales need to be reduced to match the scale of the probit link.
126 | ```{r turnout_fit_probit2,results='hide'}
127 | turnout_fit_probit2 <-
128 | stan_glm(turnout_formula,
129 | family = binomial(link = "probit"),
130 | prior_intercept = normal(location = 0, scale = 8, autoscale = TRUE),
131 | prior = normal(location = 0, scale = 1.5, autoscale = TRUE),
132 | init = 0, chains = 1,
133 | data = turnout)
134 | ```
135 | ```{r}
136 | turnout_fit_probit2
137 | ```
138 |
139 | ## Questions {-}
140 |
141 | 1. Estimate the posterior distribution of the average marginal effect for each covariate using the methods in @HanmerKalkan2012a.
142 | 1. Estimate the expected percent correctly predicted (ePCP) and percent correctly predicted (PCP) as discussed in the [cosponsorship data](#cosponsor).
143 | 1. Estimate a generalized logit model to allow for asymmetric effects of covariates as described in the [General Beetles](#genbeetles) example and @Nagler1994a. Describe the substantive meaning of this generalization.
144 | 1. Is there any difference in the fit of the probit and logit? Which observations fit better or worse?
145 | 1. Estimate the model with a Student's $t$ CDF instead of logit or probit. This is called a robit model [@Liu2005a]. Compare its fit and estimates to those of the logit. Which observations fit better or worse? Describe the substantive meaning of this generalization.
146 |
--------------------------------------------------------------------------------
/uk92.Rmd:
--------------------------------------------------------------------------------
1 | # House of Commons elections: modeling with the multivariate Student-$t$ density {#uk92}
2 |
3 | ```{r uk92_setup,message=FALSE,cache=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | ```
7 |
8 | The data for this example consist of constituency vote proportions from the 1992 United Kingdom House of Commons election.
9 | These data come from @KatzKing1999a and were re-analyzed by @TomzTuckerWittenberg2002a.[^uk92-source]
10 | These data are included in the **pscl** package as `UKHouseOfCommons`:
11 | ```{r UKHoseOfCommons}
12 | (data("UKHouseOfCommons", package = "pscl"))
13 | glimpse(UKHouseOfCommons)
14 | ```
15 |
16 | The data consist of the vote proportions for 522 constituencies, for the three major UK parties: the Labor party, the Conservative Party, and the Liberal-Alliance.
17 | Instead of working with the vote proportions directly, we will work with log-odds ratios.
18 | This is common in the analysis of multinomial or "compositional" data [@Aitchison1982a].
19 | The column `y1` is the log-odds of Conservative to the Liberal-Democratic vote share, while `y2` is the log-odds of Labor to the Liberal-Democratic vote share.
20 |
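Concretely, writing $v_{\text{Con}}$, $v_{\text{Lab}}$, and $v_{\text{LD}}$ for a constituency's raw vote proportions (notation introduced here for exposition), the transform is
$$
y_1 = \log\left(\frac{v_{\text{Con}}}{v_{\text{LD}}}\right), \qquad
y_2 = \log\left(\frac{v_{\text{Lab}}}{v_{\text{LD}}}\right) .
$$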
21 | Let $y_{i,k}$, $k \in \{1, 2\}$, $i \in 1, \dots, N$, be the log-odds ratio of the vote share for party $k$ in constituency $i$.
22 | @KatzKing1999a noted that the distribution of the log-odds ratios appear to be heavy-tailed relative to the normal.
23 | Thus, like them, we will model the data with a multivariate Student's $t$ distribution with unknown degrees of freedom ($\nu$),
24 | $$
25 | \begin{aligned}[t]
26 | y_i &\sim \mathsf{StudentT}(\nu, \alpha + x_i' \beta, \Sigma) , & i \in 1, \dots, N .
27 | \end{aligned}
28 | $$
29 |
30 | For identification, as in a logit regression, either the intercept or the scale must be fixed. In the implementation below, $\Sigma$ is built from a correlation matrix and a separately estimated scale vector.
31 |
32 | Weakly informative priors are used for the regression parameters.
33 | The degrees of freedom of the multivariate Student $t$ distribution is treated as a parameter and given a weakly informative gamma prior that puts most of the prior density between 3 and 40 [@JuarezSteel2010a],
34 | $$
35 | \begin{aligned}[t]
36 | \alpha &\sim \mathsf{Normal}(0, 10) , \\
37 | \beta_p &\sim \mathsf{Normal}(0, 2.5), & p \in 1, \dots, P , \\
38 | \Sigma &\sim \mathsf{LkjCorr}(\eta) , \\
39 | \nu &\sim \mathsf{Gamma}(2, 0.1) .
40 | \end{aligned}
41 | $$
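The claim about the prior mass on $\nu$ can be verified directly (a quick sketch):
```{r}
# Prior probability that Gamma(2, 0.1) places on 3 < nu < 40 (about 0.87).
pgamma(40, shape = 2, rate = 0.1) - pgamma(3, shape = 2, rate = 0.1)
```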
42 |
48 | ```{r uk92_data}
49 | uk92_data <- within(list(), {
50 | y <- as.matrix(dplyr::select(UKHouseOfCommons, y1, y2))
51 | X <- model.matrix(~ 0 + y1lag + y2lag + coninc + labinc + libinc, data = UKHouseOfCommons) %>% scale()
52 | N <- nrow(y)
53 | K <- ncol(y)
54 | P <- ncol(X)
55 | alpha_loc <- rep(0, K)
56 | alpha_scale <- rep(10, K)
57 | beta_loc <- matrix(0, K, P)
58 | beta_scale <- matrix(2.5, K, P)
59 | Sigma_corr_shape <- 2
60 | Sigma_scale_scale <- 5
61 | })
62 | ```
63 |
64 | ```{r results='hide',cache.extra=tools::md5sum("stan/uk92.stan")}
65 | uk92_mod <- stan_model("stan/uk92.stan")
66 | ```
67 | ```{r results='asis',echo=FALSE}
68 | uk92_mod
69 | ```
70 |
71 | Fit the model in Stan.
72 | ```{r uk92_fit,results='hide'}
73 | uk92_fit <- sampling(uk92_mod, data = uk92_data, chains = 1)
74 | ```
75 | ```{r}
76 | summary(uk92_fit, par = c("nu", "alpha", "beta", "Sigma"))$summary
77 | ```
78 |
79 | ## Questions
80 |
81 | - Given this model, replicate some of the results in @KatzKing1999a.
82 | - Model the data using a multivariate normal model instead. How do the results differ? Which fits the data better? What does the value of $\nu$ from the multivariate Student t model tell you about the plausibility of the multivariate normal distribution?
83 | - @TomzTuckerWittenberg2002a suggest using seemingly unrelated regressions (SUR). Model the data with SUR. How does it compare in results and speed?
84 | - Could you model this using a multinomial model with the data provided? What data would you need?
85 |
86 | [^uk92-source]: Example derived from Simon Jackman, "House of Commons elections: modeling with the multivariate t density." *BUGS Examples* [URL](https://web-beta.archive.org/web/20070724034125/http://jackman.stanford.edu/mcmc/92.odc). Some language copied from the original.
87 |
--------------------------------------------------------------------------------
/undervote.Rmd:
--------------------------------------------------------------------------------
1 | # Undervoting for President, by Race: Difference in Two Binomial Proportions {#undervote}
2 |
3 | ```{r undervote_setup,message=FALSE,cache=FALSE}
4 | library("tidyverse")
5 | library("rstan")
6 | ```
7 |
8 | Does undervoting for the US president differ by race?
9 | Intentional undervoting occurs when a voter chooses not to cast a vote for
10 | an item on a ballot.
11 |
12 | @TomzHouweling2003a analyze this phenomenon using two surveys:
13 |
14 | - Voter News Service (VNS) exit poll for the 1992 election
15 | - American National Election Studies (ANES) for the 1964--2000 elections
16 |
17 | Each of these surveys asked voters whether they voted for president, as well as the race of the respondents.
18 | The results of these surveys are contained in the `undervote` data frame.
19 | The column `undervote` is the number of respondents who reported voting but not voting for president.
20 |
21 | ```{r undervote}
22 | undervote <- tribble(
23 | ~survey, ~race, ~n, ~undervote,
24 | "VNS", "black", 6537, 26,
25 | "VNS", "white", 44531, 91,
26 | "ANES", "black", 1101, 10,
27 | "ANES", "white", 9827, 57
28 | )
29 | ```
30 |
31 | ```{r echo=FALSE,results='asis'}
32 | undervote %>%
33 | mutate(`Survey` = survey,
34 | `Race` = race,
35 | `No. Voted` = n,
36 | `Didn't vote for president` = undervote) %>%
37 | knitr::kable()
38 | ```
39 |
40 | We are interested in analyzing the difference in proportions for each of these surveys independently.
41 | We will model the proportions of each race and survey,
42 | $$
43 | \begin{aligned}[t]
44 | y_i &\sim \mathsf{Binomial}(n_i, \pi_i) ,
45 | \end{aligned}
46 | $$
47 | where
48 | $$
49 | i \in \{ (\text{VNS},\text{black}), (\text{VNS},\text{white}), (\text{ANES},\text{black}), (\text{ANES},\text{white}) \} .
50 | $$
51 |
52 | We will model the proportions independently by assigning them identical independent uninformative priors,
53 |
54 | $$
55 | \begin{aligned}[t]
56 | \pi_i &\sim \mathsf{Beta}(1, 1) .
57 | \end{aligned}
58 | $$
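Because the beta prior is conjugate to the binomial likelihood, each $\pi_i$ also has a closed-form posterior, $\mathsf{Beta}(1 + y_i, 1 + n_i - y_i)$; the Stan treatment below generalizes this and makes the derived quantities easy to compute. As a sketch, the closed-form posterior quantiles for the VNS black respondents:
```{r}
# Conjugate posterior for the VNS black sample: Beta(1 + 26, 1 + 6537 - 26).
qbeta(c(0.025, 0.5, 0.975), 1 + 26, 1 + 6537 - 26)
```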
59 | The racial differences in undervoting in each survey are auxiliary quantities,
60 | $$
61 | \begin{aligned}[t]
62 | \delta_{\text{VNS}} &= \pi_{\text{VNS},\text{black}} - \pi_{\text{VNS},\text{white}} ,\\
63 | \delta_{\text{ANES}} &= \pi_{\text{ANES},\text{black}} - \pi_{\text{ANES},\text{white}} . \\
64 | \end{aligned}
65 | $$
66 | We are also interested in the probability that black undervoting is greater than white undervoting, $\Pr(\delta_j > 0)$, in each survey.
67 |
68 | ```{r undervote_mod,results='hide',cache.extra=tools::md5sum("stan/undervote.stan")}
69 | undervote_mod <- stan_model("stan/undervote.stan")
70 | ```
71 | ```{r results='asis',echo=FALSE}
72 | undervote_mod
73 | ```
74 |
75 | ```{r undervote_data}
76 | # this analysis depends on the order of the data frame
77 | undervote_data <-
78 | list(y = undervote$undervote,
79 | n = undervote$n,
80 | N = nrow(undervote),
81 | pi_a = rep(1, 4),
82 | pi_b = rep(1, 4))
83 | ```
84 |
85 | ```{r undervote_fit,results='hide'}
86 | undervote_fit <- sampling(undervote_mod, data = undervote_data)
87 | ```
88 | ```{r}
89 | undervote_fit
90 | ```
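$\Pr(\delta_j > 0)$ is the posterior mean of `good`; equivalently (a sketch of the post-processing), it is the fraction of posterior draws of `delta` above zero:
```{r}
# Posterior probability that black undervoting exceeds white undervoting,
# column 1 for the VNS survey and column 2 for the ANES survey.
delta_draws <- rstan::extract(undervote_fit, pars = "delta")$delta
colMeans(delta_draws > 0)
```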
91 |
92 | ## References {-}
93 |
94 | Simon Jackman, "[Undervoting for President, by Race: difference in two binomial proportions](https://web-beta.archive.org/web/20070724034102/http://jackman.stanford.edu:80/mcmc/undervote.odc)", *BUGS Examples* 2007-07-24.
95 |
--------------------------------------------------------------------------------
/unidentified.Rmd:
--------------------------------------------------------------------------------
1 | # Unidentified: Over-Parameterization of a Normal Mean {#unidentified}
2 |
3 | The following example illustrates the need for caution in diagnosing convergence, and is based on an example appearing in @CarlinLouis2000a [p174].
4 |
5 | Consider a model of the mean in which it is the additive sum of two parameters,
6 | $$
7 | \begin{aligned}[t]
8 | y &\sim \mathsf{Normal}(\mu, 1) \\
9 | \mu &= \theta_1 + \theta_2
10 | \end{aligned}
11 | $$
12 | The data have no information about either $\theta_1$ or $\theta_2$ separately, but they are informative about $\mu = \theta_1 + \theta_2$.
13 | The likelihood function for the two unidentified parameters ($\theta_1$, $\theta_2$) has a ridge along the line
14 | $$
15 | \left\{ (\theta_1, \theta_2) : \bar{y} = \theta_1 + \theta_2 \right\} ,
16 | $$
17 | where $\bar{y}$ is the mean of the observed data.
18 |
19 | Bayesian models require the specification of priors for model parameters. Proper priors will ensure unimodal posteriors for $\theta_1$ and $\theta_2$, and
20 | can be used to sample from the posterior for this problem. @CarlinLouis2000a show (see their Q25, p191) the dangers of models of this
21 | type. The posteriors for $\theta$ are not identical to the prior (the posterior
22 | standard deviations are 7.05, while the prior standard deviations used below
23 | are 10), suggesting that the data are somewhat informative about both $\theta$
24 | parameters, when this is not the case. An inexperienced user of Markov chain
25 | Monte Carlo methods might fail to recognize that the $\theta$ parameters are not
26 | identified, and naively report the posterior summaries for $\theta$ generated by
27 | the software. On the other hand, note that the identified parameter $\mu = \theta_1 +
28 | \theta_2$ is well behaved.
29 |
30 | ```{r}
31 | library("rstan")
32 | mod_unidentified <- stan_model("stan/unidentified.stan")
33 | ```
34 |
35 | We use very large prior scales here, though the behavior is still present with weakly informative scales.
36 | ```{r}
37 | data_unidentified <- list(
38 | y = 0,
39 | theta_mean = rep(0, 2),
40 | theta_scale = rep(100, 2)
41 | )
42 | ```
43 | ```{r results='hide',message=FALSE}
44 | fit_unidentified <- sampling(mod_unidentified, data = data_unidentified,
45 | refresh = -1)
46 | ```
47 | ```{r}
48 | fit_unidentified
49 | ```
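A quick check (a sketch) of the contrast between identified and unidentified quantities: the posterior for $\mu$ is concentrated, while the posteriors for $\theta_1$ and $\theta_2$ remain nearly as wide as their priors:
```{r}
summary(fit_unidentified, pars = c("mu", "theta"))$summary
```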
50 |
51 | This example is derived from Simon Jackman, "Unidentified: over-parameterization of normal mean", 2007-07-24, [URL](https://web-beta.archive.org/web/20070724034211/http://jackman.stanford.edu:80/mcmc/unidentified.odc).
52 |
--------------------------------------------------------------------------------
/winbugs/92.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/92.odc
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/TwoPartyPreferred.r:
--------------------------------------------------------------------------------
1 | #################################################################
2 | ## assume data has been read in, see read.r
3 | ##
4 | ## dumps data and runs JAGS for two-party preferred daily track
5 | ## generates picture summarizing Gibbs sampler output
6 | ##
7 | ## simon jackman, dept of political science
8 | ## stanford university, october 2005
9 | #################################################################
10 |
11 | if(exists("foo"))
12 | rm(foo)
13 | foo <- list()
14 |
15 | foo$y <- data$coalition2PP/100
16 | var <- foo$y*(1-foo$y)/data$sampleSize
17 | foo$prec <- 1/var
18 | foo$date <- data$date - min(data$date) + 1
19 | foo$org <- data$org
20 | foo$NPOLLS <- length(data$y)
21 | foo$NPERIODS <- length(min(data$date):282)
22 | foo$alpha <- c(rep(NA,length((min(data$date)):282)-1),
23 | .5274) ## actual 2PP on last day
24 |
25 | ## write content of object foo back to top level directory
26 | for(i in 1:length(foo))
27 | assign(x=names(foo)[i],
28 | value=foo[[i]])
29 | dump(list=names(foo)) ## dump
30 | rm(list=names(foo)) ## now clean-up
31 |
32 | ## run jags job in batch mode
33 | system("jags jags.cmd")
34 |
35 | ## read JAGS output back into R
36 | library(coda)
37 | alpha <- read.jags()
38 | house <- alpha[,116:120]
39 | sigma <- alpha[,115]
40 | alpha <- alpha[,1:114]
41 |
42 |
43 | z <- alpha[,c(113,115:120)]
44 | z <- unclass(z)
45 | pdf(file="traceplots.pdf",
46 | width=8,
47 | height=6)
48 | par(bg="black",fg="white")
49 | plot.ts(z,xlab="Iterations")
50 |
51 | dev.off()
52 |
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/appendix.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/AusJPSReplication/appendix.pdf
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/data.csv:
--------------------------------------------------------------------------------
1 | org,start,end,mode,alpPrimary,alp2PP,coalition,coalition2PP,sampleSize
AC Nielsen,18-Jun,19-Jun,telephone,42,52,43,48,1419
AC Nielsen,16-Jul,18-Jul,telephone,40,52,44,48,1414
AC Nielsen,13-Aug,15-Aug,telephone,39,53,42,47,1414
AC Nielsen,3-Sep,5-Sep,telephone,40,50,46,50,1414
AC Nielsen,14-Sep,16-Sep,telephone,40,49,48,51,1414
AC Nielsen,21-Sep,23-Sep,telephone,36,46,50,54,1417
AC Nielsen,1-Oct,3-Oct,telephone,39,48,48,52,1397
AC Nielsen,5-Oct,7-Oct,telephone,37,46,49,54,2029
Galaxy,24-Jul,26-Jul,telephone,39,49,45,51,996
Galaxy,6-Aug,8-Aug,telephone,36,46,47,54,996
Galaxy,20-Aug,22-Aug,telephone,39,50,43,50,996
Galaxy,3-Sep,5-Sep,telephone,39,48,47,52,1000
Galaxy,17-Sep,19-Sep,telephone,41,49,46,51,1010
Galaxy,1-Oct,3-Oct,telephone,39,48,45,52,1000
Galaxy,5-Oct,6-Oct,telephone,39,48,46,52,1200
Newspoll,18-Jun,20-Jun,telephone,43,52,43,48,1155
Newspoll,2-Jul,4-Jul,telephone,41,51,43,49,1147
Newspoll,16-Jul,18-Jul,telephone,40,51,43,49,1138
Newspoll,30-Jul,1-Aug,telephone,40,50,45,50,1140
Newspoll,13-Aug,15-Aug,telephone,42,54,39,46,1145
Newspoll,27-Aug,29-Aug,telephone,40,52,43,48,1145
Newspoll,3-Sep,5-Sep,telephone,40,50,45,50,1734
Newspoll,10-Sep,12-Sep,telephone,40,50,46,50,1707
Newspoll,17-Sep,19-Sep,telephone,41,52.5,43,47.5,1674
Newspoll,24-Sep,26-Sep,telephone,40,52,43,48,1701
Newspoll,1-Oct,3-Oct,telephone,39,49.5,46,50.5,1680
Newspoll,6-Oct,7-Oct,telephone,39,50,45,50,2500
Roy Morgan,19-Jun,27-Jun,face-to-face,45.5,54,41,46,1264
Roy Morgan,3-Jul,4-Jul,face-to-face,43.5,51.5,42.5,48.5,1095
Roy Morgan,10-Jul,11-Jul,face-to-face,46,54,41.5,46,1055
Roy Morgan,17-Jul,25-Jul,face-to-face,42,53,41.5,47,2029
Roy Morgan,31-Jul,8-Aug,face-to-face,43,53.5,40,46.5,1909
Roy Morgan,14-Aug,22-Aug,face-to-face,43.5,55.5,39,44.5,1926
Roy Morgan,28-Aug,5-Sep,face-to-face,43,56,38.5,44,1866
Roy Morgan,11-Sep,12-Sep,face-to-face,43.5,54.5,40,45.5,933
Roy Morgan,18-Sep,19-Sep,face-to-face,41,53,42.5,47,1046
Roy Morgan,25-Sep,26-Sep,face-to-face,40,50,44,50,1323
Roy Morgan,2-Oct,3-Oct,face-to-face,40.5,51.5,41.5,48.5,1019
Roy Morgan,7-Oct,8-Oct,telephone,38.5,51,45.5,49,1311
ANU,12-Sep,12-Sep,internet,44,54,37,46,273
ANU,13-Sep,13-Sep,internet,43,54,42,47,621
ANU,14-Sep,14-Sep,internet,43,52,41,48,834
ANU,15-Sep,15-Sep,internet,39,49,46,51,777
ANU,16-Sep,16-Sep,internet,41,51,42.5,49,822
ANU,17-Sep,17-Sep,internet,39,50,43,50,691
ANU,18-Sep,18-Sep,internet,37,47.5,42,52.5,397
ANU,19-Sep,19-Sep,internet,37,49,43,51,367
ANU,20-Sep,20-Sep,internet,41,53,41,47,664
ANU,21-Sep,21-Sep,internet,45,56,40,44,719
ANU,22-Sep,22-Sep,internet,45,55,39,45,756
ANU,23-Sep,23-Sep,internet,41,51,42,49,747
ANU,24-Sep,24-Sep,internet,45,54,41,46,604
ANU,25-Sep,25-Sep,internet,40,52,40,48,419
ANU,26-Sep,26-Sep,internet,42,52,37,48,340
ANU,27-Sep,27-Sep,internet,41,50,41,50,710
ANU,28-Sep,28-Sep,internet,41,53,41.5,47,765
ANU,29-Sep,29-Sep,internet,45,58,38.5,42,805
ANU,30-Sep,30-Sep,internet,44,54,40.5,46,871
ANU,1-Oct,1-Oct,internet,43,54,40,46,839
ANU,2-Oct,2-Oct,internet,42,53,40,47,582
ANU,3-Oct,3-Oct,internet,43,55,38,45,569
ANU,4-Oct,4-Oct,internet,44,54.5,38.5,45.5,791
ANU,5-Oct,5-Oct,internet,44,54,39.5,46,912
ANU,6-Oct,6-Oct,internet,43,54,39,46,867
ANU,7-Oct,7-Oct,internet,43,54,40,46,883
ANU,8-Oct,8-Oct,internet,43,54,40,46,878
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/figure6.r:
--------------------------------------------------------------------------------
1 | ##########################################################################
## this file to get fig6 in a form suitable for AusJPS publication
##
## assumes firstPrefs.r has been run
##
## simon jackman, sept 2005
###########################################################################
postscript(file="figure6.ps",
horizontal=F,
#paper="a4",
#height=8.5,width=11,
width = 13/2.54,
height=8.5/11 * 13/2.54,
family=dinfamily)
par(oma=rep(0,4),
mar=c(1.75,2.5,.75,2.25),
mgp=c(1.5,.5,0),
tcl=-.25,
cex.axis=.65,
cex.main=.65,
cex.lab=.65,
las=1)
xlocs <- jitter(data$date)
plot(dseq,
alphaFirst.bar,
type="n",
axes=F,
xlab="",
ylab=toupper("Coalition 1st preferences (%)"),
xlim=c(min(data$date)-1,
electionDay),
ylim=range(data$coalition/100),
yaxs="i",
xaxs="i")
axis(1,at=datetcks,labels=datelabs)
axis(2,at=seq(from=.38,to=.50,by=.02),
labels=as.character(100*seq(.38,.50,by=.02)))
axis(4,at=c(seq(from=.38,to=.50,by=.02),.467),
labels=c(as.character(100*seq(.38,.50,by=.02)),"46.7"))
lastDay <- match(max(data$date),dseq)
ok <- 1:lastDay
polygon(x=c(dseq[ok],rev(dseq[ok])),
y=c(alphaFirst.ci[1,ok],rev(alphaFirst.ci[2,ok])),
border=F,
col=gray(.75))
lines(dseq,alphaFirst.bar,lwd=3)
## overlay events
par(xpd=T)
for(i in 1:length(eventTicks)){
lines(x=rep(eventTicks[i],2),
y=par()$usr[3:4],
col=gray(.45))
text(x=eventTicks[i],
y=par()$usr[4] + .002,
cex=.65,
i)
}
## now overlay poll data
for(i in 1:dim(data)[1]){
points(xlocs[i],data$coalition[i]/100,pch=16,cex=1,col="black")
#lines(x=rep(xlocs[i],2),
# y=c(data$lo[i],data$up[i]))
}
for(i in 1:dim(data)[1]){
text(xlocs[i],data$coalition[i]/100,
unclass(data$org)[i],
cex=.5,
col="white")
}
## legend info
y0 <- .3975
text(x=par()$usr[1]*.98 + par()$usr[2]*.02,
y=y0,
cex=.65,
"POLLS:",
adj=0)
for(i in 1:5){
y0 <- y0 - .005
points(par()$usr[1]*.97 + par()$usr[2]*.03,
y=y0,
pch=16,col="black",cex=1)
text(par()$usr[1]*.97 + par()$usr[2]*.03,
y=y0,i,cex=.5,col="white")
text(par()$usr[1]*.95 + par()$usr[2]*.05,
y=y0,
adj=0,cex=.65,
toupper(levels(data$org)[i]))
}
dev.off()
system("epstopdf figure6.ps")
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/firstPrefs.r:
--------------------------------------------------------------------------------
1 | #################################################################
## assume data has been read in, see read.r
##
## dumps data and runs JAGS for 1st preference daily track
## generates picture summarizing Gibbs sampler output
##
## simon jackman, dept of political science
## stanford university, october 2005
#################################################################
if(exists("foo"))
rm(foo)
foo <- list()
foo$y <- data$coalition/100
var <- foo$y*(1-foo$y)/data$sampleSize
foo$prec <- 1/var
foo$date <- data$date - min(data$date) + 1
foo$org <- data$org
foo$NPOLLS <- length(data$y)
foo$NPERIODS <- length(min(data$date):282)
foo$alpha <- c(rep(NA,length((min(data$date)):282)-1),
.4047 + .0589 + .0034) ## actual first preference on last day
## write contents of object foo back to top level directory
for(i in 1:length(foo))
assign(x=names(foo)[i],
value=foo[[i]])
dump(list=names(foo)) ## dump
rm(list=names(foo)) ## now clean-up
## run jags job in batch mode
system("jags jags.cmd")
## read JAGS output back into R
library(coda)
alphaFirst <- read.jags()
houseFirst <- alphaFirst[,116:120]
sigmaFirst <- alphaFirst[,115]
alphaFirst <- alphaFirst[,1:114]
alphaFirst.bar <- apply(alphaFirst,2,mean)
alphaFirst.ci <- apply(alphaFirst,2,quantile,c(.025,.975))
## get time stuff correct
electionDay <- julian(as.Date("2004-10-09"),
origin=as.Date("2004-01-01"))
dseq <- min(data$date):electionDay
dateseq <- paste("2004-",
c("07","08","09","10"),
"-01",
sep="")
dateseq <- c(dateseq,"2004-10-09")
datetcks <- julian(as.Date(dateseq),
origin=as.Date("2004-01-01"))
datelabs <- paste(c("July","Aug","Sep","Oct"),
"1")
datelabs <- c(datelabs,"Oct 9")
## election announced 8/29
## debate 9/12
## lib policy launch 9/26
## labor policy launch 9/29
eventTicks <- julian(as.Date(c("2004-08-29",
"2004-09-09",
"2004-09-12",
"2004-09-26",
"2004-09-29")),
origin=as.Date("2004-01-01"))
eventLabs <- c("Election\nAnnounced",
"Jakarta\nEmbassy\nBomb",
"Leader\nDebate",
"Lib Launch",
"ALP Launch")
eventRotate <- c(0,0,rep(30,3))
eventAdj <- list(c(.5,-.1),
c(.5,-.1),
c(-.1,.5),
c(-.1,.5),
c(-.1,.5))
postscript(file="dailyFirst.ps",
paper="a4",
#height=8.5,width=11,
family=dinfamily)
par(oma=c(.1,.1,3,.1),
mar=c(3,4,3,3),
las=1)
xlocs <- jitter(data$date)
plot(dseq,alphaFirst.bar,
type="n",
axes=F,
xlab="",
ylab=toupper("Coalition 1st Preferences (%)"),
xlim=c(min(data$date)-1,
electionDay),
ylim=c(.3675,.5025),
yaxs="i",
xaxs="i")
axis(1,at=datetcks,labels=datelabs)
axis(2,at=seq(from=.38,to=.50,by=.02),
labels=as.character(100*seq(.38,.50,by=.02)))
axis(4,at=c(seq(from=.38,to=.50,by=.02),.467),
labels=c(as.character(100*seq(.38,.50,by=.02)),"46.7"))
lastDay <- match(max(data$date),dseq)
ok <- 1:lastDay
##lines(dseq[ok],alphaFirst.ci[1,ok],lty=2)
##lines(dseq[ok],alphaFirst.ci[2,ok],lty=2)
polygon(x=c(dseq[ok],rev(dseq[ok])),
y=c(alphaFirst.ci[1,ok],rev(alphaFirst.ci[2,ok])),
border=F,
col=gray(.75))
ok <- (lastDay):length(dseq)
##polygon(x=c(dseq[ok],rev(dseq[ok])),
## y=c(alphaFirst.ci[1,ok],rev(alphaFirst.ci[2,ok])),
## border=F,
## col="pink")
lines(dseq,alphaFirst.bar,lwd=3)
#points(dseq,alphaFirst.bar,pch=16,cex=.5,col="white")
## overlay events
par(xpd=T)
for(i in 1:length(eventTicks)){
lines(x=rep(eventTicks[i],2),
y=par()$usr[3:4],
lty=2)
text(x=eventTicks[i],
y=par()$usr[4],
eventLabs[i],
adj=eventAdj[[i]],
srt=eventRotate[i])
}
## now overlay poll data
for(i in 1:dim(data)[1]){
points(xlocs[i],data$coalition[i]/100,
pch=16,cex=1.45,col="black")
#lines(x=rep(xlocs[i],2),
# y=c(data$lo[i],data$up[i]))
}
for(i in 1:dim(data)[1]){
text(xlocs[i],data$coalition[i]/100,
unclass(data$org)[i],cex=.75,col="white")
}
## text(x=par()$usr[1],
## y=par()$usr[4],
## adj=c(0,1.25),
## paste("computed on ",
## as.Date(Sys.time()),
## " \n",
## "with most recent poll fielded on ",
## data$start[order(data$date)[dim(data)[1]]],
## sep="")
## )
## legend info
y0 <- .3975
text(x=par()$usr[1]*.98 + par()$usr[2]*.02,
y=y0,
"Legend:",
adj=0)
for(i in 1:5){
y0 <- y0 - .005
points(par()$usr[1]*.975 + par()$usr[2]*.025,
y=y0,
pch=16,col="black",cex=1.45)
text(par()$usr[1]*.975 + par()$usr[2]*.025,
y=y0,i,cex=.75,col="white")
text(par()$usr[1]*.955 + par()$usr[2]*.045,
y=y0,adj=0,levels(data$org)[i])
}
dev.off()
system("epstopdf dailyFirst.ps")
###################################################################################
houseFirstEffects <- round(apply(houseFirst,2,
function(x)c(mean(x),quantile(x,c(.025,.975))))*100,
1)
dimnames(houseFirstEffects)[[2]] <- levels(data$org)
## summary statistics for average
houseFirstSum <- apply(houseFirst,1,mean)
mean(houseFirstSum)*100
quantile(houseFirstSum,c(.025,.975))*100
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/jags.cmd:
--------------------------------------------------------------------------------
1 | model in kalman.bug
2 | data in dumpdata.R
3 | compile
4 | initialize
5 | update 1000
6 | monitor alpha, thin(500)
7 | monitor sigma, thin(500)
8 | monitor house, thin(500)
9 | update 25000
10 | coda *
11 | exit
12 |
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/kalman.bug:
--------------------------------------------------------------------------------
1 | model{
2 | ## measurement model
3 | for(i in 1:NPOLLS){
4 | mu[i] <- house[org[i]] + alpha[date[i]]
5 | y[i] ~ dnorm(mu[i],prec[i])
6 | }
7 |
8 | ## transition model (aka random walk prior)
9 | for(i in 2:NPERIODS){
10 | mu.alpha[i] <- alpha[i-1]
11 | alpha[i] ~ dnorm(mu.alpha[i],tau)
12 | }
13 |
14 | ## priors
15 | tau <- 1/pow(sigma,2) ## deterministic transform to precision
16 | sigma ~ dunif(0,.01) ## uniform prior on standard deviation
17 |
18 | alpha[1] ~ dunif(.4,.6) ## initialization of daily track
19 |
20 | for(i in 1:5){ ## vague normal priors for house effects
21 | house[i] ~ dnorm(0,.01)
22 | }
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/winbugs/AusJPSReplication/read.r:
--------------------------------------------------------------------------------
1 | data <- read.table(file="data.csv",
sep=",",
header=T)
## turn dates into usable dates
data$startDOY <- julian(as.Date(as.vector(paste(data$start,"-2004",sep="")),
"%d-%b-%Y"),
origin=as.Date("2004-01-01"))
data$endDOY <- julian(as.Date(as.vector(paste(data$end,"-2004",sep="")),
"%d-%b-%Y"),
origin=as.Date("2004-01-01"))
## use the midpoint of the field period as the date for my purposes
data$date <- floor((data$startDOY + data$endDOY)/2)
## convert result and sample size to variance
data$y <- data$coalition2PP/100
data$var <- data$y*(1-data$y)/data$sampleSize
## compute upper and lower bounds
data$sd <- sqrt(data$var)
data$lo <- data$y - 1.96*data$sd
data$up <- data$y + 1.96*data$sd
############################################################
## the following commands for running WinBUGS via R2WinBUGS
## I run a Mac and so use JAGS, so I don't run these commands
############################################################
## dump for WinBugs
if(.Platform$OS.type == "unix")
library(R2WinBUGS,
lib.loc="/home/jackman/Library/R/library")
if(.Platform$OS.type == "windows")
library(R2WinBUGS)
foo <- list(y=data$y,
prec=1/data$var,
date=as.numeric(data$date - min(data$date) + 1),
org=as.integer(data$org),
NPOLLS=length(data$y),
NPERIODS=length(min(data$date):282),
alpha=c(rep(NA,
length(min(data$date):281)),
.5274)
)
initfunc <- function(){
house <- rnorm(n=5,sd=.05)
NPERIODS <- length(min(data$date):282)
alpha <- c(runif(n=1,.4,.6),
runif(n=NPERIODS-2),
NA)
sigma <- runif(n=1,0,.01)
list(house=house,
alpha=alpha,
sigma=sigma)
}
daily <- bugs(data=foo,
inits=initfunc,
debug=T,
n.burnin=5000,
n.iter=55000,
n.thin=100,
parameters.to.save=c("alpha","house","sigma"),
model.file="kalman.bug")
--------------------------------------------------------------------------------
/winbugs/SingleTruncation.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/SingleTruncation.odc
--------------------------------------------------------------------------------
/winbugs/aspirin.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/aspirin.odc
--------------------------------------------------------------------------------
/winbugs/aspirin.txt:
--------------------------------------------------------------------------------
1 | Aspirin: Shrinkage (or "borrowing strength") via hierarchical modeling
2 |
3 | The following data come from a meta-analysis of heart attack data. Each observation is the result of a study of survivorship following a heart attack (myocardial infarction). In each study, some victims were given aspirin immediately following their heart attack, while some victims were not. The observed values of y are the differences in mean survivorship observed in each study, with the other piece of data, the standard deviations, reflecting the relative sizes of the two groups in each study (i.e., although the data are binomial, given the large number of observations per study a normal approximation is valid and reduces each study's data to the observed treatment effect and a standard deviation). The goal of the meta-analysis is to synthesize the six studies, so as to arrive at an overall conclusion regarding the effects of aspirin on survivorship following a heart attack.
4 |
5 | This is an extremely simple example of hierarchical modeling. Via the exchangeability assumption (i.e., the study-specific means have a common prior), the studies "borrow strength" from one another, introducing some bias (each study's mean theta[i] is shrunk back towards the common mean), but with the benefit of gaining precision (smaller variance). We also gain a better estimate of the overall effect of aspirin on survivorship after heart attack than we would get from naively pooling the studies.
6 |
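As a rough arithmetic check (not in the original write-up): study 1 has sd = 1.65, so v[1] = 1.65^2 = 2.72; plugging in the posterior median of sigma2 from the results below (about 1.23) gives b[1] = 2.72/(2.72 + 1.23) = 0.69, in line with the posterior mean of 0.666 reported for b[1].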
7 | These data and the meta-analysis are discussed at length in David Draper et al. (1992), Combining Information: Statistical Issues and Opportunities for Research, American Statistical Association: Alexandria, Virginia.
8 |
9 | model{
10 | for(i in 1:6){ ## loop over studies
11 | theta[i] ~ dnorm(mu,tau); ## prior for each study
12 | v[i] <- pow(sd[i],2); ## convert each study's se to var
13 | precision[i] <- 1/v[i]; ## convert var to precision
14 | y[i] ~ dnorm(theta[i],precision[i]); ## model for each study
15 | b[i] <- v[i]/(v[i] + sigma2) ## shrinkage (auxiliary quantity)
16 | }
17 | mu ~ dnorm(0.0, .001); ## prior for the common mean
18 | tau ~ dgamma(.01, .01); ## "between-study" precision, prior
19 | sigma2 <- 1/tau; ## convert precision to variance
20 | good <- step(mu); ## E(good) = Pr(mu>0 | data)
21 | }
22 |
23 | ## data
24 | list(y=c(2.77,2.50,1.84,2.56,2.31,-1.15),
25 | sd=c(1.65,1.31,2.34,1.67,1.98,0.90))
26 |
27 | ## initial values
28 | list(mu=0,tau=1)
29 |
30 | Results: The boost in survivorship is estimated at 1.32 percentage points with a standard deviation of .93; the posterior mean of "good" yields a Bayesian p-value for this overall treatment effect, the (posterior) probability that aspirin does not increase survivorship is 1 - .9447 = .0553. Note that a classical analysis that simply pooled the studies yields an average treatment effect of .86 and a standard error of .59 (z = 1.47, p = .072).
31 |
32 | node mean sd MC error 2.5% median 97.5% start sample
33 | b[1] 0.6661 0.2461 0.007897 0.1663 0.6882 0.9953 1001 10000
34 | b[2] 0.5894 0.2701 0.00917 0.1117 0.5818 0.9926 1001 10000
35 | b[3] 0.7712 0.1995 0.005867 0.2863 0.8161 0.9977 1001 10000
36 | b[4] 0.67 0.2447 0.007828 0.1696 0.6933 0.9954 1001 10000
37 | b[5] 0.7232 0.2229 0.006835 0.2231 0.7606 0.9967 1001 10000
38 | b[6] 0.4634 0.2923 0.0108 0.05601 0.3963 0.9844 1001 10000
39 | good 0.9447 0.2286 0.004739 0.0 1.0 1.0 1001 10000
40 | mu 1.316 0.9346 0.02277 -0.3015 1.263 3.304 1001 10000
41 | sigma2 2.612 6.079 0.1084 0.01284 1.234 13.65 1001 10000
42 | theta[1] 1.736 1.177 0.02922 -0.2638 1.603 4.352 1001 10000
43 | theta[2] 1.738 1.065 0.02805 -0.06121 1.62 4.037 1001 10000
44 | theta[3] 1.375 1.29 0.027 -1.053 1.269 4.246 1001 10000
45 | theta[4] 1.655 1.176 0.02757 -0.3317 1.523 4.292 1001 10000
46 | theta[5] 1.538 1.247 0.02587 -0.6295 1.405 4.325 1001 10000
47 | theta[6] -0.07534 0.9488 0.02355 -1.985 -0.02785 1.636 1001 10000
48 |
--------------------------------------------------------------------------------
/winbugs/bimodal.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/bimodal.odc
--------------------------------------------------------------------------------
/winbugs/bimodal.txt:
--------------------------------------------------------------------------------
1 | Bimodal: Extreme missingness in bivariate normal data
2 |
3 | Simple methods for dealing with missing data can run into trouble given pernicious patterns of missingness. A famous artificial data set designed to highlight this point was created by Gordon Murray, to show how an EM algorithm can run into problems (see the Journal of the Royal Statistical Society Series B, 39:27, 1977; this example appears in the discussion to Dempster, Laird and Rubin's much-cited EM paper):
4 |
5 | x1: 1 1 -1 -1 2 2 -2 -2 * * * *
6 | x2: 1 -1 1 -1 * * * * 2 2 -2 -2
7 |
8 | Assume bivariate normality, and that the means of the two variables are both zero, but that the variances and covariance are unknown. Inference about the correlation coefficient r between these two variables is not trivial in this instance. The likelihood for r is not unimodal: it has a saddle point at zero and two local maxima, close to -1 and 1. A Bayesian analysis (with uninformative priors) similarly recovers a bimodal posterior density for the correlation coefficient; e.g., see Tanner, Tools for Statistical Inference, 3rd edition, pp. 95-96, or Congdon, Bayesian Statistical Modelling, p. 46.
9 |
10 | model{
11 | for(i in 1:N){
12 | x[i,1:2] ~ dmnorm(mu[1:2],tau[1:2,1:2])
13 | }
14 | mu[1] <- 0 mu[2] <- 0
15 | tau[1:2,1:2] ~ dwish(S[1:2,1:2], 2)
16 |
17 | S[1,1] <- 1 S[1,2] <- 0
18 | S[2,1] <- 0 S[2,2] <- 1
19 |
20 | Sigma[1:2,1:2] <- inverse(tau[,])
21 | rho <- Sigma[1,2]/sqrt(Sigma[1,1]*Sigma[2,2])
22 |
23 | }
24 |
25 | Data
26 | list(x = structure(.Data = c(-1, -1,
27 | -1, 1,
28 | 1, -1,
29 | 1, 1,
30 | 2, NA,
31 | -2, NA,
32 | 2, NA,
33 | -2, NA,
34 | NA, 2,
35 | NA, -2,
36 | NA, 2,
37 | NA, -2),
38 | .Dim = c(12,2)),
39 | N=12)
40 |
41 |
42 | Alternative Parameterization:
43 |
44 | Operationalizing this model in BUGS used to be tricky. While BUGS can deal with multivariate nodes, WinBUGS 1.3 did not handle partially missing data in a multivariate node (WinBUGS 1.4 solves this problem; see below). Accordingly, we model these bivariate normal data with two univariate normal nodes: a marginal model for x1, and a conditional model for x2 given x1 (or vice versa).
45 |
46 | model{
47 | for (i in 1:N){
48 | x[i,1] ~ dnorm(0.0,tau1); # marginal model
49 | x[i,2] ~ dnorm(mu[i],tau21); # conditional on x1
50 | mu[i] <- beta*x[i,1]; # E(x2|x1)
51 | }
52 |
53 | # deterministic relationships for marginal-conditional model
54 | sig21 <- 1/tau21;
55 | sig1sq <- 1/tau1;
56 | sig2sq <- sig21 + sig1sq*pow(beta,2);
57 | rho <- beta*sqrt(sig1sq/sig2sq); ## the quantity of interest
58 |
59 | # priors
60 | beta ~ dnorm(0,.001);
61 | tau1 ~ dgamma(.01,.01);
62 | tau21 ~ dgamma(.01,.01);
63 | }
64 |
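As a quick numerical check on the deterministic relationships above, the following R sketch (with arbitrary illustrative values for the variances and correlation) confirms that sig2sq and rho recover the marginal variance of x2 and the correlation:

## verify the marginal-conditional identities numerically (sketch)
sig1sq <- 1.5                                   ## var(x1), illustrative
sig2sq_true <- 2.0                              ## var(x2), illustrative
rho_true <- 0.7                                 ## cor(x1, x2), illustrative
beta <- rho_true * sqrt(sig2sq_true / sig1sq)   ## slope of E(x2 | x1)
sig21 <- sig2sq_true * (1 - rho_true^2)         ## conditional var(x2 | x1)
sig2sq <- sig21 + sig1sq * beta^2               ## recovers var(x2): 2.0
rho <- beta * sqrt(sig1sq / sig2sq)             ## recovers the correlation: 0.7
c(sig2sq, rho)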
65 |
66 |
--------------------------------------------------------------------------------
/winbugs/cancer.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/cancer.odc
--------------------------------------------------------------------------------
/winbugs/cancer.txt:
--------------------------------------------------------------------------------
1 | Cancer: difference in two binomial proportions
2 |
3 | The following simple model is drawn from an example in Johnson and Albert's Ordinal Data Modeling (p. 35), using data collected in a study by H.F. Dorn ("The Relationship of Cancer of the Lung and the Use of Tobacco", The American Statistician, 1954, V8:7-13). A sample of 86 lung-cancer patients and a sample of 86 controls were questioned about their smoking habits. The two groups were chosen to represent random samples from a subpopulation of lung-cancer patients and an otherwise similar population of cancer-free individuals. Of the cancer patients, 83 out of 86 were smokers; among the control group, 72 out of 86 were smokers. The scientific question of interest is the difference in the rates of smoking between the two groups.
4 |
5 | In implementing this model in WinBUGS, we have just two data points (cancer patients and control group) and a binomial sampling model, in which the population proportions of smokers in each group appear as parameters. Quantities of interest such as the difference in the population proportions and the log of the odds ratio are computed as auxiliary quantities. Uniform priors on the population proportions are used in this example.
6 |
7 | An alternative parameterization appears below, in which the difference between the two groups is modeled directly on the logit scale, instead of appearing as an auxiliary quantity.
8 |
9 | model{
10 | ## sampling model for the data
11 | for(i in 1:2){ ## loop over observations
12 | r[i] ~ dbin(p[i],n[i]) ## p is unknown parameter
13 | }
14 |
15 | ## priors
16 | p[1] ~ dunif(0,1) ## uniform distributions
17 | p[2] ~ dunif(0,1)
18 |
19 | ## compute quantities of interest
20 | delta <- p[1] - p[2] ## difference in probs
21 | delta.up <- step(delta) ## is delta > 0???
22 |
23 | ## log of the odds ratio
24 | lambda <- log( (p[1]/(1-p[1])) / (p[2]/(1-p[2])) );
25 | lambda.up <- step(lambda) ## is lambda > 0???
26 | }
27 |
28 | ## data
29 | list(r=c(83,72),n=c(86,86))
30 |
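With uniform priors, the posteriors here are available in closed form (Beta(r+1, n-r+1)), so the MCMC output can be checked by direct Monte Carlo in R; a sketch:

## direct Monte Carlo from the conjugate Beta posteriors (sketch)
set.seed(1234)                              ## arbitrary seed, for reproducibility
p1 <- rbeta(100000, 83 + 1, 86 - 83 + 1)    ## cancer patients: Beta(84, 4)
p2 <- rbeta(100000, 72 + 1, 86 - 72 + 1)    ## controls: Beta(73, 15)
delta <- p1 - p2                            ## difference in proportions
lambda <- log((p1/(1 - p1)) / (p2/(1 - p2)))  ## log of the odds ratio
mean(delta > 0)                             ## Pr(delta > 0 | data)
quantile(lambda, c(.025, .5, .975))         ## posterior summary for lambda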
31 |
32 | Alternative Parameterization:
33 | model{
34 | ## sampling model for the data
35 | for(i in 1:2){ ## loop over observations
36 | r[i] ~ dbin(p[i],n[i]) ## p is unknown parameter
37 | }
38 |
39 | ## compute quantities of interest
40 | ## log of the odds ratio
41 | delta <- p[1] - p[2]
42 | lambda <- log( (p[1]/(1-p[1])) / (p[2]/(1-p[2])) );
43 | lambda.up <- step(lambda) ## is lambda > 0???
44 |
45 | ## priors
46 | v[2] ~ dnorm(0,.01); ## vague prior, logits
47 | logit(p[2]) <- v[2]; ## convert to probability
48 | v[1] <- v[2] + vdelta; ## difference in logits
49 | vdelta ~ dnorm(0,.01); ## vague prior on difference
50 | logit(p[1]) <- v[1]; ## convert to probability
51 | }
52 |
53 | ## data
54 | list(r=c(83,72),n=c(86,86))
55 |
56 | ## initial values
57 | list(v=c(NA,.5),vdelta=0)
58 |
59 |
60 |
--------------------------------------------------------------------------------
/winbugs/corporatism.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/corporatism.odc
--------------------------------------------------------------------------------
/winbugs/engines.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/engines.odc
--------------------------------------------------------------------------------
/winbugs/florida/florida.bug:
--------------------------------------------------------------------------------
1 | model{
2 |
3 | ## model for survey results
4 | y ~ dnorm(mu,tau)
5 | tau <- 1/pow(2.2, 2) ## precision of the survey result (sd = 2.2)
6 |
7 | ## prior, from previous studies, with sd = 2.2
8 | mu ~ dnorm(49.1,prec)
9 | prec <- 1/pow(2.2,2)
10 | psd <- 2.2
11 |
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/winbugs/florida/florida.cmd:
--------------------------------------------------------------------------------
1 | model in florida.bug
2 | data in florida.dat
3 | compile
4 | initialize
5 | monitor mu
6 | update 10000
7 | coda *
8 | exit
9 |
--------------------------------------------------------------------------------
/winbugs/florida/florida.dat:
--------------------------------------------------------------------------------
1 | "y" <- c(55)
2 |
--------------------------------------------------------------------------------
/winbugs/florida/florida.r:
--------------------------------------------------------------------------------
1 | ## run jags on florida setup
2 |
3 | system("jags florida.cmd")
4 |
5 | library(coda)
6 | mu <- read.jags()   ## read the CODA output written by the "coda *" command
7 | plot(mu)
8 | summary(mu)
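
## The posterior here is also available analytically: with a N(49.1, 2.2^2)
## prior for mu and a single observation y = 55 with sd 2.2, conjugate
## normal-normal updating gives a quick check on the simulation output:
prior.mean <- 49.1; prior.sd <- 2.2      ## prior, from previous studies
y <- 55; like.sd <- 2.2                  ## survey result and its sd
post.prec <- 1/prior.sd^2 + 1/like.sd^2
post.mean <- (prior.mean/prior.sd^2 + y/like.sd^2)/post.prec
post.sd <- sqrt(1/post.prec)
c(post.mean, post.sd)                    ## 52.05 and about 1.56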
9 |
--------------------------------------------------------------------------------
/winbugs/genbeetles.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/genbeetles.odc
--------------------------------------------------------------------------------
/winbugs/genbeetles.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/genbeetles.txt
--------------------------------------------------------------------------------
/winbugs/info12.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/info12.odc
--------------------------------------------------------------------------------
/winbugs/info12.txt:
--------------------------------------------------------------------------------
1 | Political Information in France: 2-parameter item-response model
2 |
3 | As part of a recent study of public opinion in France, Paul Sniderman, myself, and our French partners came up with a list of twelve political information items. We administered these items to our respondents as "true" or "false" propositions, towards the end of the interview. This is one of the first times that "objective" or "factual" measurements of political information have been administered in France, and we faced considerable uncertainty as to how our test items would fare. Were our items too hard or too easy? Do some items tap political information more than others? What are the properties of any resulting scale measure?
4 |
5 | Two pre-tests of 26 and 25 interviews respectively were conducted in April 2000. Each respondent was administered ten items, out of the set of 12 items. The items and the results of this analysis are described in Simon Jackman, "Estimation and Inference are `Missing Data' Problems: Unifying Social Science Statistics via Bayesian Simulation", Political Analysis, 8(4):307-322 (Fall 2000).
6 | Interesting features of this particular implementation in WinBUGS are the N(0,1) prior for the unobserved latent traits (political information); the use of the "double-subscript" trick to match respondents to item parameters (via the variable asked); and the use of truncated Normal sampling to operationalize a probit model for these data (the observed binary responses are represented to the model as truncation limits; see the sketch below).
7 |
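A sketch of how the truncation limits might be built in R, assuming a hypothetical 51 x 10 matrix y of binary responses (1 = correct, 0 = incorrect), with -10 and 10 standing in for minus and plus infinity:

## correct response   => ystar in (0, 10)
## incorrect response => ystar in (-10, 0)
lower <- ifelse(y == 1, 0, -10)
upper <- ifelse(y == 1, 10, 0)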
8 | model{
9 | for (i in 1:51){ ## loop over 51 survey respondents
10 |
11 | x[i] ~ dnorm(0,1); ## prior for ideal points
12 |
13 | for (j in 1:10){ ## loop over the 10 items asked of each R
14 | ## note use of double-subscript trick
15 | mu[i,j] <- x[i]*beta[asked[i,j],1] - beta[asked[i,j],2];
16 |
17 | ## truncated Normal sampling
18 | ystar[i,j] ~ dnorm(mu[i,j],1)I(lower[i,j],upper[i,j]);
19 | }
20 | }
21 |
22 | for(j in 1:12){
23 | beta[j,1:2] ~ dmnorm(b0[],B0[ , ]); ## vague Normal priors
24 | }
25 |
26 | b0[1] <- 0.0; b0[2] <- 0.0; ## mean zero
27 | B0[1,1] <- 0.04; B0[1,2] <- 0; ## variances 25 (sd = 5)
28 | B0[2,2] <- 0.04; B0[2,1] <- 0; ## covariances 0
29 | }
30 |
--------------------------------------------------------------------------------
/winbugs/judges.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/judges.odc
--------------------------------------------------------------------------------
/winbugs/judges.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/judges.txt
--------------------------------------------------------------------------------
/winbugs/kk.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/kk.odc
--------------------------------------------------------------------------------
/winbugs/legislators.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/legislators.odc
--------------------------------------------------------------------------------
/winbugs/legislators.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/legislators.txt
--------------------------------------------------------------------------------
/winbugs/llg.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/llg.odc
--------------------------------------------------------------------------------
/winbugs/llg.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/llg.txt
--------------------------------------------------------------------------------
/winbugs/logit.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/logit.odc
--------------------------------------------------------------------------------
/winbugs/logit.txt:
--------------------------------------------------------------------------------
1 | Turnout: logit model for binary data
2 |
3 | Voter turnout is coded as a binary indicator (y = 1 if voted, 0 otherwise) and related to covariates via a logit model. The data comprise a random 3,000-observation subset of a much larger data set analyzed by Jonathan Nagler.
4 |
5 |
6 | model{
7 | for (i in 1:N){ ## loop over observations
8 | y[i] ~ dbern(p[i]); ## binary outcome, Bernoulli trial
9 | logit(p[i]) <- mu[i]; ## logit link
10 | mu[i] <- beta[1] ## regression structure for covariates
11 | + beta[2]*educ[i]
12 | + beta[3]*(educ[i]*educ[i])
13 | + beta[4]*age[i]
14 | + beta[5]*(age[i]*age[i])
15 | + beta[6]*south[i]
16 | + beta[7]*govelec[i]
17 | + beta[8]*closing[i]
18 | + beta[9]*(closing[i]*educ[i])
19 | + beta[10]*(educ[i]*educ[i]*closing[i]);
20 |
21 | ## not necessary for model fitting
22 | llh[i] <- y[i]*log(p[i]) + (1-y[i])*log(1-p[i]); ## llh contribution
23 | }
24 |
25 | sumllh <- sum(llh[]); ## sum of log-likelihood contributions
26 |
27 | ## priors, b0 and B given at end of data file, imply vague priors
28 | beta[1:10] ~ dmnorm(b0[ ] , B[ , ]) ; ## multivariate Normal prior
29 | }
30 |
31 | ## reasonable start values
32 | list(beta = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0))
33 |
34 | ## alternative set of start values for parallel chains, from OLS
35 | list(beta=c(-0.3392423378, 0.0741911953, 0.0012163747, 0.0230970246,
36 | -0.0001679677, -0.0333484965, 0.0204799754, -0.0068319918, 0.0017752978, -0.0001432201))
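
A sketch of how OLS start values like these might be generated in R, assuming a hypothetical data frame turnout containing the variables named in the model (y, educ, age, south, govelec, closing); a linear probability fit via lm() gives the OLS values, and glm() gives the maximum-likelihood logit fit for comparison:

## start values from a linear probability model (OLS); `turnout` is assumed
ols <- lm(y ~ educ + I(educ^2) + age + I(age^2) + south + govelec
            + closing + I(closing*educ) + I(educ^2*closing),
          data = turnout)
coef(ols)      ## candidate start values for beta[1:10]

## the corresponding maximum-likelihood logit fit
mle <- glm(formula(ols), family = binomial(link = "logit"), data = turnout)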
--------------------------------------------------------------------------------
/winbugs/multivarmissing.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/multivarmissing.odc
--------------------------------------------------------------------------------
/winbugs/multivarmissing.txt:
--------------------------------------------------------------------------------
1 | Multivariate Missing Data
2 |
3 | The following example highlights some of WinBUGS' capacities for handling missing data. We have a data set of 10 observations on 3 variables. Only one of the variables, z, is completely observed. The other two variables, x and y, have a non-overlapping pattern of missing data.
4 |
5 | Data (rectangular format):
6 | x[] y[] z[]
7 | 1 NA NA
8 | 2 NA 4
9 | 3 NA 3
10 | 4 NA 5
11 | 5 NA 7
12 | NA 7 9
13 | NA 8 8
14 | NA 9 11
15 | NA 8 10
16 | NA 9 8
17 |
18 |
19 | We use z to make imputations for x, and use the complete data for x (observed and imputed) and z to predict y. No special handling of the missing data is required; when missing data appears on the left-hand side of a stochastic expression (a "~" or "twiddle"), WinBUGS will automatically generate imputations. Version 1.4 of WinBUGS promises even better capacities for dealing with missing data.
20 |
21 | This example displays extremely high within-chain autocorrelation for the regression parameters, which is unsurprising given the large amount of missing data. Multiple chains, a long run, and/or a large thinning interval are required in order to reassure ourselves that the sampler is visiting locations in the parameter space with frequencies proportional to their posterior probability.
22 | model{
23 | for (i in 1:10){
24 | x[i] ~ dnorm(mux[i],taux);
25 | mux[i] <- gamma[1] + gamma[2]*z[i];
26 |
27 | y[i] ~ dnorm(muy[i],tauy);
28 | muy[i] <- beta[1] + beta[2]*x[i] + beta[3]*z[i];
29 | }
30 |
31 | ## priors
32 | z[1] ~ dunif(-3,6); ## prior for the one missing z value
33 |
34 | gamma[1] ~ dnorm(0.0,.001);
35 | gamma[2] ~ dnorm(0.0,.001);
36 |
37 | for(j in 1:3){
38 | beta[j] ~ dnorm(0.0,.001);
39 | }
40 | taux ~ dgamma(.01,.01);
41 | tauy ~ dgamma(.01,.01);
42 | sigmax <- sqrt(1/taux); ## convert prec to sigmas
43 | sigmay <- sqrt(1/tauy);
44 | }
45 |
46 |
47 |
48 | Initial values:
49 | list(gamma=c(0,0),beta=c(0,0,0),taux=1,tauy=1)
50 |
51 | list(gamma=c(-10,10),beta=c(-10,10,-10),taux=10,tauy=10)
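
For reference, the rectangular data above assembled as a list from R (NA marks the missing entries):

## the data above as a list, with NA for missing values
data_list <- list(x = c(1, 2, 3, 4, 5, NA, NA, NA, NA, NA),
                  y = c(NA, NA, NA, NA, NA, 7, 8, 9, 8, 9),
                  z = c(NA, 4, 3, 5, 7, 9, 8, 11, 10, 8))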
--------------------------------------------------------------------------------
/winbugs/negbineg.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/negbineg.odc
--------------------------------------------------------------------------------
/winbugs/reagan.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/reagan.odc
--------------------------------------------------------------------------------
/winbugs/reagan.txt:
--------------------------------------------------------------------------------
1 | Reagan: linear regression with AR(1) disturbances.
2 |
3 | Ninety-six monthly observations on presidential job approval ratings for Ronald Reagan are modeled via linear regression, with a correction for first-order serial correlation among the disturbances. Note the marginal model for the first observation, and the conditioning on the lagged observation for months 2 through 96. A uniform prior over the stationary (-1,1) interval is employed for the residual AR(1) parameter.
4 |
5 |
6 | model {
7 | mu[1] <- b[1] + b[2]*infl[1] + b[3]*unemp[1]
8 | app[1] ~ dnorm(mu[1],tau.u)
9 |
10 | for (t in 2:96){ ## loop over obs 2 to T
11 | mu[t] <- b[1]*(1-rho)
12 | + b[2]*(infl[t] - rho*infl[t-1])
13 | + b[3]*(unemp[t] - rho*unemp[t-1])
14 | #+ b[4]*(irancontra[t] - rho*irancontra[t-1])
15 | + rho*app[t-1];
16 | app[t] ~ dnorm(mu[t], tau.e);
17 | }
18 |
19 | sigma.e <- 1/tau.e ## convert precision to variance
20 | sigma.u <- sigma.e/(1-pow(rho,2)) ## stationary (marginal) variance for the first obs
21 | tau.u <- 1/sigma.u
22 |
23 | ## priors
24 | rho ~ dunif(-1,1); ## uniform prior on stationary interval
25 | b[1:3] ~ dmnorm(b0[], B0[ , ]); ## multivariate Normal prior
26 | tau.e ~ dgamma(.05, .05); ## vague prior on the innovation precision
27 | }
28 |
29 | Data:
30 |
31 | list(app=c(51, 55, 60, 67, 68, 59, 58, 60, 56, 54, 55, 49, 47, 47, 46, 43, 45, 44, 42, 42, 42, 42, 43, 41, 35, 40, 41, 41, 46, 47, 42, 43, 47, 49, 53, 54, 55, 56, 54, 54, 53, 54, 56, 57, 54, 58, 61, 59, 64, 60, 56, 52, 55, 58, 63, 65, 60, 63, 65, 63, 64, 64, 63, 62, 68, 64, 63, 61, 61, 63, 63, 47, 48, 40, 47, 48, 51, 53, 49, 49, 49, 49, 49, 49, 49, 50, 51, 50, 48, 48, 54, 53, 54, 51, 57, 64), infl=c(11.7948717948718, 11.3924050632911, 10.6117353308365, 10.1359703337454, 9.79192166462668, 9.6969696969697, 10.7748184019371, 10.8173076923077, 10.9654350417163, 10.271546635183, 9.57943925233646, 8.91203703703702, 8.25688073394495, 7.61363636363637, 6.88487584650115, 6.62177328843996, 6.9119286510591, 7.18232044198894, 6.55737704918034, 5.9652928416486, 4.94092373791624, 5.03211991434689, 4.47761194029852, 3.82571732199788, 3.70762711864407, 3.48468848996832, 3.59028511087645, 4.00000000000000, 3.44108446298228, 2.47422680412372, 2.35897435897436, 2.45649948822926, 2.76356192425793, 2.75229357798166, 3.16326530612245, 3.78710337768680, 4.29009193054135, 4.6938775510204, 4.89296636085628, 4.55465587044535, 4.33467741935483, 4.32595573440644, 4.30861723446894, 4.29570429570432, 4.28286852589641, 4.26587301587302, 4.15430267062316, 4.04339250493095, 3.52595494613126, 3.60623781676415, 3.79008746355685, 3.58180058083253, 3.57487922705315, 3.66441658630665, 3.45821325648417, 3.35249042145593, 3.2473734479465, 3.23501427212178, 3.51377018043686, 3.79146919431279, 3.97350993377483, 3.19849482596426, 2.15355805243445, 1.58878504672897, 1.67910447761195, 1.76744186046511, 1.67130919220055, 1.57553290083410, 1.75763182238668, 1.56682027649770, 1.28440366972478, 1.18721461187214, 1.45586897179253, 2.00546946216955, 2.93308890925756, 3.77184912603494, 3.76146788990825, 3.83912248628884, 4.01826484018266, 4.37956204379564, 4.36363636363637, 4.44646098003629, 4.61956521739131, 4.42238267148014, 4.12556053811659, 3.84271671134941, 3.82902938557434, 3.90070921985817, 3.9787798408488, 3.9612676056338, 4.12642669007901, 4.02097902097902, 4.18118466898953, 4.25716768027802, 4.24242424242425, 4.40795159896283), unemp=c(7.5, 7.4, 7.4, 7.2, 7.5, 7.5, 7.2, 7.4, 7.6, 7.9, 8.3, 8.5, 8.6, 8.9, 9, 9.3, 9.4, 9.6, 9.8, 9.8, 10.1, 10.4, 10.8, 10.8, 10.4, 10.4, 10.3, 10.2, 10.1, 10.1, 9.4, 9.5, 9.2, 8.8, 8.5, 8.3, 8, 7.8, 7.8, 7.7, 7.4, 7.2, 7.5, 7.5, 7.3, 7.4, 7.2, 7.3, 7.3, 7.2, 7.2, 7.3, 7.2, 7.4, 7.4, 7.1, 7.1, 7.1, 7, 7, 6.7, 7.2, 7.2, 7.1, 7.2, 7.2, 7, 6.9, 7, 7, 6.9, 6.6, 6.6, 6.6, 6.6, 6.3, 6.3, 6.2, 6.1, 6, 5.9, 6, 5.8, 5.7, 5.7, 5.7, 5.7, 5.4, 5.6, 5.4, 5.4, 5.6, 5.4, 5.4, 5.3, 5.3), b0=c(0, 0, 0), B0= structure(.Data= c(0.00001, 0, 0, 0, 0.00001, 0, 0, 0, 0.00001), .Dim=c(3, 3)))
32 |
33 |
34 | Initial values:
35 |
36 | list(b=c(80,0,0),rho=0.0,tau.e=.01)
37 |
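For a classical point of comparison, the same regression with AR(1) disturbances can be fit by maximum likelihood in R with the base arima() function (assuming vectors app, infl, and unemp holding the 96 monthly observations from the data list above):

## classical regression with AR(1) errors, for comparison (sketch)
fit <- arima(app, order = c(1, 0, 0),     ## AR(1) error structure
             xreg = cbind(infl, unemp))   ## regression component
fit                                       ## AR coefficient (rho), intercept, slopes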
38 | Results:
39 | node mean sd MC error 2.5% median 97.5% start sample
40 | b[1] 58.87 15.36 0.2088 24.35 60.34 84.86 11001 10000
41 | b[2] -0.1243 0.598 0.006841 -1.232 -0.1471 1.178 11001 10000
42 | b[3] -0.7827 1.68 0.0228 -3.845 -0.8752 2.796 11001 10000
43 | rho 0.9047 0.05272 7.46E-4 0.7926 0.9083 0.9912 11001 10000
44 | sigma.e 13.0 1.937 0.01833 9.719 12.83 17.32 11001 10000
45 |
46 |
47 |
--------------------------------------------------------------------------------
/winbugs/resistant.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/resistant.odc
--------------------------------------------------------------------------------
/winbugs/sen1051d.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/sen1051d.odc
--------------------------------------------------------------------------------
/winbugs/sen1051d.txt:
--------------------------------------------------------------------------------
1 | Legislators: estimating ideal points from roll call data
2 |
3 | Recorded votes in legislative settings (roll calls) are often used to recover the underlying preferences of legislators. Political scientists analyze roll call data using a spatial model: each legislator has a preferred policy position (a point in low-dimensional Euclidean space), and each vote amounts to a choice between an "Aye" and a "Nay" location. Legislators are assumed to choose on the basis of utility maximization, with the utility of each choice declining in the square of the distance between the ideal point and the implicit location of each choice (so-called "quadratic loss"), plus a random disturbance. Subject to identifying restrictions, the legislators' preferred positions and, for one-dimensional models, the midpoint between the "Aye" and "Nay" locations can be estimated using the roll call data (in higher dimensions we recover a hyperplane between the "Aye" and "Nay" locations).
4 |
5 | There is an extremely close correspondence between the statistical analysis of roll call data and item-response models used in educational testing. A two-parameter item-response model is equivalent to the statistical operationalization of the model described in the previous paragraph, with the unobserved ideal point taking the part of the latent trait, and the item-discrimination parameters tapping ideological discrimination.
6 |
7 | If the legislators' utilities have disturbance terms that are (a) normally distributed and (b) independent across legislators and roll calls, then the connection with a two-parameter item-response model with normal errors is complete. In turn, a two-parameter item-response model with normal errors amounts to a hierarchical probit model, well suited to estimation and inference via Bayesian simulation.
8 |
9 | In the implementation below, the iid N(0,1) prior identifies the unobserved ideal points, and vague Normal priors are used for the item-discrimination parameters. In addition, the probit model here is implemented using truncated Normal sampling; the observed data are represented to the model as the lower and upper truncation points. "Nay" votes imply the latent variable ystar lies to the left of zero; for "Yea" votes, ystar lies to the right of zero (negative and positive infinity are operationalized as -10 and 10, respectively).
10 |
11 | ###########################################################
12 | ## one dimensional model
13 | ##
14 | ## use sen1051d.stval.dpt as initial values
15 | ##
16 | ## simon jackman
17 | ## dept of political science, stanford university
18 | ## feb 2001
19 | ###########################################################
20 | model{
21 | for (i in 1:N){
22 | for(j in 1:M){
23 | ystar[i,j] ~ dnorm(mu[i,j],1)I(lower[i,j],upper[i,j]);
24 | mu[i,j] <- x[i]*beta[j,1] - beta[j,2];
25 | ok[i,j] <- y.ok[i,j]*equals(step(mu[i,j]),step(upper[i,j]-5));
26 | }
27 | }
28 |
29 | helms <- x[66]; ## monitor these nodes as sanity checks
30 | kennedy <- x[41];
31 | fiengold <- x[97];
32 | boxer <- x[9];
33 | chafee <- x[77];
34 | mosely <- x[26];
35 | gramm <- x[85];
36 | check <- helms-kennedy;
37 |
38 | ## goodness of fit
39 | allok <- (sum(ok[,])/N.OK)*100;
40 | for (i in 1:N){
41 | x.ok[i]<- sum(ok[i,])/sum(y.ok[i,])*100;
42 | }
43 | for (j in 1:M){
44 | bill.ok[j] <- sum(ok[,j])/sum(y.ok[,j])*100;
45 | }
46 |
47 | ## priors
48 | for (i in 1:N){
49 | x[i] ~ dnorm(0.0,1.0);
50 | }
51 | for (j in 1:M){
52 | beta[j,1:2] ~ dmnorm(b0[1:2],B0[1:2,1:2]);
53 | }
54 | b0[1] <- 0; b0[2] <- 0;
55 | B0[1,1] <- .04; B0[2,2] <- .04;
56 | B0[1,2] <- 0; B0[2,1] <- 0;
57 | }
58 |
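The ok nodes implement a percent-correctly-predicted summary: a vote is scored correct when the sign of mu[i,j] matches the observed vote (upper[i,j] is 10 for "Yea" and 0 for "Nay", so step(upper[i,j]-5) recovers the vote). The same summaries could be formed in R from posterior output; a sketch, assuming hypothetical matrices mu.hat (posterior means of mu), vote (1 = Yea, 0 = Nay), and y.ok (1 if legislator i actually voted on roll call j):

## percent-correctly-predicted summaries from posterior means (sketch)
ok <- y.ok * ((mu.hat > 0) == (vote == 1))   ## 1 = correctly predicted
allok   <- 100 * sum(ok) / sum(y.ok)         ## overall percent correct
x.ok    <- 100 * rowSums(ok) / rowSums(y.ok) ## by legislator
bill.ok <- 100 * colSums(ok) / colSums(y.ok) ## by roll call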
59 |
60 |
61 | ###########################################################
62 | ## one dimensional model with uniform (-1, 1) priors
63 | ## use stval.1d.unif.dpt as initial values
64 | ##
65 | ## simon jackman
66 | ## dept of political science, stanford university
67 | ## feb 2001
68 | ###########################################################
69 | model{
70 | for (i in 1:N){
71 | for(j in 1:M){
72 | ystar[i,j] ~ dnorm(mu[i,j],1)I(lower[i,j],upper[i,j]);
73 | mu[i,j] <- x[i]*beta[j,1] - beta[j,2];
74 | ok[i,j] <- y.ok[i,j]*equals(step(mu[i,j]),step(upper[i,j]-5));
75 | }
76 | }
77 |
78 | helms <- x[66]; ## monitor these nodes as sanity checks
79 | kennedy <- x[41];
80 | fiengold <- x[97];
81 | boxer <- x[9];
82 | chafee <- x[77];
83 | mosely <- x[26];
84 | gramm <- x[85];
85 | check <- helms-kennedy;
86 |
87 | ## goodness of fit
88 | allok <- (sum(ok[,])/N.OK)*100;
89 | for (i in 1:N){
90 | x.ok[i]<- sum(ok[i,])/sum(y.ok[i,])*100;
91 | }
92 | for (j in 1:M){
93 | bill.ok[j] <- sum(ok[,j])/sum(y.ok[,j])*100;
94 | }
95 |
96 | ## priors
97 | for (i in 1:N){
98 | x[i] ~ dunif(-1.0,1.0);
99 | }
100 | for (j in 1:M){
101 | beta[j,1:2] ~ dmnorm(b0[1:2],B0[1:2,1:2]);
102 | }
103 | b0[1] <- 0; b0[2] <- 0;
104 | B0[1,1] <- .04; B0[2,2] <- .04;
105 | B0[1,2] <- 0; B0[2,1] <- 0;
106 | }
--------------------------------------------------------------------------------
/winbugs/sophistication2002.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/sophistication2002.odc
--------------------------------------------------------------------------------
/winbugs/sophistication2002.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/sophistication2002.txt
--------------------------------------------------------------------------------
/winbugs/tpriors.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/tpriors.odc
--------------------------------------------------------------------------------
/winbugs/truncnorm.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/truncnorm.odc
--------------------------------------------------------------------------------
/winbugs/truncnorm.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/truncnorm.txt
--------------------------------------------------------------------------------
/winbugs/turnout2005.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/turnout2005.odc
--------------------------------------------------------------------------------
/winbugs/uk92.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/uk92.odc
--------------------------------------------------------------------------------
/winbugs/undervote.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/undervote.odc
--------------------------------------------------------------------------------
/winbugs/undervote.txt:
--------------------------------------------------------------------------------
1 | Undervoting for President, by Race: difference in two binomial proportions.
2 |
3 | In exit polls for the 1992 election, the Voter News Service asked black and white voters if they did not vote for president (a phenomenon known as "intentional undervoting"). Of 6,537 black voters, 26 said they did not vote for president; of 44,531 white voters, 91 said they did not vote for president.
4 | In the American National Election Studies (1964-2000), of 1,101 black voters, 10 report not voting for president, while 57 of 9,827 white voters report not voting for president. Substantive interest centers on whether this rate of intentional undervoting differs by race.
5 |
6 | These data appear in Tomz and Van Houweling (2003), "How Does Voting Equipment Affect the Racial Gap in Voided Ballots?", American Journal of Political Science.
7 |
8 | model{
9 | for (i in 1:4){
10 | r[i] ~ dbin(p[i],n[i])
11 | }
12 |
13 | delta[1] <- p[2] - p[1] ## difference
14 | good[1] <- step(delta[1]) ## sign of the difference
15 |
16 | delta[2] <- p[4] - p[3] ## difference
17 | good[2] <- step(delta[2]) ## sign of the difference
18 |
19 | ## priors
20 | for(i in 1:4){
21 | p[i] ~ dunif(0,1)
22 | }
23 | }
24 |
25 | Data:
26 | list(r=c(26,91,10,57),n=c(6537,44531,1101,9827))
27 |
28 | Since the data set is tiny and the computation trivial, we can generate a large number of samples from the posterior densities:
29 |
30 | Results:
31 | node mean sd MC error 2.5% median 97.5% start sample
32 | delta[1] -0.002063 8.219E-4 8.192E-7 -0.003805 -0.002016 -5.876E-4 3000001 1000000
33 | delta[2] -0.004073 0.00309 3.09E-6 -0.01088 -0.0038 0.001177 3000001 1000000
34 | good[1] 0.001802 0.04241 4.217E-5 0.0 0.0 0.0 3000001 1000000
35 | good[2] 0.07692 0.2665 2.722E-4 0.0 0.0 1.0 3000001 1000000
36 |
37 | The Bayesian p-values in the vector good can be contrasted with those arising from a classical analysis (e.g., using prop.test and fisher.test in R, formerly in the ctest package and now part of the standard stats package; see the R sketch after the table below); the differences between the classical analysis and the Bayesian simulation-based analysis are more pronounced for the smaller NES data set.
38 |
39 | VNS NES
40 | Classical two-sample test,
41 | one-sided, without continuity
42 | correction: .0011 .0929
43 |
44 | Classical two-sample test,
45 | one-sided, with continuity
46 | correction: .0018 .1315
47 |
48 | Fisher's exact test,
49 | one-sided: .0033 .1330
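
A sketch of the classical calculations in R for the VNS column (group 1 is black voters; the one-sided alternative is that the black undervote rate exceeds the white rate):

## classical one-sided tests for the VNS data (26/6537 vs 91/44531)
prop.test(c(26, 91), c(6537, 44531),
          alternative = "greater", correct = FALSE)   ## cf. .0011 above
prop.test(c(26, 91), c(6537, 44531),
          alternative = "greater", correct = TRUE)    ## cf. .0018 above
fisher.test(matrix(c(26, 6537 - 26,
                     91, 44531 - 91),
                   nrow = 2, byrow = TRUE),
            alternative = "greater")                  ## cf. .0033 above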
--------------------------------------------------------------------------------
/winbugs/unidentified.odc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jrnold/bugs-examples-in-stan/a8480da7a29f4d4a4c704ff5e7f018b3122d828d/winbugs/unidentified.odc
--------------------------------------------------------------------------------
/winbugs/unidentified.txt:
--------------------------------------------------------------------------------
1 | Unidentified: over-parameterization of normal mean
2 |
3 | The following example illustrates the need for caution in diagnosing convergence, and is based on an example appearing in Carlin and Louis' Bayes and Empirical Bayes Methods for Data Analysis, 2nd edition, p174.
4 |
5 | Consider a model for the mean as an additive sum of two parameters: e.g.,
6 | y ~ N(theta1 + theta2, 1). The data are not informative about theta1 and theta2 separately, but are informative about mu = theta1 + theta2, and the likelihood function for the two unidentified parameters has a ridge along the locus of points
7 |
8 | {(theta1, theta2): ybar = theta1 + theta2}
9 |
10 | where ybar is the mean of the observed data.
11 |
12 | In the Bayesian approach, we are obliged to specify priors over the model parameters. Proper priors ensure unimodal posteriors for theta1 and theta2, and normal priors ensure conjugacy, so a simple Gibbs sampler (with normal conditionals) can be used to sample from the posterior for this problem. But as Carlin and Louis show (see their Q25, p. 191), we need to be careful with models of this type. The posteriors for the theta parameters are not identical to their priors (the posterior standard deviations are 7.05, while the prior standard deviations used below are 10), suggesting that the data are somewhat informative about both theta parameters, when this is not the case. An inexperienced user of Markov chain Monte Carlo methods might fail to recognize that the theta parameters are not identified, and naively report the posterior summaries for theta generated by the software. On the other hand, note that the identified parameter mu = theta1 + theta2 is well behaved.
13 |
14 | The problems with this model are exacerbated as the priors tend towards impropriety: see the results from using N(0,10000) priors. En bloc updating of the theta parameters appears to mitigate the slow mixing that results from treating each component of theta as a distinct node.
15 |
16 | model{
17 |
18 | ## loop over observations
19 | for (i in 1:1){
20 | y[i] ~ dnorm(mu,1.0); ## known precision
21 | }
22 | mu <- theta[1] + theta[2]
23 |
24 | ## priors
25 | #theta[1] ~ dnorm(0.0, 0.01); ## this form is not efficient
26 | #theta[2] ~ dnorm(0.0, 0.01); ## vis-a-vis en bloc approach below
27 |
28 | theta[1:2] ~ dmnorm(b0[],B0[,]) ## en bloc updating for theta
29 | b0[1] <- 0 b0[2] <- 0
30 | B0[1,1] <- .01 B0[2,2] <- .01
31 | B0[1,2] <- 0 B0[2,1] <- 0
32 | }
33 |
34 | Data:
35 | list(y=c(0))
36 | Initial Values:
37 | list(theta=c(0,0))
38 |
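The slow mixing under single-site updating is easy to reproduce directly. A minimal sketch of the two-component Gibbs sampler in R (y = 0, unit observation variance, independent N(0,100) priors on each theta), showing high autocorrelation for each theta but good behavior for mu:

## single-site Gibbs sampler for the unidentified model (sketch)
set.seed(42)                         ## arbitrary seed
y <- 0; v0 <- 100                    ## datum and prior variance
n.iter <- 10000
theta <- matrix(0, n.iter, 2)
t1 <- 0; t2 <- 0
v <- 1/(1/v0 + 1)                    ## full-conditional variance
for (g in 1:n.iter) {
  t1 <- rnorm(1, v * (y - t2), sqrt(v))   ## theta1 | theta2, y
  t2 <- rnorm(1, v * (y - t1), sqrt(v))   ## theta2 | theta1, y
  theta[g, ] <- c(t1, t2)
}
mu <- rowSums(theta)
acf(theta[, 1], plot = FALSE)$acf[2]  ## lag-1 autocorrelation near 1
acf(mu, plot = FALSE)$acf[2]          ## mu mixes essentially like iid draws
sd(mu)                                ## close to 1, as in the tables below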
39 | Results with univariate N(0,100) priors:
40 |
41 |
42 | node mean sd MC error 2.5% median 97.5% start sample
43 | mu -0.001947 0.9758 0.001686 -1.907 -0.002111 1.915 10001 180000
44 | theta[1] -0.007469 2.293 0.01711 -4.508 -0.00116 4.46 10001 180000
45 | theta[2] 0.005523 2.291 0.01728 -4.461 0.002011 4.5 10001 180000
46 |
47 | [Figures omitted from the original .odc document: marginal posterior
48 | densities, a scatterplot for theta, and autocorrelation plots.]
49 |
59 | Results with univariate N(0,10000) priors:
60 |
61 | node mean sd MC error 2.5% median 97.5% start sample
62 | mu -0.001893 0.9972 0.001825 -1.954 -0.002808 1.955 10001 180000
63 | theta[1] -0.05764 7.038 0.1513 -14.08 -0.02319 13.72 10001 180000
64 | theta[2] 0.05574 7.037 0.1515 -13.73 0.01983 14.1 10001 180000
65 |
66 |
70 | Results with multivariate N(0,10000) priors:
71 |
72 | node mean sd MC error 2.5% median 97.5% start sample
73 | mu -0.002634 0.995 0.002255 -1.953 -0.002252 1.949 10001 180000
74 | theta[1] 0.00679 7.085 0.01736 -13.88 0.01368 13.91 10001 180000
75 | theta[2] -0.009424 7.086 0.01747 -13.87 -0.0136 13.91 10001 180000
76 |
77 |
78 | [Figure omitted from the original .odc document: autocorrelation plots.]
79 |
--------------------------------------------------------------------------------