├── .gitignore
├── 00-install.R
├── 00-make-exercises.R
├── 00-plan.md
├── 01-small-world.Rmd
├── 02-mcmc-demo.Rmd
├── 03-mcmc-diagnostics-exercise.Rmd
├── 04-priors-explorer.R
├── 05-brms-basic.Rmd
├── 06-posterior-predictive.Rmd
├── 07-priors-brms.Rmd
├── 08-stan-programming-basic.Rmd
├── 09-stan-coding-growth.Rmd
├── 10-loo.Rmd
├── 11-divergent-transitions.Rmd
├── 12-workflow-sdm.Rmd
├── 13-resources.Rmd
├── LICENSE.txt
├── README.md
├── bayes-course.Rproj
├── data-raw
│   ├── mcmc-diagnostics-make-data.R
│   ├── pcod-growth.R
│   ├── postpred-data.R
│   └── rockfish-depth.R
├── data
│   ├── grey-heron.csv
│   ├── house-wren.csv
│   ├── hughes-etal-2018.rds
│   ├── kidiq.rds
│   ├── mcmc1.rds
│   ├── mcmc2.rds
│   ├── mcmc3.rds
│   ├── mcmc4.rds
│   ├── mcmc5.rds
│   ├── mcmc6.rds
│   ├── pcod-age-length.rds
│   ├── pcod-growth.rds
│   ├── ppcheck-df1.rds
│   ├── ppcheck-df2.rds
│   ├── ppcheck-df3.rds
│   ├── ppcheck-yrep1.rds
│   ├── ppcheck-yrep2.rds
│   ├── ppcheck-yrep3.rds
│   ├── ppcheck1.rds
│   ├── ppcheck2.rds
│   └── rockfish-depth.rds
├── extra
│   ├── distributions.R
│   ├── equations.tex
│   ├── equations2.tex
│   ├── render.R
│   ├── rstan-growth.Rmd
│   ├── rstanarm-counterfactual.Rmd
│   ├── slides.R
│   ├── vb-fixed-error.stan
│   └── vb.stan
├── slides
│   └── loo.R
└── stan
    ├── 8schools.stan
    ├── 8schools_noncentered.stan
    ├── gompertz.stan
    ├── lm-matrix.stan
    ├── lm-measure.stan
    ├── lm-simple.stan
    ├── lm.stan
    └── vb
        ├── vb_basic.stan
        ├── vb_norm.stan
        ├── vb_norm_regions.stan
        └── vb_norm_regions_sigma.stan
/.gitignore:
--------------------------------------------------------------------------------
1 | # History files
2 | .Rhistory
3 | .Rapp.history
4 |
5 | # Session Data files
6 | .RData
7 |
8 | # Example code in package build process
9 | *-Ex.R
10 |
11 | # Output files from R CMD build
12 | /*.tar.gz
13 |
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 |
17 | # RStudio files
18 | .Rproj.user/
19 |
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 |
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 |
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 |
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 |
35 |
36 | *.pdf
37 | *.rda
38 | *.rds
39 | *.png
40 | *.html
41 | .Rproj.user
42 |
43 | original-files/
44 | *.key
45 | admin/
46 | extra/
47 |
48 | 01-small-world_files/*
49 | dates.R
50 |
51 | exercise-files/*
52 |
--------------------------------------------------------------------------------
/00-install.R:
--------------------------------------------------------------------------------
1 | # The first half is adapted from Hadley Wickham's install script.
2 |
3 | # A polite helper for installing packages ---------------------------------
4 |
5 | please_install <- function(pkgs, install_fun = install.packages) {
6 | if (length(pkgs) == 0) {
7 | return(invisible())
8 | }
9 | if (!interactive()) {
10 | stop("Please run in interactive session", call. = FALSE)
11 | }
12 |
13 | title <- paste0(
14 | "Ok to install these packges?\n",
15 | paste("* ", pkgs, collapse = "\n")
16 | )
17 | ok <- menu(c("Yes", "No"), title = title) == 1
18 |
19 | if (!ok) {
20 | return(invisible())
21 | }
22 |
23 | install_fun(pkgs)
24 | }
25 |
26 | # Do you have all the needed packages? ------------------------------------
27 |
28 | pkgs <- c(
29 | "tidyverse", "rstan", "rstanarm", "brms", "rmarkdown",
30 | "manipulate", "shiny", "usethis", "bayesplot", "loo",
31 | "devtools", "gridExtra", "patchwork", "extraDistr", "coda"
32 | )
33 | have <- rownames(installed.packages())
34 | needed <- setdiff(pkgs, have)
35 |
36 | please_install(needed)
37 |
38 | # Do you have build tools? ---------------------------------------
39 | devtools::has_devel()
40 |
41 | # If not:
42 |
43 | # On a Mac:
44 | # Open the Terminal.app (see /Applications/Utilities/Terminal.app)
45 | # Run:
46 | # xcode-select --install
47 |
48 | # On a PC:
49 | # Install the Rtools version that matches your version of R
50 | # DFO users can find this in the Software Center
51 |
52 | # Restart R
53 |
54 | # Did it work?
55 | devtools::has_devel()
56 |
57 | # Stan working? ---------------------------------------
58 |
59 | library("rstan")
60 | scode <- "
61 | parameters {
62 | real y[2];
63 | }
64 | model {
65 | y[1] ~ normal(0, 1);
66 | y[2] ~ double_exponential(0, 2);
67 | }
68 | "
69 | cat("Please wait a minute while the model compiles.\n")
70 | fit1 <- stan(model_code = scode, iter = 50, verbose = FALSE, chains = 1)
71 |
72 | if (identical(class(fit1)[[1]], "stanfit")) {
73 | cat("Stan is working. Congratulations! You're done. You can ignore any warnings about 'R-hat' and 'Effective Samples Size (ESS)' above\n")
74 | } else {
75 | cat("Stan is *not* working. Please ask a coworker or contact Sean.\n")
76 | }
77 |
78 | # Check brms -------------------------------------------
79 |
80 | library(brms)
81 | fit1 <- brm(
82 | count ~ zBase * Trt + (1|patient),
83 | data = epilepsy, family = poisson(),
84 | chains = 1, iter = 1000,
85 | prior = prior(normal(0, 1), class = b) +
86 | prior(cauchy(0, 1), class = sd)
87 | )
88 | summary(fit1)
89 | # Ignore any warnings about R-hat or ESS
90 |
91 | if (identical(class(fit1)[[1]], "brmsfit")) {
92 | cat("brms is working. You can ignore any warnings about 'R-hat' and 'Effective Samples Size (ESS)' above\n")
93 | } else {
94 | cat("brms is *not* working. Please ask a coworker or contact Sean.\n")
95 | }
96 |
--------------------------------------------------------------------------------
/00-make-exercises.R:
--------------------------------------------------------------------------------
1 | files <- list.files(".", pattern = "\\.Rmd$|\\.R$")
2 |
3 | dir.create("exercise-files", showWarnings = FALSE)
4 | remove_exercises <- function(x) {
5 | file.copy(x, "exercise-files")
6 | f <- readLines(x)
7 | f_ex <- ifelse(grepl("# exercise", f), "# exercise", f)
8 | f_ex <- ifelse(grepl("", f_ex), "", f_ex)
9 | f_ex <- ifelse(grepl("^Answer: ", f_ex), "Answer: ", f_ex)
10 | writeLines(as.character(f_ex), con = file.path("exercise-files", x))
11 | }
12 | purrr::walk(files, remove_exercises)
13 |
--------------------------------------------------------------------------------
/00-plan.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: "An introduction to applied Bayesian data analysis for ecologists"
3 | output:
4 | html_document:
5 | toc: true
6 | ---
7 |
8 | # Objectives
9 |
10 | 1. Develop an intuition for Bayes' theorem, how a Bayesian approach fundamentally differs from a frequentist approach, and when using a Bayesian approach is particularly advantageous.
11 |
12 | 2. Understand the principle behind MCMC (Markov chain Monte Carlo) sampling. Become familiar with the concepts of chain convergence and MCMC tuning. Develop a high-level understanding of Hamiltonian MCMC.
13 |
14 | 3. Learn to fit pre-packaged Bayesian regression models with brms. Become familiar with the concepts of posterior predictive checking and manipulating posterior samples to calculate posterior probabilities.
15 |
16 | 4. Learn how to assess the relative contribution of priors vs. the data. Learn the difference between weakly informative and informative priors. Learn what some common choices of weakly informative priors are. Become familiar with prior predictive checking.
17 |
18 | 5. Learn the basics of Stan model syntax and how to interact with Stan in R.
19 |
20 | 6. Become familiar with how the pieces fit together into a Bayesian workflow.
21 |
22 | 7. Throughout, gain some familiarity with several useful tools built around Stan and R (e.g., rstan, brms, bayesplot, loo, tidybayes).
23 |
24 | 8. Leave with some ideas for where to find more information.
25 |
26 | # Plan
27 |
28 | ## Day 1
29 |
30 | 1. Introduction to probability, Bayes' theorem, when to go Bayesian
31 | - Slides: an introduction to Bayes' theorem and Bayesian updating
32 | - Slides: frequentist vs. Bayesian inference interpretation
33 | - Slides: when to go Bayes: advantages and disadvantages
34 | - *All together exercise*: small world Bayesian updating
35 | - Rmd: manipulating and summarizing the posterior various ways
36 |
37 | 2. Demystifying MCMC (group exercises and online demo)
38 | - Slides: MCMC intro
39 | - Rmd: run through Metropolis MCMC in R and plot the chain together
40 | - *Individual exercise*: tuning MCMC
41 | - Hamiltonian and NUTS slides
42 | - *Individual exercise*: play with online demo of Hamiltonian and NUTS MCMC
43 |
44 | 3. Convergence and MCMC diagnostics
45 | - Slides: MCMC diagnostics part 1
46 | - *Small groups exercise*: MCMC diagnostics
47 |
48 | 4. Priors (interactive code, slides, and discussion)
49 | - Slides: goals of priors, types of priors
50 | - *Small groups exercise*: experiment with an interactive prior demo
51 |
52 | ## Day 2
53 |
54 | Recap
55 |
56 | 5. Posterior and prior predictive checking
57 | - Slides: predictive checking
58 | - *Small groups exercise*: posterior predictive checking
59 |
60 | 6. Introduction to applied Bayesian modeling
61 | - Rmd: brms-basic.Rmd
62 | - fit a regression model with brms
63 | - inspect MCMC chains for convergence
64 | - summarize MCMC chains to quantify the posterior
65 | - first intro to posterior predictive checking
66 | - making probabilistic statements by manipulating the posterior samples
67 |
68 | 7. Rmd: brms priors and prior predictive checks
69 |
70 | ## Day 3
71 |
72 | 8. Introduction to Stan code and rstan
73 | - Slides: Stan model syntax
74 | - Rmd: regression with a Stan model
75 | - understand when/why you might use brms vs. custom Stan code
76 | - look at the syntax and the code sections of a Stan model
77 | - call the Stan model from R
78 | - extract the posterior samples and make similar plots as before
79 | - fit a length-age growth model to groundfish data and summarize the output
80 |
81 | 9. Leave-one-out cross validation, log scores, and ELPD
82 | - Slides: cross-validation concepts and terms
83 | - Rmd: ELPD + LOO
84 |
85 | ## Day 4
86 |
87 | 10. Divergent transitions
88 | - Slides: divergent transitions
89 | - Rmd: divergent transitions
90 |
91 | 11. Putting it all together: Bayesian workflow
92 | - discuss the suggested workflow steps and why they are recommended
93 | - *All together exercise*: work through an example as a group
94 | - *Small groups exercise*: work through an example as an exercise
95 |
96 | 12. Applied Bayesian modeling standards, words of wisdom, and resources (slides)
97 | - standards for iterations, warmup, chains, and assessing convergence
98 | - Stan warnings to watch out for
99 | - how to describe the models
100 | - what to report in a paper
101 | - good books and online resources
102 |
--------------------------------------------------------------------------------
/01-small-world.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "An introduction to Bayesian updating with a grid search approach"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals
10 |
11 | - Develop an intuition for Bayes' theorem and Bayesian updating
12 | - See the inner workings of how easy it is to apply Bayes' rule with a 'grid
13 | search' approach (for very simple problems)
14 | - Become familiar with the various ways we can summarize samples from a
15 | posterior distribution
16 |
17 | # Setup
18 |
19 | ```{r, message=FALSE, warning=FALSE}
20 | library(ggplot2)
21 | library(dplyr)
22 | theme_set(theme_light())
23 | ```
24 |
25 | # Bayesian updating of the posterior
26 |
27 | We will start by performing an experiment where we "sample" from a spinning globe and record whether a given finger lands on land (`0`) or water (`1`).
28 |
29 | In person: launch the beach ball!
30 |
31 | Online:
32 |
33 | The goal is to estimate what proportion of the globe is covered in water.
34 |
35 | This example has some built-in data from a previous version of that experiment. In person, we will replace it with the data we collect.
36 |
37 | ```{r}
38 | dat <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1) # data we collected
39 |
40 | # dat <- sample(0:1, size = 24, replace = T)
41 | p <- seq(0, 1, length.out = 200) # a sequence of 'proportions water' to evaluate
42 |
43 | # --------------------
44 | # try different priors:
45 | prior <- dunif(p, 0, 1)
46 | # prior <- dbinom(2, size = 3, prob = p)
47 | # prior <- dbinom(7, size = 10, prob = p)
48 | # prior <- dbinom(71, size = 100, prob = p)
49 | # prior <- dbinom(5, size = 10, prob = p)
50 | # prior <- dnorm(p, mean = 0.5, sd = 0.3)
51 | # prior <- dbeta(p, shape1 = 3, shape2 = 2)
52 | # prior <- dnorm(p, mean = 0.5, sd = 0.25);prior[1:100] <- 0
53 | # --------------------
54 |
55 | prior <- prior / sum(prior) # make the prior sum to 1
56 |
57 | out <- list()
58 | for (i in seq_along(dat)) {
59 | likelihood <- dbinom(sum(dat[i]), size = 1, prob = p) # data likelihood
60 | posterior <- likelihood * prior # Bayes rule numerator
61 | posterior <- posterior / sum(posterior) # make it sum to 1
62 | out[[i]] <- data.frame(p, prior, posterior, toss = i) # save it
63 | prior <- posterior # for next time around
64 | }
65 | out <- bind_rows(out)
66 |
67 | ggplot(out, aes(x = p, ymax = posterior, ymin = 0, group = toss)) +
68 | geom_ribbon(alpha = 0.1, fill = "red") +
69 | geom_ribbon(aes(x = p, ymax = prior, ymin = 0), alpha = 0.1) +
70 | coord_cartesian(expand = FALSE) +
71 | xlab("Proportion water") +
72 | ylab("Probability density") +
73 | facet_wrap(~toss)
74 | ```
75 |
76 | In the above figure, the grey represents the prior distribution and red represents the posterior distribution.
77 |
78 | In each panel, we add one observation to our data. We take the posterior from the previous calculation as our new prior, multiply that prior by the data likelihood, and divide by the sum of the likelihood times the prior across all values of `p` so that the posterior sums to 1.
79 |
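In equation form, each panel's update is just Bayes' rule evaluated over the grid of candidate proportions $p_j$:

$$\Pr(p_j \mid y) = \frac{\Pr(y \mid p_j)\,\Pr(p_j)}{\sum_k \Pr(y \mid p_k)\,\Pr(p_k)}$$
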
80 | Questions:
81 | 1. What effect does changing the prior have on the posterior for the first few data points?
82 | 2. Does the posterior end up in a qualitatively different place with the different priors above?
83 |
84 | # Fitting all the data at once
85 |
86 | In the last example, we updated our posterior with each additional piece of data. Let's do our calculations in a way that is more consistent with how we would probably do this experiment. We'll calculate the posterior distribution using all the data at once along with our initial prior.
87 |
88 | Again, try playing with the prior.
89 |
90 | ```{r}
91 | N <- 1000 # bigger for smoother histogram plots below
92 | p <- seq(0, 1, length.out = N)
93 |
94 | prior <- dunif(p, 0, 1) / length(p)
95 | # prior <- dbinom(2, size = 3, prob = p)
96 | # prior <- dnorm(p, mean = 0.5, sd = 0.25)
97 | # prior <- dbeta(p, shape1 = 3, shape2 = 2)
98 | # prior <- dnorm(p, mean = 0.5, sd = 0.25);prior[1:100] <- 0
99 | # prior <- c(1:250, 250:1)
100 | prior <- prior / sum(prior)
101 |
102 | likelihood <- dbinom(sum(dat), size = length(dat), prob = p)
103 | posterior <- likelihood * prior
104 | posterior <- posterior / sum(posterior)
105 |
106 | ggplot(tibble(posterior, prior),
107 | aes(x = p, ymax = posterior, ymin = 0)) +
108 | geom_ribbon(alpha = 0.2, fill = "red") +
109 | geom_ribbon(aes(x = p, ymax = prior, ymin = 0), alpha = 0.2) +
110 | coord_cartesian(expand = FALSE, ylim = c(0, 1.03 * max(c(posterior, prior)))) +
111 | xlab("Proportion water") +
112 | ylab("Probability density")
113 | ```
114 |
115 | Does that look the same as the final panel of the previous plot?
116 |
117 | # Summarizing the posterior with samples
118 |
119 | If we sample from the values of `p` in proportion to their probability, we will be drawing samples from the posterior.
120 |
121 | This makes it easy to summarize the posterior however we would like: it's just a matter of manipulating the samples.
122 |
123 | This is a helpful warmup practice because next we will be using Markov chain Monte Carlo (MCMC), which will also return samples from the posterior.
124 |
125 | ```{r}
126 | post_samples <- sample(p, prob = posterior, size = 2000, replace = TRUE)
127 | ```
128 |
129 | Now we can make a histogram of the posterior distribution samples.
130 |
131 | ```{r}
132 | g <- ggplot(tibble(post_samples), aes(post_samples)) +
133 | geom_histogram(bins = 30, alpha = 0.4, fill = "red") +
134 | coord_cartesian(expand = FALSE)
135 | g
136 | ```
137 |
138 | We can look at the mean or the median of the posterior as one way to summarize it.
139 |
140 | ```{r}
141 | q <- mean(post_samples)
142 | g + geom_vline(xintercept = q)
143 |
144 | q_median <- median(post_samples)
145 | g + geom_vline(xintercept = q) + geom_vline(xintercept = q_median, lty = 2)
146 | ```
147 |
148 | We can also calculate quantile credible intervals. For this, we just use the `quantile()` function to figure out the appropriate thresholds for a given probability. Here we will calculate an 80% credible interval.
149 |
150 | ```{r}
151 | q <- quantile(post_samples, probs = c(0.1, 0.9))
152 | g + geom_vline(xintercept = q)
153 | ```
154 |
155 | The Highest Posterior Density (HPD) interval is the shortest possible credible interval with the appropriate probability coverage. It's harder to calculate, so we will rely on an existing function. Quantile-based credible intervals are more common in practice.
156 |
157 | ```{r}
158 | q_hpd <- coda::HPDinterval(coda::as.mcmc(post_samples), prob = 0.8)
159 | q_hpd <- as.numeric(q_hpd)
160 | q_hpd
161 | g + geom_vline(xintercept = q) + geom_vline(xintercept = q_hpd, lty = 2)
162 | ```
163 |
164 | We can also calculate one-sided credible intervals. For example, we could say that there is a 0.8 probability that the proportion of water on the globe is [blank] or more (conditional on our model and the data we collected). We will sort the samples and find the value just past the 0.2 quantile:
165 |
166 | ```{r}
167 | threshold_sample <- floor(0.2 * length(post_samples))
168 | length(post_samples)
169 | threshold_sample
170 | q <- sort(post_samples)[threshold_sample]
171 | q
172 | g + geom_vline(xintercept = q)
173 | ```
174 |
175 | Or instead of picking a given probability threshold, we could calculate the probability above some meaningful value. For example, we can calculate how much probability density is above 0.5. This tells us the probability that there is more than 50% water on the globe (conditional on our model and data).
176 |
177 | ```{r}
178 | sum(post_samples > 0.5) / length(post_samples)
179 | mean(post_samples > 0.5)
180 | g + geom_vline(xintercept = 0.5)
181 | ```
182 |
183 | We can also easily calculate how much probability there is between specific values. For example, what is the probability that there is between 60% and 80% water?
184 |
185 | ```{r}
186 | mean(post_samples > 0.6 & post_samples < 0.8)
187 | g + geom_vline(xintercept = c(0.6, 0.8))
188 | ```
189 |
190 |
--------------------------------------------------------------------------------
/02-mcmc-demo.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "The Metropolis MCMC algorithm in R"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals
10 |
11 | - Demystify MCMC sampling by coding one ourselves in R and manually updating it
12 | - Gain an intuition for the need to tune MCMC algorithms with interactive visualization
13 |
14 | # Setup and data simulation
15 |
16 | Run this next code chunk once to generate our data.
17 |
18 | We are simulating data with a given mean and standard deviation. We'll effectively be estimating an intercept-only model with a known level of observation error.
19 |
20 | ```{r}
21 | set.seed(123)
22 | mu <- 1
23 | sigma <- 2
24 | dat <- rnorm(50, mean = mu, sd = sigma) # our simulated data
25 | plot(dat)
26 | ```
27 |
28 | Then this chunk will create a blank plot that we will fill in with our MCMC samples.
29 |
30 | ```{r}
31 | plot(
32 | 1, xlim = c(0, 30), ylim = c(-2, 2), type = "n",
33 | ylab = expression(mu), xlab = "Chain iteration"
34 | )
35 | jump_sd <- 1 # our chosen jumping standard deviation
36 | i <- 1 # starting iteration
37 | previous_proposal <- 0 # initial proposal value
38 | set.seed(123)
39 | ```
40 |
41 | # Manually running our MCMC algorithm
42 |
43 | Run this next code chunk repeatedly; each MCMC sample will get added to the open plot.
44 |
45 | Things to be aware of:
46 | - We're going to choose a prior of Normal(0, 5^2), i.e. `dnorm(mean = 0, sd = 5)`.
47 | - We're going to assume we know the error standard deviation `sigma` so we can keep this example simple and have only one parameter.
48 |
49 | Step through this carefully:
50 |
51 | ```{r, eval=FALSE}
52 | # our proposed next value in the chain:
53 | (proposal <- rnorm(1, previous_proposal, jump_sd))
54 | # ----------------------------------------------------------------
55 |
56 | # data likelihood at our proposal:
57 | (log_like_proposal <- sum(dnorm(dat, mean = proposal, sd = sigma, log = TRUE)))
58 | (log_like_previous <- sum(dnorm(dat, mean = previous_proposal, sd = sigma, log = TRUE)))
59 | # ----------------------------------------------------------------
60 |
61 | # prior likelihood at our proposal:
62 | (log_like_prior_proposal <- dnorm(proposal, mean = 0, sd = 5, log = TRUE))
63 | (log_like_prior_previous <- dnorm(previous_proposal, mean = 0, sd = 5, log = TRUE))
64 | # ----------------------------------------------------------------
65 |
66 | # Combine the log-prior-likelihood with the log-data-likelihood to get a value
67 | # that is proportional to the posterior probability of that parameter value
68 | # given the data.
69 | # Since we're in log space, we can add:
70 | (log_posterior_proposal <- log_like_prior_proposal + log_like_proposal)
71 | (log_posterior_previous <- log_like_prior_previous + log_like_previous)
72 | # ----------------------------------------------------------------
73 |
74 | # Calculate the ratio of the proposal and previous probabilities
75 | # Doing it in log space is computationally safer (avoids very small numbers).
76 | # (prob_ratio <- exp(log_posterior_proposal) / exp(log_posterior_previous))
77 | (prob_ratio <- exp(log_posterior_proposal - log_posterior_previous))
78 | # ----------------------------------------------------------------
79 |
80 | # If the probability ratio is > 1, then always accept the new parameter value(s).
81 | # If the probability ratio is < 1, then accept the new parameter values in
82 | # proportion to the ratio of proposal probability / previous probability
83 | if (runif(1) < prob_ratio) { # fancy trick to do the above
84 | print("Accept new proposal")
85 | (previous_proposal <- proposal)
86 | } else {
87 | print("Retain previous value")
88 | (previous_proposal <- previous_proposal)
89 | }
90 | # ----------------------------------------------------------------
91 |
92 | points(i, previous_proposal) # plot our chosen value for this iteration
93 | i <- i + 1 # update counter for next proposal
94 | # now repeat!
95 | ```
96 |
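Written compactly, the acceptance rule we just stepped through accepts a proposal $\theta^*$ over the previous value $\theta^{(i-1)}$ with probability

$$\alpha = \min\left(1, \frac{p(\theta^* \mid y)}{p(\theta^{(i-1)} \mid y)}\right),$$

which is why comparing a single Uniform(0, 1) draw to the probability ratio works.
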
97 | You now have an MCMC chain! The distribution of those values (if you take enough of them) will reflect the distribution of the parameter(s).
98 |
99 | ```{r, eval=FALSE}
100 | abline(h = mu, lty = 2) # our known true value
101 | ```
102 |
103 | # MCMC tuning demo
104 |
105 | The following code chunk contains the same code but embedded in a function so that we can call it repeatedly with different argument values.
106 |
107 | ```{r, eval=FALSE}
108 | mcmc_example <- function(
109 | mu = 4, # the true mean; we will estimate this
110 | sigma = 2, # the true residual SD; we will assume we know this for simplicity
111 | .n = 30, # number of data points to simulate
112 | prior_mu = 0, # our prior distribution mean
113 | prior_mu_sd = 10, # our prior distribution SD on the mean
114 |
115 | # Next, the SD of our jump function.
116 | # Too small a value and the chain might take a
117 | # very long time to get to the right parameter space or might get stuck in the
118 | # wrong area of the parameter space.
119 | # Too large a value and the proposed values will be often rejected, and again,
120 | # the chain may get stuck and take a very long time to converge on an
121 | # appropriate answer.
122 | jump_sd = 5,
123 | reps = 10000, # the length of our MCMC chain
124 | seed = sample.int(.Machine$integer.max, 1)
125 | ) {
126 | set.seed(seed); dat <- rnorm(.n, mean = mu, sd = sigma) # our simulated data (seeded so the `seed` argument is reproducible)
127 |
128 | # We will ensure that our data has exactly our specified mean.
129 | # (This is just to make the simulation easier to follow.)
130 | dat <- dat - (mean(dat) - mu)
131 |
132 | # A vector to hold our MCMC chain output:
133 | out <- vector(length = reps, mode = "numeric")
134 |
135 | # We'll start at an initial value of 0:
136 | out[1] <- 0
137 |
138 | # Now we'll loop through our MCMC chain:
139 | for (i in seq(2, length(out))) {
140 |
141 | # Propose a new value given the previous value and the jump SD:
142 | proposal <- rnorm(1, out[i - 1], jump_sd)
143 |
144 | # Calculate the log-likelihood of the data given the proposed parameter value
145 | # and the previous parameter value:
146 | log_like_proposal <- sum(dnorm(dat, mean = proposal, sd = sigma, log = TRUE))
147 | log_like_previous <- sum(dnorm(dat, mean = out[i - 1], sd = sigma, log = TRUE))
148 |
149 | # Get the log-probability of the proposed and previous parameter values given
150 | # the prior:
151 | log_prior_proposal <- dnorm(proposal, mean = prior_mu, sd = prior_mu_sd, log = TRUE)
152 | log_prior_previous <- dnorm(out[i - 1], mean = prior_mu, sd = prior_mu_sd, log = TRUE)
153 |
154 | # Combine the log-prior with the log-likelihood to get a value that is
155 | # proportional to the posterior probability of that parameter value given the
156 | # data:
157 | log_posterior_proposal <- log_prior_proposal + log_like_proposal
158 | log_posterior_previous <- log_prior_previous + log_like_previous
159 |
160 | # Calculate the ratio of the proposal and previous probabilities:
161 | prob_ratio <- exp(log_posterior_proposal - log_posterior_previous)
162 |
163 | # If the probability ratio is > 1, then always accept the new parameter
164 | # values.
165 | # If the probability ratio is < 1, then accept the new parameter values in
166 | # proportion to the ratio.
167 | if (runif(1, min = 0, max = 1) < prob_ratio) {
168 | out[i] <- proposal # use the proposed parameter value
169 | } else {
170 | out[i] <- out[i - 1] # keep the previous parameter value
171 | }
172 | }
173 |
174 | par(mfrow = c(1, 3))
175 | plot(dat, main = "Observed data")
176 | plot(seq_along(out), out,
177 | type = "l", xlab = "Chain index",
178 | ylab = expression(widehat(mu)), col = "#00000050",
179 | main = "Traceplot"
180 | ) # the MCMC chain
181 | abline(h = mu, col = "red", lty = 1, lwd = 2) # the true mean
182 | hist(out,
183 | xlim = c(-2, 7), breaks = 30,
184 | main = "Prior (line)\nand Posterior (histogram)", xlab = "mu"
185 | )
186 | abline(v = mu, col = "red", lty = 1, lwd = 2)
187 | xx <- seq(-10, 10, length.out = 200)
188 | yy <- dnorm(xx, mean = prior_mu, sd = prior_mu_sd)
189 | par(new = TRUE)
190 | plot(xx, yy,
191 | ylim = c(0, max(yy)), type = "l", axes = FALSE, ann = FALSE,
192 | xlim = c(-2, 7) # match the histogram's x-axis so the prior curve lines up
193 | )
194 | invisible(out)
195 | }
196 | ```
197 |
198 | Now, we'll call our function, which by default will take 10,000 MCMC samples:
199 |
200 | ```{r, eval=FALSE}
201 | mcmc_example()
202 | ```
203 |
204 | We can play with our function using the manipulate package:
205 |
206 | ```{r, eval=FALSE}
207 | library(manipulate)
208 | manipulate(
209 | mcmc_example(
210 | mu = mu,
211 | sigma = sigma,
212 | .n = .n,
213 | prior_mu = 0,
214 | prior_mu_sd = 10,
215 | jump_sd = jump_sd,
216 | reps = reps,
217 | seed = seed
218 | ),
219 | mu = slider(0, 5, 1, step = 0.1),
220 | sigma = slider(0.1, 10, 5, step = 0.1),
221 | .n = slider(5, 1000, 25, step = 5),
222 | jump_sd = slider(0.05, 40, 5, step = 0.1),
223 | reps = slider(50, 20000, 1000, step = 50),
224 | seed = slider(1, 50, 42, step = 1)
225 | )
226 | ```
227 |
228 | # Questions:
229 |
230 | 1. What happens to the posterior as the number of data points (`.n`) gets large?
231 |
232 | 2. What happens to the posterior as the number of data points (`.n`) gets small?
233 |
234 | 3. What happens to the posterior as the observation error (`sigma`) gets larger?
235 |
236 | 4. What happens if the jump distance gets very big?
237 |
238 | 5. What happens if the jump distance gets very small?
239 |
--------------------------------------------------------------------------------
/03-mcmc-diagnostics-exercise.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: MCMC diagnostics
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals
10 |
11 | - Gain experience diagnosing MCMC chains visually, with Rhat, and with ESS.
12 |
13 | # Exercise
14 |
15 | For each of the following sets of saved MCMC chains, answer the following:
16 |
17 | 1. Do these samples appear consistent with convergence?
18 | 2. What helped you come to that conclusion?
19 |
20 | If inconsistent with convergence, continue:
21 |
22 | 3. What might have caused this scenario?
23 | 4. Can the MCMC sampling likely be improved without adjusting the model? How?
24 |
25 | ```{r mcmc-chains1}
26 | samples <- readRDS(here::here("data/mcmc1.rds"))
27 | samples_matrix <- do.call(cbind, samples)
28 |
29 | bayesplot::mcmc_trace(samples)
30 | rstan::Rhat(samples_matrix)
31 | rstan::ess_bulk(samples_matrix)
32 | rstan::ess_tail(samples_matrix)
33 | ```
34 |
35 | 1. No
36 | 2. Rhat a bit high, ESS a bit low, strong autocorrelation apparent in chains
37 | 3. Chains not run for long enough
38 | 4. Possibly just running for longer; no thinning needed!
39 |
40 | ```{r mcmc-chains2}
41 | samples <- readRDS(here::here("data/mcmc2.rds"))
42 | samples_matrix <- do.call(cbind, samples)
43 |
44 | bayesplot::mcmc_trace(samples)
45 | rstan::Rhat(samples_matrix)
46 | rstan::ess_bulk(samples_matrix)
47 | rstan::ess_tail(samples_matrix)
48 | ```
49 |
50 | 1. No
51 | 2. Chains drifting, Rhat high, ESS low
52 | 3. Possibly not enough warmup and starting points not dispersed enough
53 | 4. Longer warmup?
54 |
55 | ```{r mcmc-chains3}
56 | samples <- readRDS(here::here("data/mcmc3.rds"))
57 | samples_matrix <- do.call(cbind, samples)
58 |
59 | bayesplot::mcmc_trace(samples)
60 | bayesplot::mcmc_dens_overlay(samples)
61 | rstan::Rhat(samples_matrix)
62 | rstan::ess_bulk(samples_matrix)
63 | rstan::ess_tail(samples_matrix)
64 | ```
65 |
66 | 1. No
67 | 2. One chain has larger variance
68 | 3. Some chains may be getting stuck and not exploring posterior
69 | 4. May be harder. Tighter priors, re-parameterized model, simplified model?
70 |
71 | ```{r mcmc-chains4}
72 | samples <- readRDS(here::here("data/mcmc4.rds"))
73 | samples_matrix <- do.call(cbind, samples)
74 |
75 | bayesplot::mcmc_trace(samples)
76 | bayesplot::mcmc_dens_overlay(samples)
77 | rstan::Rhat(samples_matrix)
78 | rstan::ess_bulk(samples_matrix)
79 | rstan::ess_tail(samples_matrix)
80 | ```
81 |
82 | 1. Yes
83 | 2. ESS > 400, Rhat < 1.01, chains look good
84 | 3. NA
85 | 4. NA
86 |
87 | ```{r mcmc-chains5}
88 | samples <- readRDS(here::here("data/mcmc5.rds"))
89 | samples_matrix <- do.call(cbind, samples)
90 |
91 | bayesplot::mcmc_trace(samples)
92 | rstan::Rhat(samples_matrix)
93 | rstan::ess_bulk(samples_matrix)
94 | rstan::ess_tail(samples_matrix)
95 | bayesplot::mcmc_dens_overlay(samples)
96 | ```
97 |
98 | 1. No
99 | 2. Chains have different means
100 | 3. Might still reflect dispersed starting points without enough warmup? Model might be weakly identified?
101 | 4. Longer warmup? Simplify model? More data?
102 |
103 | ```{r mcmc-chains6}
104 | samples <- readRDS(here::here("data/mcmc6.rds"))
105 | samples_matrix <- do.call(cbind, samples)
106 |
107 | bayesplot::mcmc_trace(samples)
108 | rstan::Rhat(samples_matrix)
109 | rstan::ess_bulk(samples_matrix)
110 | rstan::ess_tail(samples_matrix)
111 | ```
112 |
113 | 1. No
114 | 2. Chain behaviour at beginning looks different, autocorrelation may be high
115 | 3. Might have forgotten to discard the warmup samples or not run warmup long enough?
116 | 4. Discard the warmup samples, run warmup for longer, ...
117 |
--------------------------------------------------------------------------------
/04-priors-explorer.R:
--------------------------------------------------------------------------------
1 | # Demo of the influence of priors in regression
2 |
3 | # Goals
4 | # - Gain some intuition about the influence of priors on a Bayesian regression
5 |
6 | # In the plots, the gray histograms represent the posterior samples, the blue
7 | # lines represent the priors, and the red lines represent the true values. The
8 | # last panel represents the simulated data (dots), the true relationship
9 | # between y and x (red line), and 50 draws from the posterior distribution.
10 |
11 | # Try running the following code chunk and adjusting the argument values. If
12 | # you adjust the `seed` slider, you will change the random number generator
13 | # draw.
14 |
15 | # Remember that the choice of scale or standard deviation value for the priors
16 | # is dependent on the scale of the predictor and response variable.
17 |
18 | # You may consider asking questions like:
19 | #
20 | # - What happens if you have a tight prior on the slope coefficient that
21 | # differs from the true value?
22 | # - How does this change when you have a lot of data vs. a little?
23 | # - How much data do I need before the data overwhelm the prior for a given
24 | # discrepancy between the prior and data?
25 | # - What happens with a very diffuse prior on the slope coefficient and very
26 | # few data points?
27 | # - Can having an informative prior on the slope coefficient help if you've
28 | # collected very little data? When might you have this kind of information?
29 |
30 | library(rstanarm)
31 | library(ggplot2)
32 | theme_set(theme_light())
33 | library(shiny)
34 |
35 | mcmc_example <- function(
36 | seed = 1,
37 | intercept = 3,
38 | slope = 0.7,
39 | sigma = 2, # the true residual SD
40 | .n = 30, # number of data points to simulate
41 | prior_slope_mean = 0,
42 | prior_slope_sd = 3,
43 | prior_intercept_sd = 10,
44 | prior_sigma_sd = 3,
45 | reps = 800 # the length of each MCMC chain
46 | ) {
47 | set.seed(seed)
48 | x <- rnorm(.n, 0, .5)
49 | d <- data.frame(x = x, y = rnorm(.n, mean = intercept + slope * x, sd = sigma))
50 |
51 | suppressWarnings(
52 | m <- rstanarm::stan_glm(y ~ x, d,
53 | iter = reps, chains = 1,
54 | family = gaussian(link = "identity"), refresh = 0,
55 | prior = normal(prior_slope_mean, prior_slope_sd, autoscale = FALSE),
56 | prior_intercept = normal(0, prior_intercept_sd, autoscale = FALSE),
57 | prior_aux = normal(0, prior_sigma_sd, autoscale = FALSE),
58 | seed = seed
59 | )
60 | )
61 |
62 | e <- as.data.frame(m)
63 | xx <- seq(-6, 6, length.out = 100)
64 |
65 | slope_prior <- data.frame(
66 | x = xx,
67 | y = dnorm(xx, mean = prior_slope_mean, sd = prior_slope_sd)
68 | )
69 |
70 | intercept_prior <- data.frame(
71 | x = xx,
72 | y = dnorm(xx, mean = 0, sd = prior_intercept_sd)
73 | )
74 |
75 | xx0 <- seq(0, 6, length.out = 100)
76 | sigma_prior <- data.frame(
77 | x = xx0,
78 | y = extraDistr::dhnorm(xx0, sigma = prior_sigma_sd)
79 | )
80 |
81 | .range <- c(-4, 4)
82 |
83 | g1 <- ggplot(e, aes(`(Intercept)`, after_stat(density))) +
84 | geom_histogram(bins = 50) +
85 | geom_line(data = intercept_prior, aes(x, y), col = "blue") +
86 | coord_cartesian(xlim = .range) +
87 | xlab("Intercept") +
88 | geom_vline(xintercept = intercept, col = "red")
89 |
90 | g2 <- ggplot(e, aes(x, after_stat(density))) +
91 | geom_histogram(bins = 50) +
92 | geom_line(data = slope_prior, aes(x, y), col = "blue") +
93 | coord_cartesian(xlim = .range) +
94 | xlab("Slope coefficient") +
95 | geom_vline(xintercept = slope, col = "red")
96 |
97 | g3 <- ggplot(e, aes(sigma, after_stat(density))) +
98 | geom_histogram(bins = 50) +
99 | geom_line(data = sigma_prior, aes(x, y), col = "blue") +
100 | coord_cartesian(xlim = c(0, max(.range))) +
101 | xlab("Observation error SD") +
102 | geom_vline(xintercept = sigma, col = "red")
103 |
104 | nd <- data.frame(x = seq(-1, 1, length.out = 2))
105 | set.seed(seed)
106 | pp <- posterior_linpred(m, newdata = nd, draws = 50)
107 | pp2 <- reshape2::melt(pp)
108 | pp2$x <- rep(nd$x, each = 50)
109 |
110 | g4 <- ggplot(d, aes(x = x, y = y)) +
111 | geom_point() +
112 | geom_line(
113 | data = pp2, aes(x, value, group = iterations), inherit.aes = FALSE,
114 | alpha = 0.5, col = "grey30"
115 | ) +
116 | geom_abline(
117 | slope = slope, intercept = intercept,
118 | col = "red"
119 | )
120 |
121 | patchwork::wrap_plots(g1, g2, g3, g4, ncol = 2)
122 | }
123 |
124 | ui <- fluidPage(
125 | pageWithSidebar(
126 | titlePanel("Regression prior explorer"),
127 | sidebarPanel(
128 | sliderInput("seed", "Random seed value", value = 1, min = 1, max = 200, step = 1),
129 | sliderInput("slope", "True slope coefficient", value = 0.6, min = -2, max = 2, step = 0.2),
130 | sliderInput("sigma", "True observation error SD (sigma)", value = 1, min = 0.1, max = 8, step = 0.1),
131 | sliderInput(".n", "Number of observations", value = 50, min = 2, max = 1000, step = 2),
132 | sliderInput("prior_slope_mean", "Slope prior mean", value = 0, min = -5, max = 5, step = 0.5),
133 | sliderInput("prior_slope_sd", "Slope prior SD", value = 1, min = 0.1, max = 100, step = .1),
134 | sliderInput("prior_sigma_sd", "Sigma prior SD", value = 1, min = 0.1, max = 50, step = 1)
135 | ),
136 | mainPanel(plotOutput("gg"))
137 | )
138 | )
139 | server <- function(input, output, session) {
140 | output$gg <- renderPlot(
141 | {
142 | mcmc_example(
143 | seed = input$seed,
144 | intercept = 0,
145 | slope = input$slope,
146 | sigma = input$sigma,
147 | .n = input$.n,
148 | prior_slope_mean = input$prior_slope_mean,
149 | prior_slope_sd = input$prior_slope_sd,
150 | prior_sigma_sd = input$prior_sigma_sd
151 | )
152 | },
153 | width = 600,
154 | height = 500
155 | )
156 | }
157 |
158 | shinyApp(ui, server)
159 |
--------------------------------------------------------------------------------
/05-brms-basic.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "An introduction to applied Bayesian regression using brms"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals
10 |
11 | - Learn to fit pre-packaged Bayesian regression models with brms.
12 | - Gain initial exposure to posterior predictive checking and
13 | manipulating posterior samples to calculate posterior probabilities.
14 |
15 | # Setup
16 |
17 | Let's load dplyr, ggplot2, and brms
18 |
19 | ```{r, warning=FALSE, message=FALSE}
20 | library(dplyr)
21 | library(ggplot2)
22 | library(brms)
23 | theme_set(theme_light())
24 | dir.create("cache", showWarnings = FALSE)
25 | options(brms.file_refit = "on_change") # re-fit cached models if changes
26 | ```
27 |
28 | Any time we use rstan (or a package that relies on rstan, such as brms or rstanarm), we can set an R option to use parallel processing with all available cores: `options(mc.cores = parallel::detectCores())`. This example should run so quickly that it will likely run faster on a single core, so you may choose to skip this or explicitly set it to 1 core.
29 |
30 | ```{r, eval=FALSE}
31 | # options(mc.cores = parallel::detectCores())
32 | # options(mc.cores = 1)
33 | ```
34 |
35 | # Data
36 |
37 | We are going to work with data from:
38 |
39 | Hughes, B.B., Lummis, S.C., Anderson, S.C., and Kroeker, K.J. 2018. Unexpected resilience of a seagrass system exposed to global stressors. Glob. Change Biol. 24(1): 224–234.
40 |
41 | The data come from a mesocosm experiment by Brent Hughes where he manipulated water pH and whether or not nutrients were added (to represent nutrient loads in eelgrass beds) to fourteen 200 L barrels. He measured several variables, but the response variable we are going to work with here is the increase in mass of seahares (*Phyllaplysia taylori*), a type of sea slug, after 24 days.
42 |
43 | ```{r}
44 | d <- readRDS(here::here("data/hughes-etal-2018.rds")) |>
45 | filter(label == "Change in seahare mass (g FW)") |>
46 | rename(change_seahare_mass_g_fw = value) |>
47 | dplyr::select(-label, -figure_panel, -nutrients_text, -response)
48 | glimpse(d)
49 | ```
50 |
51 | ```{r}
52 | ggplot(d,
53 | aes(ph, change_seahare_mass_g_fw, colour = as.factor(nutrients))) +
54 | geom_point()
55 | ```
56 |
57 | Let's rescale (center and possibly divide by the SD) the predictors. This is important:
58 |
59 | 1. so that we have some idea of what reasonable prior values will be,
60 | 2. so that our coefficients are on a reasonable scale for interpretation and
61 | for Stan, and
62 | 3. so that we can add a quadratic effect and have one coefficient represent the
63 | slope and the other the curvature.
64 |
65 | ```{r}
66 | d <- mutate(d,
67 | ph_scaled = as.numeric(scale(ph))
68 | )
69 | ```
70 |
71 | Let's look at the data:
72 |
73 | ```{r}
74 | ggplot(d,
75 | aes(ph_scaled, change_seahare_mass_g_fw,
76 | colour = as.factor(nutrients))) +
77 | geom_point()
78 | ```
79 |
80 | # Fitting a model
81 |
82 | We are going to fit this model with the `brms::brm()` function.
83 |
84 | ```{r, results='hide', warning=FALSE}
85 | fit <- brm(
86 | log(change_seahare_mass_g_fw) ~ ph_scaled + I(ph_scaled^2) + nutrients,
87 | data = d,
88 | iter = 2000,
89 | chains = 4,
90 | file = "cache/seahare",
91 | prior = c(
92 | set_prior("normal(0, 5)", class = "b"),
93 | set_prior("normal(0, 10)", class = "Intercept"),
94 | set_prior("student_t(3, 0, 3)", class = "sigma")
95 | )
96 | )
97 | ```
98 |
99 | There are a variety of functions available to inspect our model including the usual print or summary function:
100 |
101 | ```{r}
102 | summary(fit)
103 | ```
104 |
105 | Take a look at the output and make sure you understand everything there.
106 |
107 | ## Questions:
108 |
109 | Open the help file `?summary.brmsfit` and answer the following questions:
110 |
111 | 1. What does the `Estimate` column represent here? Hint: read `?summary.brmsfit`.
112 | 2. What does the `Est.Error` column represent here?
113 | 3. What is `sigma` here?
114 | 4. Do the `Rhat` and `ESS` columns look reasonable?
115 |
116 | Bonus questions:
117 |
118 | 5. Can you get `summary.brmsfit()` to return medians? What does the `Est.Error` column now mean?
119 | 6. Can you get `summary.brmsfit()` to return information on the priors?
120 | 7. Can you get `summary.brmsfit()` to return 89% CIs?
121 |
122 | ```{r}
123 | summary(fit, robust = TRUE) # exercise
124 | summary(fit, priors = TRUE) # exercise
125 | summary(fit, prob = 0.89) # exercise
126 | ```
127 |
128 | There are a lot of helper functions in brms to explore. Here are a few useful ones:
129 |
130 | ```{r}
131 | brms::prior_summary(fit)
132 | brms::stancode(fit) # not easy to read!
133 | brms::standata(fit)
134 | ```
135 |
136 | # Inspecting the chains for convergence
137 |
138 | We are going to use the plotting functions from the package bayesplot, which is also developed by the Stan developers. These plotting functions will work with any kind of MCMC output, not just the output from brms, rstanarm, or rstan, as long as you format the samples correctly.
139 |
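As an aside, here is a minimal sketch (with made-up simulated draws, not output from our model) of one generic format bayesplot accepts: an iterations x chains x parameters array.

```{r, eval=FALSE}
# hypothetical draws: 1000 iterations x 4 chains x 2 parameters
draws <- array(
  rnorm(1000 * 4 * 2),
  dim = c(1000, 4, 2),
  dimnames = list(NULL, NULL, c("alpha", "beta"))
)
bayesplot::mcmc_trace(draws)
```
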
140 | There are many available plotting functions in the bayesplot package. Before we start exploring them, we need to make sure that our chains are consistent with convergence. To start with we already checked the effective sample size and Rhat values, but there's no substitute for visually inspecting the chains!
141 |
142 | The primary way of looking at MCMC chains is as overlaid time series:
143 |
144 | ```{r}
145 | bayesplot::mcmc_trace(fit, pars = "b_nutrients")
146 | bayesplot::mcmc_trace(fit, regex_pars = "^b_|sigma")
147 | ```
148 |
149 | How does that look to you?
150 |
151 | Another thing to check is the autocorrelation in the chains:
152 |
153 | ```{r}
154 | bayesplot::mcmc_acf(fit)
155 | ```
156 |
157 | ## Question:
158 |
159 | 1. Is autocorrelation in the chains a problem in itself?
160 |
161 | # Posterior predictive checks
162 |
163 | Posterior predictive checking is a powerful concept in Bayesian statistics.
164 |
165 | The basic idea is to simulate new observations from the model several times and then compare those simulated data sets to the data that we observed. We can then slice and dice that comparison creatively to make sure that our Bayesian probability model is a good representation of the process that generated the observed data. The simulated and observed data should be indistinguishable in any way you can think of to compare them.
166 |
167 | We could do this manually, although the bayesplot package already has a large number of helpful plots available. We will use the built-in `pp_check()` shortcuts for the rest of this exercise, but know that these are just calling the bayesplot functions, and you can use the bayesplot functions with MCMC output from any Bayesian model sampled with MCMC methods.
168 |
169 | Here are all the available posterior predictive checking functions in the bayesplot package:
170 |
171 | ```{r}
172 | bayesplot::available_ppc()
173 | ```
174 |
175 | brms can call these posterior predictive functions directly, although it will generate new simulations each time. E.g.:
176 |
177 | ```{r}
178 | brms::pp_check(fit, ndraws = 50)
179 | ```
180 |
181 | ## Question:
182 |
183 | 1. What are we looking at here?
184 | 2. Are the draws from the posterior consistent with the data that we observed?
185 |
186 | To speed things up, we can instead take one set of posterior predictive draws and plot them in various ways using bayesplot. E.g.:
187 |
188 | ```{r}
189 | y <- log(d$change_seahare_mass_g_fw)
190 | yrep <- posterior_predict(fit, ndraws = 50)
191 | bayesplot::ppc_dens_overlay(y, yrep)
192 | ```
193 |
194 | Is the same as:
195 |
196 | ```{r}
197 | brms::pp_check(fit, ndraws = 50, type = "dens_overlay")
198 | ```
199 |
200 | Where we found `dens_overlay` by running `bayesplot::available_ppc()` and removing the `ppc_` part.
201 |
202 | Read about the various available plotting functions at:
203 |
204 | ```{r, eval=FALSE}
205 | ?bayesplot::`PPC-overview`
206 | ```
207 |
208 | ### Your turn
209 |
210 | Experiment with the available posterior predictive checking functions to evaluate our model.
211 |
212 | ```{r}
213 | pp_check(fit, type = "hist") # exercise
214 | pp_check(fit, type = "error_scatter") # exercise
215 | pp_check(fit, type = "scatter") # exercise
216 | pp_check(fit, type = "scatter_avg") # exercise
217 | pp_check(fit, type = "scatter_avg_grouped", group = "nutrients") # exercise
218 | pp_check(fit, type = "ecdf_overlay") # exercise
219 | pp_check(fit, type = "intervals") # exercise
220 | pp_check(fit, type = "intervals", x = "change_seahare_mass_g_fw") # exercise
221 | pp_check(fit, type = "intervals", x = "nutrients") # exercise
222 | pp_check(fit, type = "intervals", x = "ph_scaled") # exercise
223 | ```
224 |
225 | # Summarizing the posterior samples graphically
226 |
227 | Again, we can look at trace plots like this:
228 |
229 | ```{r}
230 | bayesplot::mcmc_trace(fit)
231 | ```
232 |
233 | These are the available plotting functions:
234 |
235 | ```{r}
236 | bayesplot::available_mcmc()
237 | ```
238 |
239 | ### Your turn
240 |
241 | Experiment with the available plotting functions to summarize the posterior probabilities of the parameters in our model. Which do you find most useful here?
242 |
243 | ```{r}
244 | bayesplot::mcmc_areas(fit, regex_pars = "^b_|sigma") # exercise
245 | bayesplot::mcmc_intervals(fit, regex_pars = "^b_|sigma") # exercise
246 | bayesplot::mcmc_combo(fit, regex_pars = "^b_|sigma") # exercise
247 | bayesplot::mcmc_areas_ridges(fit, regex_pars = "^b_|sigma") # exercise
248 | ```
249 |
250 | - What does the `(Intercept)` coefficient represent?
251 | - What does the `nutrients` coefficient represent?
252 | - What does the `ph_scaled` coefficient represent?
253 | - What does the `I(ph_scaled^2)` coefficient represent?
254 | - What does the `sigma` coefficient represent?
255 |
256 | We can easily extract the credible intervals with:
257 |
258 | ```{r}
259 | posterior_interval(fit)
260 | posterior_interval(fit, prob = 0.9)
261 | posterior_interval(fit, prob = 0.89) # see 'Statistical Rethinking'
262 | posterior_interval(fit, prob = 0.5)
263 | ```
264 |
265 | Why might we prefer 90% or 50% or even 89% credible intervals over the usual 95%?
266 |
267 | # Checking the priors
268 |
269 | We are going to talk about priors more extensively soon.
270 |
271 | It's helpful to know that you can extract details on the priors from a brms model with the `prior_summary()` function. In this case we specified all the priors explicitly, *which is a good practice*. This function is a good way to check that the priors were interpreted correctly, and is also a good way to discover parameters that you might have forgotten to set priors on explicitly.
272 |
273 | ```{r}
274 | brms::prior_summary(fit)
275 | ```
276 |
277 | Let's compare the full posterior distribution for a parameter to its prior as an example.
278 |
279 | For models fit with brms, we can extract the posterior samples with `as.data.frame()` or `as.matrix()`. Let's use the data frame version. We'll also convert to a tibble, just so that it prints nicely.
280 |
281 | ```{r}
282 | post <- tibble::as_tibble(as.data.frame(fit))
283 | post
284 | ```
285 |
286 | What does each column in this data frame represent?
287 |
288 | Our prior on the `ph_scaled^2` coefficient (`normal(0, 5)`, as set above for class `b`):
289 |
290 | ```{r}
291 | prior <- tibble(
292 | b_ph_scaled = seq(-10, 10, length.out = 300),
293 | density = dnorm(b_ph_scaled, 0, 5)
294 | )
295 | prior
296 | ```
297 |
298 | Plot them both:
299 |
300 | ```{r}
301 | # note `after_stat(density)` to get probability densities not counts
302 | # see `?geom_histogram()`
303 | ggplot() +
304 | geom_histogram(data = post, aes(b_Iph_scaledE2, after_stat(density)),
305 | bins = 80) +
306 | geom_ribbon(data = prior, aes(x = b_ph_scaled,
307 | ymax = density, ymin = 0), fill = "blue", alpha = 0.5) +
308 | coord_cartesian(xlim = c(-5, 2.5), expand = FALSE) # no gap below 0
310 | ```
311 |
312 | One thing we haven't done is test the sensitivity of our posterior samples to the choice of priors. How could we go about testing that?
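
One simple approach, sketched here on the assumption that refitting this small model is cheap, is to refit with deliberately wider and tighter priors on the coefficients (the prior SDs below are arbitrary choices) and compare the posterior summaries:

```{r, eval=FALSE}
# a minimal sketch of a prior sensitivity check:
fit_wide <- update(fit, prior = c(
  set_prior("normal(0, 25)", class = "b"),
  set_prior("normal(0, 10)", class = "Intercept"),
  set_prior("student_t(3, 0, 3)", class = "sigma")
))
fit_tight <- update(fit, prior = c(
  set_prior("normal(0, 0.5)", class = "b"),
  set_prior("normal(0, 10)", class = "Intercept"),
  set_prior("student_t(3, 0, 3)", class = "sigma")
))
posterior_interval(fit, prob = 0.9)
posterior_interval(fit_wide, prob = 0.9)
posterior_interval(fit_tight, prob = 0.9)
```

If the intervals barely move, the data dominate the priors; if they shift substantially, the posterior is sensitive to the prior choice.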
313 |
314 | # Shiny Stan
315 |
316 | The shinystan package is a one-stop shop for inspecting a Stan model. For a model fit with brms, rstanarm, or rstan we can launch it with:
317 |
318 | ```{r, eval=FALSE}
319 | shinystan::launch_shinystan(fit)
320 | ```
321 |
322 | # Plotting the posterior distribution of the linear predictor
323 |
324 | ```{r}
325 | newdata <- expand.grid(
326 | ph_scaled = seq(min(d$ph_scaled), max(d$ph_scaled), length.out = 500),
327 | nutrients = c(0, 1)
328 | )
329 | head(newdata)
330 | ```
331 |
332 | We can extract posterior draws of the expected value with the `posterior_epred()` function (for this Gaussian model with an identity link, `posterior_linpred()` would return the same thing). These are samples from the posterior without observation error. In other words, these are similar in concept to the confidence interval you would get out of `predict.glm()` or `predict.lm()`.
333 |
334 | ```{r}
335 | posterior_linear <- posterior_epred(fit, newdata = newdata)
336 | dim(posterior_linear)
337 | ```
338 |
339 | So we now have a matrix that is 4000 rows long and 1000 columns wide. Where do the 4000 and 1000 come from?
340 |
341 | We can summarize the samples however we would like. I'm going to suggest we use the median and the 25% and 75% quantiles. We could also choose to use the mean and any other quantiles we wanted.
342 |
343 | ```{r}
344 | newdata$est <- apply(posterior_linear, 2, median)
345 | newdata$lwr <- apply(posterior_linear, 2, quantile, probs = 0.25)
346 | newdata$upr <- apply(posterior_linear, 2, quantile, probs = 0.75)
347 | ```
348 |
349 | ```{r}
350 | pp <- posterior_predict(fit, newdata = newdata)
351 | newdata$lwr_pp <- apply(pp, 2, quantile, probs = 0.25)
352 | newdata$upr_pp <- apply(pp, 2, quantile, probs = 0.75)
353 |
354 | ggplot(newdata, aes(ph_scaled, exp(est),
355 | group = nutrients, ymin = exp(lwr), ymax = exp(upr),
356 | fill = as.factor(nutrients))) +
357 | geom_ribbon(alpha = 0.2) +
358 | geom_ribbon(alpha = 0.2, aes(ymin = exp(lwr_pp), ymax = exp(upr_pp))) +
359 | geom_line(lwd = 1, aes(colour = as.factor(nutrients))) +
360 | geom_point(data = d, aes(ph_scaled, change_seahare_mass_g_fw,
361 | colour = as.factor(nutrients)), inherit.aes = FALSE) +
362 | ylab("Change in seahare mass (g FW)")
363 | ```
364 |
365 | Note that I exponentiated the predictions to make our plot on the original natural scale.
366 |
367 | # Summarizing the posterior distribution multiple ways
368 |
369 | One of the nice things about Bayesian statistical models is that we can quantify the probability of nearly any comparison we can imagine. All you have to do is add, subtract, multiply, or divide the samples. Let's try some examples.
370 |
371 | As a reminder, `post` comes from:
372 |
373 | ```{r}
374 | post <- tibble::as_tibble(as.data.frame(fit))
375 | post
376 | ```
377 |
378 | What if we wanted to know the probability that there is a negative (frowning) quadratic shape (vs. a positive (smiling) quadratic shape) to the relationship? We can get that from the ph^2 term since we centered our ph predictor before fitting.
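
Recall that the linear predictor here is $\beta_0 + \beta_1 x + \beta_2 x^2 + \beta_3 z$, where $x$ is `ph_scaled` and $z$ is `nutrients`: the fitted curve in $x$ is concave down ("frowning") exactly when $\beta_2 < 0$, so we only need the sign of the `ph_scaled^2` coefficient.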
379 |
380 | ```{r}
381 | ph2_samples <- post$b_Iph_scaledE2
382 | mean(ph2_samples < 0) # prob. frowny
383 | mean(ph2_samples > 0) # prob. smiley
384 | ```
385 |
386 | We're taking advantage of the fact that R treats `TRUE` and `FALSE` as 1 and 0. So by taking the mean, we are doing the same thing as:
387 |
388 | ```{r}
389 | sum(ph2_samples < 0) / length(ph2_samples)
390 | ```
391 |
392 | What is the probability that the change in seahare mass is greater in the case where nutrients were not added?
393 |
394 | ```{r}
395 | mean(post$b_nutrients < 0)
396 | ```
397 |
398 | A major benefit to MCMC sampling of Bayesian models is how easy it is to quantify any comparison you want to make.
399 |
400 | For example, how much greater would you expect the change in seahare mass to be under the average pH condition with nutrients compared to the lowest pH tested without nutrients?
401 |
402 | I.e. compare the blue posterior in the middle to the pink posterior in the lower left of the last plot.
403 |
404 | ```{r}
405 | min_ph <- min(d$ph_scaled)
406 | mean_ph <- mean(d$ph_scaled)
407 |
408 | condition1 <- data.frame(
409 | ph_scaled = min_ph,
410 | nutrients = c(0))
411 | pp1 <- posterior_linpred(fit, newdata = condition1)[,1]
412 |
413 | condition2 <- data.frame(
414 | ph_scaled = mean_ph,
415 | nutrients = c(1))
416 | pp2 <- posterior_linpred(fit, newdata = condition2)[,1]
417 |
418 | ratio <- exp(pp2) / exp(pp1)
419 | ggplot(tibble(ratio = ratio), aes(ratio)) +
420 | geom_histogram() +
421 | scale_x_log10() +
422 | geom_vline(xintercept = 1)
423 |
424 | quantile(ratio, probs = c(0.11, 0.5, 0.89))
425 | mean(ratio > 1)
426 | ```
427 |
428 | What's the probability this ratio is greater than 1.5?
429 |
430 | ```{r}
431 | mean(ratio > 1.5)
432 | ```
433 |
434 | If you can think it you can quantify it. And all you have to do is manipulate the MCMC samples. Add, subtract, multiply, or divide as needed.
435 |
--------------------------------------------------------------------------------
/06-posterior-predictive.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Posterior predictive checking exercise"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals
10 |
11 | - Experiment with using posterior predictive checks to discover issues with a Bayesian probability model.
12 |
13 | # Setup
14 |
15 | I've simulated and fit models to 3 data sets. Each model has some issue and the probability model does not represent the data well. See if you can figure out what the problem is by using posterior predictive checks.
16 |
17 | For each exercise, you'll read in two objects:
18 | - `df`: a data frame with a predictor `x` and observed values `y`. In some cases there may be an additional column defining groups `g`.
19 | - `yrep`: a matrix of posterior predictive simulated observations returned by `brms::posterior_predict()`. There are 20 rows (20 samples) by 100 or 200 columns (number of data points).
20 |
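A quick way to confirm those shapes after loading an exercise's objects (shown for the first exercise's files):

```{r, eval=FALSE}
df <- readRDS(here::here("data/ppcheck-df1.rds"))
yrep <- readRDS(here::here("data/ppcheck-yrep1.rds"))
nrow(df) # number of observations
dim(yrep) # 20 posterior draws x number of observations
```
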
21 | Use `bayesplot::ppc_*` functions (or make the plots yourself) to find the issues. I've included the code here so you can focus on the interpretation.
22 |
23 | Answer the following questions:
24 |
25 | 1. What is the visualization showing?
26 | 2. Do the predictive simulations have similar properties to the observed data? I.e., could the model have generated the observed data?
27 | 3. If not, what is the issue, what might have caused it, and how you might you fix it? (You don't actually have to fix it for this exercise!)
28 |
29 | # Exercise 1
30 |
31 | ```{r}
32 | df <- readRDS(here::here("data/ppcheck-df1.rds"))
33 | yrep <- readRDS(here::here("data/ppcheck-yrep1.rds"))
34 | ```
35 |
36 | ```{r}
37 | y <- df$y
38 | bayesplot::ppc_dens_overlay(y, yrep)
39 | bayesplot::ppc_error_scatter_avg_vs_x(y, yrep, df$x)
40 | bayesplot::ppc_intervals(y, yrep, x = df$x)
41 |
42 | par(mfrow = c(1, 3))
43 | plot(df$x, y)
44 | plot(df$x, yrep[1,])
45 | plot(df$x, yrep[2,])
46 | ```
47 |
48 | Answer: the model isn't creating the curvature in the values of `y` with respect to `x`. The model was fit as `y ~ x` but is missing a quadratic term: `y ~ x + I(x^2)` or `y ~ x + poly(x, 2)`.
49 |
50 | # Exercise 2
51 |
52 | ```{r}
53 | df <- readRDS(here::here("data/ppcheck-df2.rds"))
54 | yrep <- readRDS(here::here("data/ppcheck-yrep2.rds"))
55 | ```
56 |
57 | ```{r}
58 | y <- df$y
59 | par(mfrow = c(1, 3))
60 | plot(df$x, df$y)
61 | plot(df$x, yrep[1,])
62 | plot(df$x, yrep[2,])
63 | bayesplot::ppc_dens_overlay(y, yrep)
64 | ```
65 |
66 | Answer: the model isn't creating enough spread in the data for larger values of `x`. The assumed observation error distribution looks off. These data were generated from a negative binomial model but were fit with a Poisson likelihood.
67 |
68 | # Exercise 3
69 |
70 | ```{r}
71 | df <- readRDS(here::here("data/ppcheck-df3.rds"))
72 | yrep <- readRDS(here::here("data/ppcheck-yrep3.rds"))
73 | ```
74 |
75 | Hint: try visualizing the posterior predictive simulations grouped or coloured by column `g`.
76 |
77 | ```{r}
78 | y <- df$y
79 | par(mfrow = c(1, 3))
80 | plot(df$x, df$y)
81 | plot(df$x, yrep[1,])
82 | plot(df$x, yrep[2,])
83 |
84 | bayesplot::ppc_dens_overlay(y, yrep)
85 | bayesplot::ppc_dens_overlay_grouped(y, yrep, df$g)
86 |
87 | df$yrep1 <- yrep[1,]
88 | df$yrep2 <- yrep[2,]
89 | library(ggplot2)
90 | ggplot(df, aes(x, y, colour = g)) + geom_point()
91 | ggplot(df, aes(x, yrep2, colour = g)) + geom_point()
92 | ```
93 |
94 | Answer: the model is lacking a random intercept by group `g`. The posterior simulations therefore lack the clumping of observations seen in the observed data.
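
For reference, here are hypothetical brms calls (sketches only, not run here) that would address the issue in each of the three exercises:

```{r, eval=FALSE}
library(brms)
# Exercise 1: add the missing quadratic term
brm(y ~ x + I(x^2), data = df)
# Exercise 2: swap the Poisson likelihood for a negative binomial
brm(y ~ x, data = df, family = negbinomial())
# Exercise 3: add a random intercept by group
brm(y ~ x + (1 | g), data = df)
```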
95 |
96 |
--------------------------------------------------------------------------------
/07-priors-brms.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Priors in brms"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals:
10 |
11 | - Learn how to find what priors are needed in a given brms model and what the defaults are.
12 | - Learn how to specify priors in brms.
13 | - Learn how to see the influence of priors on the posterior.
14 | - Learn how to do a prior predictive check.
15 | - Learn how brms treats intercepts.
16 |
17 | # Exercise:
18 |
19 | ```{r}
20 | library(ggplot2)
21 | library(dplyr)
22 | theme_set(theme_light())
23 | library(brms)
24 | dir.create("cache", showWarnings = FALSE)
25 | options(brms.file_refit = "on_change") # re-fit cached models if changes
26 | ```
27 |
28 | Let's create some fake data:
29 |
30 | ```{r}
31 | x <- 1:20
32 | n <- length(x)
33 | a <- 0.2
34 | b <- 0.3
35 | sigma <- 1
36 | set.seed(2141)
37 | y <- a + b * x + sigma * rnorm(n)
38 | fake <- data.frame(x, y)
39 | ```
40 |
41 | ```{r}
42 | ggplot(fake, aes(x, y)) + geom_point()
43 | ```
44 |
45 | If we wanted to fit a simple linear regression of y on x, we could find the default priors for the parameters with:
46 |
47 | ```{r}
48 | brms::default_prior(
49 | y ~ x, data = fake
50 | )
51 | ```
52 |
53 | If we fit our model with the default priors:
54 |
55 | ```{r}
56 | mod1 <- brm(y ~ x, data = fake, family = gaussian())
57 | ```
58 |
59 | We could find out what priors we used with:
60 |
61 | ```{r}
62 | prior_summary(mod1)
63 | ```
64 |
65 | We should instead specify our own priors. We can do this with the `prior` argument.
66 |
67 | Note that by default, brms parameterizes the intercept as the intercept when the predictors are at their mean:
68 |
69 | ```{r, eval=FALSE}
70 | mod1 <- brm(y ~ x, data = fake, family = gaussian(),
71 | file = "cache/priors1",
72 | prior = c(
73 | set_prior("normal(0, 5)", class = "Intercept"),
74 | set_prior("normal(0, 1)", class = "b"),
75 | set_prior("student_t(3, 0, 3)", class = "sigma")
76 | ),
77 | sample_prior = "yes"
78 | )
79 | ```
80 |
81 | If instead we wanted to parameterize the intercept as the value of y when our predictors are 0, we would need to use the `0 + Intercept` syntax:
82 |
83 | ```{r}
84 | mod2 <- brm(y ~ 0 + Intercept + x, data = fake, family = gaussian(),
85 | file = "cache/priors2",
86 | prior = c(
87 | set_prior("normal(0, 5)", class = "b", coef = "Intercept"),
88 | set_prior("normal(0, 1)", class = "b", coef = "x"),
89 | set_prior("student_t(3, 0, 3)", class = "sigma")
90 | ),
91 | sample_prior = "yes"
92 | )
93 | ```
94 |
95 | We can look at what's happening in the Stan code:
96 |
97 | ```{r}
98 | stancode(mod1)
99 | stancode(mod2)
100 | ```
101 |
102 | ```{r}
103 | sims1 <- as_draws_df(mod1)
104 | mean(sims1$b_Intercept)
105 | mean(sims1$Intercept)
106 | ```
107 | Note: in `mod1`, `Intercept` is the intercept when the predictors are centred (what the sampler works with internally), while `b_Intercept` is transformed back to the scale where the predictors are 0.
108 | 
108 | ```{r}
109 | sims2 <- as_draws_df(mod2)
110 | mean(sims2$b_Intercept)
111 | ```
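
As a quick check, the two intercepts in `mod1` are related through the predictor mean; a sketch using the draws we already extracted (these should match):

```{r}
# b_Intercept is the centred-scale Intercept shifted back by b_x * mean(x):
mean(sims1$Intercept - sims1$b_x * mean(fake$x))
mean(sims1$b_Intercept)
```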
112 |
113 | ```{r}
114 | p2 <- brms::prior_draws(mod2) # sample_prior = "yes"
115 | hist(p2$b_x, breaks = 50)
116 | hist(p2$sigma, breaks = 50)
117 | ```
118 |
119 | ```{r}
120 | mod_prior_only <- brm(
121 | y ~ 0 + Intercept + x, data = fake, family = gaussian(),
122 | file = "cache/priors3",
123 | prior = c(
124 | set_prior("normal(0, 5)", class = "b", coef = "Intercept"),
125 | set_prior("normal(0, 1)", class = "b", coef = "x"),
126 | set_prior("student_t(3, 0, 3)", class = "sigma")
127 | ),
128 | sample_prior = "only", #<
129 | seed = 2028, iter = 100, chains = 1
130 | )
131 | ```
132 |
133 | ```{r}
134 | prior_pushforward <- brms::posterior_epred(mod_prior_only)
135 | prior_predictive <- brms::posterior_predict(mod_prior_only)
136 | ```
137 |
138 | ```{r}
139 | fake$pf1 <- prior_pushforward[1,]
140 | fake$pf2 <- prior_pushforward[2,]
141 | fake$pf3 <- prior_pushforward[3,]
142 | ggplot(fake, aes(x, pf1)) + geom_point()
143 | ggplot(fake, aes(x, pf2)) + geom_point()
144 | ggplot(fake, aes(x, pf3)) + geom_point()
145 | ```
146 |
147 | ```{r}
148 | fake$pp1 <- prior_predictive[2,]
149 | fake$pp2 <- prior_predictive[3,]
150 | fake$pp3 <- prior_predictive[4,]
151 | ggplot(fake, aes(x, pp1)) + geom_point()
152 | ggplot(fake, aes(x, pp2)) + geom_point()
153 | ggplot(fake, aes(x, pp3)) + geom_point()
154 | ```
155 |
156 | ```{r}
157 | mod_prior_only_wide <- brm(
158 | y ~ 0 + Intercept + x, data = fake, family = gaussian(),
159 | file = "cache/priors4",
160 | prior = c(
161 | set_prior("normal(0, 100)", class = "b", coef = "Intercept"),
162 | set_prior("normal(0, 100)", class = "b", coef = "x"),
163 | set_prior("student_t(3, 0, 100)", class = "sigma")
164 | ),
165 | sample_prior = "only",
166 | seed = 2028, iter = 100, chains = 1
167 | )
168 | ```
169 |
170 | ```{r}
171 | prior_predictive <- brms::posterior_predict(mod_prior_only_wide)
172 | fake$pp1 <- prior_predictive[1,]
173 | fake$pp2 <- prior_predictive[18,]
174 | fake$pp3 <- prior_predictive[42,]
175 | ggplot(fake, aes(x, pp1)) + geom_point()
176 | ggplot(fake, aes(x, pp2)) + geom_point()
177 | ggplot(fake, aes(x, pp3)) + geom_point()
178 | ```
179 |
--------------------------------------------------------------------------------
/08-stan-programming-basic.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "An introduction to Stan programming"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals
10 |
11 | - Learn the basics of Stan model syntax and how to interact with Stan in R
12 | - Learn to generate our own posterior predictions from a Stan model
13 | - Learn how to calculate LOOIC from a Stan model we wrote ourselves
14 |
15 | # Setup
16 |
17 | ```{r, echo=FALSE}
18 | knitr::opts_chunk$set(
19 | collapse = TRUE,
20 | comment = "#>",
21 | fig.width = 6,
22 | fig.asp = 0.618,
23 | fig.align = "center"
24 | )
25 | show_file <- function(file) {
26 | cat(paste(readLines(file), collapse = "\n"))
27 | }
28 | ```
29 |
30 | ```{r, warning=FALSE, message=FALSE}
31 | library(ggplot2)
32 | library(dplyr)
33 | theme_set(theme_light())
34 | ```
35 |
36 | # Exercise
37 |
38 | Let's load the rstan R package, which lets us interact with Stan from R.
39 |
40 | As an aside, the cmdstanr package is another option for interacting with Stan from R and has several advantages.
41 |
42 | ```{r}
43 | library(rstan)
44 | ```
45 |
46 | If we wanted, we could set an option to use parallel processing. This would be useful with more complicated models or more data. For this simple exercise, it will probably be fastest with a single core.
47 |
48 | ```{r}
49 | # options(mc.cores = parallel::detectCores())
50 | options(mc.cores = 1) # default
51 | ```
52 |
53 | Another important option we can set with rstan is whether we want the compiled model to be saved so that we don't have to recompile it every time we run it. You probably want to set this because the compilation can take a while.
54 |
55 | ```{r}
56 | rstan_options(auto_write = TRUE)
57 | ```
58 |
59 | # Simulating data
60 |
61 | Let's simulate some data to fit a very simple Stan model to. We will focus on a linear model with normally distributed errors.
62 |
63 | ```{r}
64 | set.seed(42)
65 | N <- 30
66 | x <- rnorm(N, 0, 0.5)
67 | alpha <- -0.2
68 | beta <- 0.4
69 | sigma <- 0.3
70 | y <- rnorm(N, mean = alpha + x * beta, sd = sigma)
71 | dat <- tibble(x = x, y = y)
72 | ```
73 |
74 | ```{r}
75 | ggplot(dat, aes(x, y)) + geom_point()
76 | ```
77 |
78 | # The Stan model
79 |
80 | Take a look at the following Stan model that I have set up.
81 |
82 | We'll talk about the various sections as a group.
83 |
84 | ```{r}
85 | show_file("stan/lm-simple.stan")
86 | ```
87 |
88 | This next version also has a generated quantities section. This is the one we will actually run. The generated quantities make it easier to do some things afterwards, like making posterior predictions.
89 |
90 | We'll talk about these additions as a group.
91 |
92 | ```{r}
93 | show_file("stan/lm.stan")
94 | ```
95 |
96 | # Fitting the model
97 |
98 | The first time we run the next code chunk, Stan will translate our model into C++ and compile it. This will take a little while. After that, assuming we set `rstan_options(auto_write = TRUE)`, Stan will avoid recompiling the model unless something in the model code changes.
99 |
100 | Let's sample from the model now:
101 |
102 | ```{r, message=FALSE, results='hide'}
103 | fit <- stan(here::here("stan/lm.stan"), chains = 4, iter = 2000, seed = 39382,
104 | data = list(x = dat$x, y = dat$y, N = length(dat$y)))
105 | ```
106 |
107 | Congratulations --- you fit your first handwritten Stan model! We can do everything with the posterior samples that we could do with the samples from an rstanarm model or a brms model.
108 |
109 | Some of the built-in helper functions from those packages won't work with our model though.
110 |
111 | ```{r, eval=FALSE}
112 | fit
113 | ```
114 |
115 | Notice all of the extra lines of reported results for our generated quantities? We can focus just on the parameters we want with:
116 |
117 | ```{r}
118 | pars <- c("alpha", "beta", "sigma")
119 | print(fit, pars = pars)
120 | ```
121 |
122 | We can use shinystan if we want to look at the model.
123 |
124 | ```{r, eval=FALSE}
125 | shinystan::launch_shinystan(fit)
126 | ```
127 |
128 | The default plot method shows point estimates and credible intervals.
129 |
130 | ```{r}
131 | plot(fit, pars = pars)
132 | ```
133 |
134 | There are other options.
135 |
136 | ```{r, eval=FALSE}
137 | ?rstan::`rstan-plotting-functions`
138 | stan_dens(fit, pars = pars)
139 | ```
140 |
141 | Alternatively, and probably preferably, we can use the bayesplot package.
142 |
143 | Experiment with inspecting the posterior chains using the bayesplot package.
144 |
145 | ```{r}
146 | fit_array <- as.array(fit)
147 | bayesplot::mcmc_trace(fit_array, pars = pars)
148 | bayesplot::mcmc_dens_overlay(fit_array, pars = pars) # exercise
149 | ```
150 |
151 | # Manipulating the posterior samples ourselves
152 |
153 | ```{r}
154 | post <- rstan::extract(fit)
155 | ```
156 |
157 | The output from `rstan::extract()` is a named list. Each element of the list is a numeric vector of samples if that parameter had one dimension (as all of our parameters did this time), or a matrix of samples if, say, beta had represented multiple slope parameters.
158 |
159 | ```{r}
160 | names(post)
161 | dim(post$beta)
162 | ```
163 |
164 | Let's overlay some model fits from the posterior on the data:
165 |
166 | ```{r}
167 | n_draws <- 200 # number of posterior draws to plot
168 | ggplot(dat, aes(x, y)) + geom_point() +
169 |   geom_abline(
170 |     intercept = post$alpha[1:n_draws],
171 |     slope = post$beta[1:n_draws],
172 |     alpha = 0.1)
173 | ```
174 |
175 | Is that starting to look like the confidence/credible intervals you are used to looking at?
176 |
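To make that connection concrete, here is one way (a sketch) to summarize the posterior of the line itself into a median and 90% credible ribbon:

```{r}
x_grid <- seq(min(dat$x), max(dat$x), length.out = 50)
mu_post <- sapply(x_grid, function(xg) post$alpha + post$beta * xg)
band <- apply(mu_post, 2, quantile, probs = c(0.05, 0.5, 0.95))
band_df <- tibble(x = x_grid, lwr = band[1, ], med = band[2, ], upr = band[3, ])
ggplot(dat, aes(x, y)) +
  geom_point() +
  geom_ribbon(
    data = band_df, aes(x, ymin = lwr, ymax = upr),
    inherit.aes = FALSE, alpha = 0.2
  ) +
  geom_line(data = band_df, aes(x, med), inherit.aes = FALSE)
```
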
177 | # Posterior predictive simulations
178 |
179 | We can experiment with inspecting the posterior predictive distribution using the bayesplot package:
180 |
181 | ```{r}
182 | post <- rstan::extract(fit) # extract the posterior simulations as a list
183 | # grab the `posterior_predictions` from our `generated quantities` section:
184 | pp <- post$posterior_predictions
185 | bayesplot::ppc_dens_overlay(y = dat$y, yrep = pp[1:25, ])
186 | ```
187 |
188 | Let's do it in R to see how we could create them afterwards. The result is the same. The question is whether it is simpler to compute the posterior simulations within the Stan code or in R.
189 |
190 | We need to form the prediction for each data point and add observation error. Let's manually create 8 draws from the posterior predictive distribution and compare them to our data.
191 |
192 | There are many ways you could do this. The following is one way. We will create a list with posterior predictions within data frames and then bind the elements of the list into a big data frame. We are doing this so it is easy to plot the output with ggplot.
193 |
194 | ```{r}
195 | set.seed(1)
196 | n_sim <- 8
197 | out <- list()
198 | for (i in seq_len(n_sim)) {
199 | out[[i]] <- tibble(x = x)
200 | out[[i]]$i <- i
201 | out[[i]]$y_pp <-
202 | rnorm(
203 | n = length(x),
204 | mean = post$alpha[i] + post$beta[i] * x,
205 | sd = post$sigma[i]
206 | )
207 | }
208 | out <- dplyr::bind_rows(out) # turn the list into a data.frame
209 | out$type <- "posterior prediction"
210 |
211 | # add the real data as the last panel:
212 | out <- dplyr::bind_rows(
213 | out,
214 | tibble(x = x, y_pp = y, type = "observed", i = 9)
215 | )
216 |
217 | ggplot(out, aes(x, y_pp, colour = type)) +
218 | geom_point() +
219 | facet_wrap(~i)
220 | ```
221 |
222 | ## Questions:
223 |
224 | Which approach do you prefer in this case (Stan generated quantities vs. R code)?
225 |
226 | When might you prefer one over the other approach?
227 |
--------------------------------------------------------------------------------
/09-stan-coding-growth.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "More coding in Stan: a non-linear model example"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals:
10 |
11 | - Gain exposure to a non-linear model written in Stan that's more complicated than the simple linear regression.
12 | - See how we can do prior predictive simulations in a custom Stan model.
13 | - Gain an initial exposure to Bayesian model comparison.
14 |
15 | # Setup
16 |
17 | ```{r, message=FALSE, warning=FALSE}
18 | library(rstan)
19 | library(dplyr)
20 | library(ggplot2)
21 | options(mc.cores = parallel::detectCores())
22 | rstan_options(auto_write = TRUE)
23 | show_file <- function(file) {
24 | cat(paste(readLines(file), collapse = "\n"))
25 | }
26 | ```
27 |
28 | # Exercise
29 |
30 | These are Pacific Cod age and length data from two DFO synoptic trawl surveys in BC for Hecate Strait and West Coast Vancouver Island.
31 |
32 | ```{r}
33 | d <- readRDS(here::here("data/pcod-growth.rds"))
34 | d$i <- seq_len(nrow(d))
35 | ggplot(d, aes(age, length)) + geom_point() +
36 | facet_wrap(~survey)
37 | ```
38 |
39 | We will fit a von Bertalanffy growth model to these data. Here's a simple version:
40 |
41 | ```{r}
42 | show_file("vb/vb_basic.stan")
43 | ```
44 |
45 | We will assume normally distributed observation error for simplicity (it's also relatively common).
46 |
47 | Why is this technically not an ideal choice?
48 |
49 | What would be some alternatives?
50 |
51 | This version has some more bells and whistles:
52 |
53 | ```{r}
54 | show_file("vb/vb_norm.stan")
55 | ```
56 |
57 | Form our data list for rstan:
58 |
59 | ```{r}
60 | dat <- list(
61 | N = nrow(d),
62 | length = d$length,
63 | age = d$age,
64 | prior_sds = c(k = 1, linf = 200, t0 = 1, sigma = 10),
65 | prior_only = 0
66 | )
67 | ```
68 |
69 | Sample from our model:
70 |
71 | ```{r, results="hide", message=FALSE}
72 | fit1 <- stan(here::here("vb/vb_norm.stan"), data = dat, iter = 1000, chains = 4, seed = 9129)
73 | ```
74 |
75 | Note that we set the `seed` here. Why might we want to do that?
76 |
77 | Print the parameters of interest:
78 |
79 | Why am I specifying the parameters explicitly here?
80 |
81 | ```{r}
82 | print(fit1, pars = c("k", "linf", "t0", "sigma"))
83 | ```
84 |
85 | There are many formats we can extract posterior samples in:
86 |
87 | ```{r}
88 | sims_list <- extract(fit1)
89 | names(sims_list)
90 |
91 | sims_matrix <- as.matrix(fit1)
92 | dim(sims_matrix)
93 | colnames(sims_matrix)[1:4]
94 |
95 | sims_array <- as.array(fit1)
96 | dim(sims_array)
97 | colnames(sims_array[1,,])[1:4]
98 |
99 | sims_df <- as.data.frame(fit1)
100 | head(sims_df[,1:4])
101 | ```
102 |
103 | Look at MCMC trace plots:
104 |
105 | ```{r}
106 | dim(sims_array)
107 | bayesplot::mcmc_trace(sims_array[,,1:4])
108 | ```
109 |
110 | Grab posterior predictions of new observations:
111 |
112 | We'll use `tidybayes::gather_draws()`, which conveniently gathers 20 MCMC samples for `length_sim` and forms a "long-format" data frame that is easy to work with in dplyr or ggplot. `[i]` tells the function to index each observation as `i` in the data frame.
113 |
114 | Again, I set the seed here. Why is that?
115 |
116 | ```{r}
117 | post <- tidybayes::gather_draws(fit1, length_sim[i], ndraws = 20, seed = 9283)
118 | head(post)
119 |
120 | # column 'i' was previously added to our data above so we can join on it:
121 | post <- left_join(post, d)
122 | ggplot(post, aes(age, .value)) + geom_point(alpha = 0.2)
123 | ```
124 |
125 | Question: what are we looking at?
126 |
127 | Expected length values:
128 |
129 | ```{r}
130 | post <- tidybayes::gather_draws(fit1, predicted_length[i], ndraws = 20, seed = 9283)
131 | head(post)
132 |
133 | post <- left_join(post, d)
134 | ggplot(post, aes(age, .value, group = .draw)) + geom_line(alpha = 0.2) +
135 | geom_point(data = d, mapping = aes(age, length), inherit.aes = FALSE, alpha = 0.2)
136 | ```
137 |
138 | Question: what are we looking at?
139 |
140 | The parameter posteriors:
141 |
142 | ```{r}
143 | post <- tidybayes::gather_draws(fit1, c(k, linf, t0, sigma))
144 | head(post)
145 | ggplot(post, aes(.value)) + geom_histogram() +
146 | facet_wrap(~.variable, scales = "free_x")
147 | ```
148 |
149 | We can calculate ELPD (expected log predictive density) from a leave-one-out approximation because we included `log_lik` in our `generated quantities` section. It's not all that useful yet though, since we've only fit one model. We'll dive more deeply into this elsewhere.
150 |
151 | ```{r}
152 | loo1 <- loo(fit1)
153 | loo1
154 | ```
155 |
156 | # Extending our model
157 |
158 | We're now going to take the above model and modify it to allow for separate k, linf, and t0 by survey region (HS vs. WCVI). We want to ask whether there is evidence in the growth curve that we should be treating these regions as separate stocks.
159 |
160 | ```{r}
161 | show_file("vb/vb_norm_regions.stan")
162 | ```
163 |
164 | ```{r, results="hide"}
165 | dat2 <- dat
166 | levels(factor(d$survey))
167 | unique(as.integer(factor(d$survey)))
168 | dat2$survey_id <- as.integer(factor(d$survey))
169 | dat2$N_surveys <- 2
170 |
171 | fit2 <- stan(here::here("vb/vb_norm_regions.stan"), data = dat2, iter = 1000, chains = 4, seed = 9129)
172 | ```
173 |
174 | ```{r}
175 | print(fit2, pars = c("k", "linf", "t0", "sigma"))
176 | ```
177 |
178 | ```{r}
179 | sims2 <- extract(fit2)
180 | ```
181 |
182 | Plot the 80% quantile credible interval of expected values and the posterior predictive distribution:
183 |
184 | ```{r}
185 | dim(sims2$length_sim)
186 | d$upr <- apply(sims2$length_sim, 2, quantile, probs = 0.9)
187 | d$med <- apply(sims2$length_sim, 2, quantile, probs = 0.5)
188 | d$lwr <- apply(sims2$length_sim, 2, quantile, probs = 0.1)
189 |
190 | d$e_upr <- apply(sims2$predicted_length, 2, quantile, probs = 0.9)
191 | d$e_med <- apply(sims2$predicted_length, 2, quantile, probs = 0.5)
192 | d$e_lwr <- apply(sims2$predicted_length, 2, quantile, probs = 0.1)
193 |
194 | ggplot(d, aes(age, med, colour = survey, fill = survey)) +
195 | geom_line() +
196 | geom_line(aes(y = e_med)) +
197 | geom_ribbon(aes(ymin = e_lwr, ymax = e_upr), alpha = 0.5, colour = NA) +
198 | geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, colour = NA) +
199 | geom_point(data = d, mapping = aes(age, length), inherit.aes = FALSE, alpha = 0.2) +
200 | ylab("Length") + xlab("Age")
201 | ```
202 |
203 | ```{r}
204 | y <- d$length
205 | sims1 <- extract(fit1)
206 | sims2 <- extract(fit2)
207 | set.seed(1)
208 | i <- sample(1:nrow(sims1$length_sim), size = 20)
209 | yrep1 <- sims1$length_sim[i,]
210 | yrep2 <- sims2$length_sim[i,]
211 | bayesplot::ppc_dens_overlay(y, yrep1)
212 | bayesplot::ppc_dens_overlay(y, yrep2)
213 |
214 | bayesplot::ppc_dens_overlay_grouped(y, yrep1, group = d$survey)
215 | bayesplot::ppc_dens_overlay_grouped(y, yrep2, group = d$survey)
216 | ```
217 |
218 | The `^`s in the following "regular expressions" simply mean the parameters must start with each of these patterns.
219 |
220 |
221 | ```{r}
222 | sims_array2 <- as.array(fit2)[,,1:10]
223 | bayesplot::mcmc_trace(sims_array2, regex_pars = c("^k", "^linf", "^t0", "^sigma"))
224 | ```
225 |
226 | What does ELPD based on LOO cross validation tell us about the two models?
227 |
228 | ```{r}
229 | loo2 <- loo(fit2)
230 | loo::loo_compare(loo1, loo2)
231 | ```
232 |
233 | We can compare the posterior distributions of parameters from the two surveys.
234 |
235 | We will calculate differences for each of the parameters:
236 |
237 | ```{r}
238 | sims <- extract(fit2)
239 | dim(sims$k)
240 | k_diff <- sims$k[,2] - sims$k[,1]
241 | linf_diff <- sims$linf[,2] - sims$linf[,1]
242 | t0_diff <- sims$t0[,2] - sims$t0[,1]
243 | ```
244 |
245 | ```{r}
246 | hist(k_diff)
247 | hist(linf_diff)
248 | hist(t0_diff)
249 | ```
250 |
251 | # Exercise:
252 |
253 | Using those posterior draws, answer the following questions:
254 |
255 | What is the probability that linf is greater in WCVI than in HS?
256 |
257 | ```{r}
258 | mean(linf_diff > 0) # exercise
259 | ```
260 |
261 | What is the probability that linf is at least 5 cm greater in WCVI than in HS?
262 |
263 | ```{r}
264 | mean(linf_diff > 5) # exercise
265 | ```
266 |
267 | What is the probability that linf in WCVI is different from linf in HS by more than 4 cm?
268 |
269 | ```{r}
270 | mean(linf_diff > 4 | linf_diff < -4) # exercise
271 | ```
272 |
--------------------------------------------------------------------------------
/10-loo.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Leave-one-out cross validation, log scores, and ELPD"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Goals:
10 |
11 | - Understand ELPD and LOO concepts
12 | - Understand the 'loo' R package approximation
13 |
14 | # Background
15 |
16 | This material builds on code from the book:
17 |
18 | Gelman, A., Hill, J., and Vehtari, A. 2021. Regression and other stories. Cambridge University Press, Cambridge. doi:10.1017/9781139161879.
19 |
20 | We're going to use rstanarm instead of brms here just because rstanarm doesn't require us to compile the models, so the code will run much faster.
21 |
22 | Let's get some acronyms out of the way first:
23 |
24 | - ELPD: expected log (pointwise) predictive density (typically on left-out data)
25 | - LOO: leave-one-out
26 | - LOOIC: leave-one-out information criteria
27 |
28 | ```{r, message=FALSE, warning=FALSE}
29 | library(rstanarm)
30 | library(ggplot2)
31 | theme_set(theme_light())
32 | options(mc.cores = 1) # no parallel: faster for these simple regressions
33 | ```
34 |
35 | Let's simulate some data for a linear regression:
36 |
37 | ```{r }
38 | x <- 1:20
39 | n <- length(x)
40 | a <- 0.2
41 | b <- 0.3
42 | sigma <- 1
43 | set.seed(2141)
44 | y <- a + b * x + sigma * rnorm(n)
45 | fake <- data.frame(x, y)
46 | ```
47 |
48 | Let's fit a linear model. `rstanarm::stan_glm()` here is equivalent to `brms::brm()`.
49 |
50 | ```{r results='hide'}
51 | fit_all <- stan_glm(y ~ x, data = fake, seed = 2141, chains = 10, refresh = 0)
52 | ```
53 |
54 | # The concept of leave-one-out prediction (LOO)
55 |
56 | Now, let's fit a linear model without the 18th observation:
57 |
58 | ```{r }
59 | fit_minus_18 <- stan_glm(y ~ x, data = fake[-18, ], seed = 2141, refresh = 0)
60 | ```
61 |
62 | Extract posterior draws:
63 |
64 | ```{r }
65 | sims <- as.matrix(fit_all)
66 | sims_minus_18 <- as.matrix(fit_minus_18)
67 | ```
68 |
69 | We can compute the posterior predictive distribution given x=18:
70 |
71 | ```{r }
72 | condpred <- data.frame(y = seq(0, 9, length.out = 100))
73 | condpred$x <- sapply(condpred$y, \(y)
74 | mean(dnorm(y, sims[, 1] + sims[, 2] * x[18], sims[, 3]) * 6 + 18))
75 | # the * 6 + 18 here is just for plotting purposes below
76 | ```
77 |
78 | Compute LOO (leave-one-out) posterior predictive distribution given x=18:
79 |
80 | ```{r }
81 | condpredloo <- data.frame(y = seq(0, 9, length.out = 100))
82 | condpredloo$x <- sapply(condpredloo$y, \(y)
83 | mean(dnorm(y, sims_minus_18[, 1] + sims_minus_18[, 2] * x[18], sims_minus_18[, 3]) * 6 + 18))
84 | ```
85 |
86 | Create a plot with the posterior mean and posterior predictive distribution:
87 |
88 | ```{r }
89 | ggplot(fake, aes(x = x, y = y)) +
90 | geom_point(color = "white", size = 3) +
91 | geom_point(color = "black", size = 2) +
92 | geom_abline(
93 | intercept = mean(sims[, 1]),
94 | slope = mean(sims[, 2]),
95 | color = "black"
96 | ) +
97 | geom_path(data = condpred, aes(x = x, y = y), color = "black") +
98 | geom_vline(xintercept = 18, linetype = 3, color = "grey") +
99 | geom_point(data = fake[18, ], color = "grey50", size = 5, shape = 1) +
100 | geom_abline(
101 | intercept = mean(sims_minus_18[, 1]),
102 | slope = mean(sims_minus_18[, 2]),
103 | color = "grey50",
104 | linetype = 2
105 | ) +
106 | geom_path(data = condpredloo, aes(x = x, y = y), color = "grey50", linetype = 2)
107 | ```
108 |
109 | Note how dropping data point 18 shifts the posterior predictive distribution for the left-out point.
110 |
111 | Now, let's compute posterior and LOO residuals. `loo_predict()` computes the mean of the LOO predictive distribution:
112 |
113 | ```{r, message=FALSE}
114 | fake$residual <- fake$y - fit_all$fitted.values
115 | fake$looresidual <- fake$y - loo_predict(fit_all)$value
116 | ```
117 |
118 | Plot posterior and LOO residuals:
119 |
120 | ```{r }
121 | ggplot(fake, aes(x = x, y = residual)) +
122 | geom_point(color = "black", size = 2, shape = 16) +
123 | geom_point(aes(y = looresidual), color = "grey50", size = 2, shape = 1) +
124 | geom_segment(aes(xend = x, y = residual, yend = looresidual)) +
125 | geom_hline(yintercept = 0, linetype = 2)
126 | ```
127 |
128 | Note how the LOO residuals are all larger in size than the regular residuals. The model is pulled slightly towards each data point if it's included.
129 |
130 | We can also see this by looking at the standard deviations of posterior and LOO residuals:
131 |
132 | ```{r }
133 | round(sd(fake$residual), 2)
134 | round(sd(fake$looresidual), 2)
135 | ```
136 |
137 | Variance of residuals is connected to R^2, which can be defined as 1-var(res)/var(y):
138 |
139 | ```{r }
140 | round(1 - var(fake$residual) / var(y), 2)
141 | round(1 - var(fake$looresidual) / var(y), 2)
142 | ```
143 |
144 | # The concept of log scores and ELPD
145 |
146 | We can compute log predictive densities. This results in a matrix with a value for each MCMC sample and each data point:
147 |
148 | ```{r }
149 | ll_1 <- log_lik(fit_all)
150 | ```
151 |
152 | Compute the average log pointwise posterior density (LPD) in a computationally stable way. This is also known as the log score (although typically these values are multiplied by -1).
153 |
154 | ```{r }
155 | fake$lpd_post <- matrixStats::colLogSumExps(ll_1) - log(nrow(ll_1))
156 | ```
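
In equation form, with $S$ MCMC draws $\theta^{(s)}$ and observation $y_i$, this computes

$$
\mathrm{lpd}_i = \log\left(\frac{1}{S} \sum_{s=1}^{S} p(y_i \mid \theta^{(s)})\right) = \operatorname{logSumExp}_s\left(\log p(y_i \mid \theta^{(s)})\right) - \log S,
$$

which is why `colLogSumExps()` minus `log(nrow(ll_1))` gives a stable version of the average.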
157 |
158 | Let's do that by hand in R to make sure we know what just went on.
159 |
160 | Calculate the expectation for each observation for each MCMC sample:
161 |
162 | ```{r}
163 | y_hat <- matrix(nrow = nrow(sims), ncol = n)
164 | for (s in 1:nrow(sims)) {
165 | for (i in 1:n) {
166 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i]
167 | }
168 | }
169 | ```
170 |
171 | Now, calculate the log density for each observation and each MCMC sample. This is the log score.
172 |
173 | ```{r}
174 | ll_2 <- matrix(nrow = nrow(sims), ncol = n)
175 | for (s in 1:nrow(sims)) {
176 | for (i in 1:n) {
177 | ll_2[s, i] <- dnorm(y[i], mean = y_hat[s, i], sd = sims[s, 3], log = TRUE)
178 | }
179 | }
180 | ```
181 |
182 | Now, take the average density for each data point across MCMC samples:
183 |
184 | Note we're averaging over the likelihood, *not* the *log* likelihood.
185 |
186 | ```{r}
187 | lpd_2 <- log(apply(exp(ll_2), 2, mean)) # computationally dangerous!
188 | # lpd_2 <- matrixStats::colLogSumExps(ll_2) - log(nrow(ll_2)) # safer
189 | plot(fake$lpd_post, lpd_2)
190 | abline(0, 1)
191 | ```
192 |
193 | These match. The sum of these log predictive densities is called the 'ELPD': the expected log predictive density.
194 |
195 | We can check that our hand calculation matches the calculation from the loo package:
196 |
197 | ```{r}
198 | elpd_2 <- sum(lpd_2)
199 | elpd_2
200 | loo::elpd(log_lik(fit_all))
201 | ```
202 |
203 | # Combining LOO with ELPD
204 |
205 | So far, we have calculated ELPD on the data predicted from a model fit to all the data. But, we know this is an overly optimistic perspective on predictive ability for new data. Instead, we can compute log LOO predictive densities, which is typically how ELPD is used.
206 |
207 | `loo::loo()` uses fast approximate leave-one-out cross-validation to do this:
208 |
209 | ```{r }
210 | loo_1 <- loo(fit_all)
211 | loo_1
212 | fake$lpd_loo <- loo_1$pointwise[, "elpd_loo"]
213 | ```
214 |
215 | This approximation (demonstrated at the end) is equivalent to (but much faster than) the brute-force version:
216 |
217 | ```{r, results="hide"}
218 | lpd_loo <- numeric(n)
219 | for (i in 1:n) {
220 | cat(i, "\n")
221 | this_dat <- fake[-i, ]
222 | fit_minus_i <- rstanarm::stan_glm(y ~ x, data = this_dat, seed = 2141, iter = 2000, chains = 4, cores = 1, refresh = 0)
223 | draws <- as.matrix(fit_minus_i)
224 | y_hat <- numeric(nrow(draws))
225 | ll <- numeric(nrow(draws))
227 | for (s in 1:nrow(draws)) {
228 | y_hat[s] <- draws[s, 1] + draws[s, 2] * x[i]
229 | ll[s] <- dnorm(y[i], mean = y_hat[s], sd = draws[s, 3], log = TRUE)
230 | }
231 | lpd_loo[i] <- log(mean(exp(ll))) # computationally dangerous
232 | # lpd_loo[i] <- matrixStats::logSumExp(ll) - log(length(ll)) # safer
233 | }
234 | ```
235 |
236 | ```{r}
237 | sum(lpd_loo)
238 | sum(fake$lpd_loo)
239 | ```
240 |
241 | We can compare the ELPD vs. the LOO ELPD values:
242 |
243 | ```{r }
244 | ggplot(fake, aes(x = x, y = lpd_post)) +
245 | geom_point(color = "black", size = 2, shape = 16) +
246 | geom_point(aes(y = lpd_loo), color = "grey50", size = 2, shape = 1) +
247 | geom_segment(aes(xend = x, y = lpd_post, yend = lpd_loo)) +
248 | ylab("log predictive density")
249 | ```
250 |
251 | # LOOIC
252 |
253 | LOOIC is defined as `-2 * elpd_loo`, i.e., converted to the 'deviance' scale as in AIC.
254 |
255 | ```{r}
256 | -2 * sum(lpd_loo)
257 | loo::loo(fit_all)
258 | ```
259 |
260 | There's no reason we have to use that. We can also just work with ELPD. Note that more positive ELPD is 'better'.
261 |
262 | # Model comparison with LOO ELPD
263 |
264 | We will work with a regression on a dataset of child IQ scores, mom IQ scores, and other covariates such as whether the mom finished high school. It's from the Regression and Other Stories book and originally from the National Longitudinal Survey of Youth.
265 |
266 | ```{r }
267 | kidiq <- readRDS(here::here("data/kidiq.rds"))
268 | ```
269 |
270 | Linear regression with mom high school completion and IQ as predictors of child IQ:
271 |
272 | ```{r }
273 | fit_3 <- stan_glm(kid_score ~ mom_hs + mom_iq,
274 | data = kidiq,
275 | seed = 19203, refresh = 0
276 | )
277 | fit_3
278 | ```
279 |
280 | Compute R^2 and LOO-R^2 manually:
281 |
282 | ```{r, message=FALSE}
283 | respost <- kidiq$kid_score - fit_3$fitted.values
284 | resloo <- kidiq$kid_score - loo_predict(fit_3)$value
285 | round(R2 <- 1 - var(respost) / var(kidiq$kid_score), 3)
286 | round(R2loo <- 1 - var(resloo) / var(kidiq$kid_score), 3)
287 | ```
288 |
289 | Add five pure noise predictors to the data:
290 |
291 | ```{r }
292 | set.seed(1)
293 | n <- nrow(kidiq)
294 | kidiqr <- kidiq
295 | kidiqr$noise <- array(rnorm(5 * n), c(n, 5))
296 | ```
297 |
298 | Linear regression with additional noise predictors:
299 |
300 | ```{r }
301 | fit_3n <- stan_glm(kid_score ~ mom_hs + mom_iq + noise,
302 | data = kidiqr,
303 | seed = 19203, refresh = 0
304 | )
305 | fit_3n
306 | ```
307 |
308 | Compute R^2 and LOO-R^2 manually:
309 |
310 | ```{r, message=FALSE}
311 | respostn <- kidiq$kid_score - fit_3n$fitted.values
312 | resloon <- kidiq$kid_score - loo_predict(fit_3n)$value
313 | round(R2n <- 1 - var(respostn) / var(kidiq$kid_score), 3)
314 | round(R2loon <- 1 - var(resloon) / var(kidiq$kid_score), 3)
315 | ```
316 |
317 | R^2 got better! LOO-R^2 got worse.
318 |
319 | Each pure noise predictor is expected to add 0.5 to the in-sample ELPD and subtract 0.5 from the LOO-ELPD.
320 |
321 | ```{r}
322 | loo_3 <- loo(fit_3)
323 | loo_3n <- loo(fit_3n)
324 | loo_compare(loo_3, loo_3n)
325 | ```
326 |
327 | LOO ELPD favours the model without the noise predictors, but the difference isn't large. Presumably we'd pick the simpler model.
328 |
329 | "Regression and Other Stories" p. 178 suggests a difference of > 4 if number of observations is > 100 with well-specified models is a reliable way to distinguish. Otherwise, hard to distinguish.
330 |
331 | Let's try a model using only the maternal high school indicator:
332 |
333 | ```{r }
334 | fit_1 <- stan_glm(kid_score ~ mom_hs, data = kidiq, refresh = 0)
335 | loo_1 <- loo(fit_1)
336 | ```
337 |
338 | Compare models using LOO log score (ELPD):
339 |
340 | ```{r }
341 | loo_compare(loo_3, loo_1)
342 | ```
343 |
344 | We can also compare how individual data points are predicted:
345 |
346 | ```{r}
347 | elpdi1 <- loo_1$pointwise[, "elpd_loo"]
348 | elpdi3 <- loo_3$pointwise[, "elpd_loo"]
349 |
350 | kidiq$diff31 <- elpdi3 - elpdi1
351 | kidiq$i <- 1:nrow(kidiq)
352 |
353 | ggplot(kidiq, aes(i, diff31)) +
354 | geom_point() +
355 | geom_hline(yintercept = 0, lty = 2) +
356 | ylab("ELPD mod3 - ELPD mod 1\npositive favours mod3")
357 | ```
358 |
359 | Data points above the zero line are better predicted by model 3 under leave-one-out cross-validation.
360 |
361 | # Understanding the LOO ELPD approximation
362 |
363 | Let's do the loo approximation by hand without the smoothing part.
364 |
365 | The basic idea is that excluding a data point is equivalent to subtracting the log density of that data point from the log posterior (assuming independence), or equivalently, dividing the total posterior density by the density for that one data point. We can use this property to form weights to resample from our existing posterior samples and approximate the posterior we would have had if we had dropped that data point. This is a form of "importance sampling".
366 |
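In symbols, the raw importance weight for posterior draw $\theta^{(s)}$ when leaving out observation $i$ is

$$
w_i^{(s)} \propto \frac{1}{p(y_i \mid \theta^{(s)})},
$$

because the leave-one-out posterior is proportional to the full posterior divided by that one data point's likelihood.
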
367 | Let's work through an example with the original simulated dataset:
368 |
369 | We previously did this to come up with our expectations for each data point:
370 |
371 | ```{r}
372 | y_hat <- matrix(nrow = nrow(sims), ncol = n)
373 | for (s in 1:nrow(sims)) {
374 | for (i in 1:n) {
375 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i]
376 | }
377 | }
378 | ```
379 |
380 | And we calculated the log density for each observation and each MCMC sample:
381 |
382 | ```{r}
383 | log_dens <- matrix(nrow = nrow(sims), ncol = n)
384 | for (s in 1:nrow(sims)) {
385 | for (i in 1:n) {
386 | log_dens[s, i] <- dnorm(y[i], mean = y_hat[s, i], sd = sims[s, 3], log = TRUE)
387 | }
388 | }
389 | ```
390 |
391 | Now weight the MCMC samples by weights of 1 / density or equivalently, `1/exp(log_dens)`:
392 |
393 | ```{r}
394 | row_ids <- seq_len(nrow(sims))
396 |
397 | lpd_loo1 <- numeric(length(x))
398 | set.seed(123)
399 | for (i in 1:length(x)) {
400 | weights <- 1/exp(log_dens[,i])
401 | weights <- weights / sum(weights)
402 | sampled_rows <- sample(row_ids, 5000, prob = weights, replace = TRUE)
403 | lpd_loo1[i] <- log(mean(exp(log_dens[sampled_rows, i])))
404 | }
405 | loo::loo(fit_all)
406 | sum(lpd_loo1) # about the same
407 | ```
408 |
409 | The only difference with the calculations in the loo package is the package does some smoothing on the weights since the distribution of the weights can have very heavy tails with extremely unlikely data points.
410 |
411 | # Additional resources
412 |
413 | Vehtari, A., Gelman, A., & Gabry, J. 2017. Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing, 27(5), 1413–1432.
414 |
415 | Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. 2024, March 13. Pareto Smoothed Importance Sampling. arXiv. doi:10.48550/arXiv.1507.02646.
416 |
417 |
418 |
419 |
420 |
--------------------------------------------------------------------------------
/11-divergent-transitions.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Divergent transitions"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | Divergent transitions are an important diagnostic with the NUTS MCMC algorithm that Stan uses. **We should be wary of any divergent transitions.** They can be an important diagnostic of a coding error, an inefficient implementation of a model, or a model that is too complex for a given dataset. They can result in biased parameter estimates.
10 |
11 | We'll demonstrate divergent transitions using one of the most classic datasets in Bayesian statistics: the "8 schools" data from Rubin (1981).
12 |
13 | The code in this document has been adapted from existing Stan example code.
14 |
15 |
16 | ```{r, message=FALSE, warning=FALSE}
17 | library(rstan)
18 | rstan_options(auto_write = TRUE)
19 | ```
20 |
21 | This dataset represents a test of a coaching program on SAT scores across 8 schools. `y` is the average change in test score for a given school (improvement or otherwise) and `sigma` is the standard error on that average change.
22 |
23 | ```{r}
24 | dat <- list(
25 | N = 8,
26 | y = c(28, 8, -3, 7, -1, 1, 18, 12),
27 | sigma = c(15, 10, 16, 11, 9, 11, 10, 18)
28 | )
29 | ```
30 |
31 | The idea is that we can fit a hierarchical model that partially pools information across schools. Each school has a 'true' latent test score change, and the observed values arise from these latent values with known standard errors.
32 |
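In equation form, the model below is

$$
\begin{aligned}
y_j &\sim \operatorname{Normal}(\theta_j,\ \sigma_j), \quad j = 1, \dots, 8,\\
\theta_j &\sim \operatorname{Normal}(\mu,\ \sigma'),\\
\mu &\sim \operatorname{Normal}(0,\ 10), \qquad \sigma' \sim \operatorname{Half\text{-}Cauchy}(0,\ 10).
\end{aligned}
$$
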
33 | ```stan
34 | data {
35 | int N; // number of schools
36 | vector[N] y; // mean score change by school
37 | vector[N] sigma; // standard error on score change by school
38 | }
39 | parameters {
40 | real mu; // across-school average improvement
41 | real&lt;lower=0&gt; sigma_prime; // SD of latent score values
42 | vector[N] theta; // latent score values by school
43 | }
44 | model {
45 | mu ~ normal(0, 10); // prior
46 | sigma_prime ~ cauchy(0, 10); // prior
47 | theta ~ normal(mu, sigma_prime); // latent score values
48 | y ~ normal(theta, sigma); // data likelihood
49 | }
50 | ```
51 |
52 | Fit the model with Stan:
53 |
54 | ```{r, message=FALSE, warning=FALSE, results='hide'}
55 | mod1 <- stan(here::here("stan/8schools.stan"), data = dat, seed = 9283)
56 | ```
57 |
58 | ```{r}
59 | mod1
60 | ```
61 |
62 | Check divergences:
63 |
64 | ```{r}
65 | rstan::get_num_divergent(mod1)
66 | ```
67 |
68 | ```{r, eval=FALSE}
69 | shinystan::launch_shinystan(mod1)
70 | ```
71 |
72 | We can visualize what's going on. The sampler isn't going all the way down the 'funnel' formed by the SD of the latent values (`sigma_prime`) and any specific school's latent value (`theta`):
73 |
74 | ```{r}
75 | bayesplot::mcmc_scatter(
76 | as.array(mod1),
77 | pars = c("theta[1]", "sigma_prime"),
78 | transform = list(sigma_prime = "log"),
79 | np = bayesplot::nuts_params(mod1)
80 | )
81 | ```
82 |
83 | Note the red dots clustered towards the bottom of the funnel.
84 |
85 | We can instead fit a version that is parameterized differently:
86 |
87 | ```stan
88 | data {
89 | int N;
90 | vector[N] y;
91 | vector[N] sigma;
92 | }
93 | parameters {
94 | real mu;
95 | real&lt;lower=0&gt; sigma_prime;
96 | vector[N] eta; // temporary variable: Normal(0, 1)
97 | }
98 | transformed parameters {
99 | vector[N] theta; // define theta before we use it
100 | // now we form `theta` here based on sigma_prime * eta:
101 | theta = mu + sigma_prime * eta;
102 | }
103 | model {
104 | mu ~ normal(0, 10);
105 | sigma_prime ~ cauchy(0, 10);
106 | eta ~ normal(0, 1); // here's our temporary Normal(0, 1) variable
107 | y ~ normal(theta, sigma);
108 | }
109 | ```
110 |
111 | Everything not commented is the same as before. We now have a Normal(0, 1) variable `eta` that we multiply by `sigma_prime`. We have the same probability model, but now the sampler is tracking `sigma_prime` and `eta`, which are uncorrelated, so it has less of a problem exploring the full posterior. This approach is often called a 'non-centered parameterization'.
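
To convince yourself the two parameterizations imply the same distribution for `theta`, here's a quick simulation sketch (illustrative values only):

```{r}
set.seed(1)
mu_demo <- 5
sd_demo <- 3
theta_centred <- rnorm(1e5, mu_demo, sd_demo)
theta_noncentred <- mu_demo + sd_demo * rnorm(1e5, 0, 1)
qqplot(theta_centred, theta_noncentred)
abline(0, 1)
```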
112 |
113 | ```{r, message=FALSE, warning=FALSE, results='hide'}
114 | mod2 <- stan(here::here("stan/8schools_noncentered.stan"), data = dat, seed = 9283)
115 | ```
116 |
117 | ```{r}
118 | mod2
119 | ```
120 |
121 | ```{r}
122 | bayesplot::mcmc_scatter(
123 | as.array(mod2),
124 | pars = c("theta[1]", "sigma_prime"),
125 | transform = list(sigma_prime = "log"),
126 | np = bayesplot::nuts_params(mod2)
127 | )
128 | ```
129 |
130 | That's much better and our divergences do not appear to be systematically where we might worry about them (although that's harder to say with more complicated models). We can improve this further by increasing the `adapt_delta` from its default 0.8 towards 1:
131 |
132 | ```{r, message=FALSE, warning=FALSE, results='hide'}
133 | mod3 <- stan(
134 | here::here("stan/8schools_noncentered.stan"), data = dat, seed = 9283,
135 | control = list(adapt_delta = 0.95)
136 | )
137 | ```
138 |
139 | This tells Stan to adjust the NUTS algorithm to take smaller steps. That slows things down but can reduce divergent transitions.
140 |
141 | Alternative solutions here would have been to tighten the priors or collect more data.
142 |
143 | ```{r}
144 | bayesplot::mcmc_scatter(
145 | as.array(mod3),
146 | pars = c("theta[1]", "sigma_prime"),
147 | transform = list(sigma_prime = "log"),
148 | np = bayesplot::nuts_params(mod3)
149 | )
150 | ```
151 |
152 | We can check what effect ignoring those divergent transitions would have had on our estimate of `sigma_prime`:
153 |
154 | ```{r}
155 | p1 <- extract(mod1)
156 | p3 <- extract(mod3)
157 | ```
158 |
159 | ```{r}
160 | par(mfrow = c(2, 1))
161 | hist(log(p1$sigma_prime), xlim = c(-10, 5))
162 | hist(log(p3$sigma_prime), xlim = c(-10, 5))
163 |
164 | mean(log(p1$sigma_prime))
165 | mean(log(p3$sigma_prime))
166 | ```
167 |
168 | So, we would have lost the lower tail of the parameter if we had ignored the divergent transitions and ended up with some bias in the parameter posterior.
169 |
176 |
--------------------------------------------------------------------------------
/12-workflow-sdm.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Bayesian workflow example"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | ## Goals:
10 |
11 | - Work through the steps of a Bayesian workflow for an applied example.
12 | - Gain more experience with brms
13 | - Practice prior and posterior predictive checking
14 | - Gain brief exposure to a mixed effects model in brms with hierarchical variances
15 |
16 | ```{r, message=FALSE, warning=FALSE}
17 | library(dplyr)
18 | library(ggplot2)
19 | theme_set(theme_light())
20 | library(bayesplot)
21 | library(brms)
22 | options(mc.cores = parallel::detectCores()) # parallel chains
23 | options(brms.file_refit = "on_change") # re-fit cached models if changes
24 | dir.create("cache", showWarnings = FALSE)
25 | ```
26 |
27 | ## The data and question of interest
28 |
29 | These are rockfish densities from DFO's synoptic bottom trawl surveys off BC. I've only included densities for tows where that rockfish was caught. I.e., I've removed the zeros so we can work with a simple Gaussian model of log transformed rockfish densities.
30 |
31 | ```{r}
32 | d <- readRDS(here::here("data/rockfish-depth.rds"))
33 | d$logdepth <- as.numeric(scale(log(d$depth_m)))
34 | d$fyear <- factor(d$year)
35 | d$fspecies <- factor(d$species_common_name)
36 | d$density <- d$density_kgpm2 * 1000
37 | ```
38 |
39 | We can plot the raw data to see what the patterns look like.
40 |
41 | We're interested in predicting rockfish density in a tow as a function of depth. A question is whether we can treat these species as coming from the same depth-density relationship or whether they each need their own curve.
42 |
43 | Question: why work with log density as the response?
44 | Question: why work with log depth as the predictor?
45 |
46 | ## Basic data exploration
47 |
48 | ```{r}
49 | ggplot(d, aes(log(depth_m), log(density_kgpm2))) +
50 | geom_point() +
51 | geom_smooth(se = FALSE)
52 |
53 | ggplot(d, aes(log(depth_m), log(density_kgpm2), colour = species_common_name)) +
54 | geom_point() +
55 | geom_smooth(se = FALSE)
56 |
57 | ggplot(d, aes(log(depth_m), log(density_kgpm2))) +
58 | geom_point() +
59 | facet_wrap(~species_common_name) +
60 | geom_smooth(se = FALSE)
61 | ```
62 |
63 | ## Prior predictive checks
64 |
65 | We'll start with some prior predictive checks.
66 |
67 | We can find out what priors we need to specify with brms with `default_prior()`:
68 |
69 | ```{r}
70 | default_prior(
71 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2),
72 | data = d,
73 | family = gaussian()
74 | )
75 | ```
76 |
77 | We're using the `0 + Intercept` syntax in the formula so that brms doesn't parameterize the intercept as its value when the other predictors are at their mean. It just simplifies our interpretation here.
78 |
79 | Let's sample from these priors.
80 |
81 | We'll start with N(0, 1) priors on the slope and quadratic coefficients and a half-Student-t(3, 0, 3) prior on the observation error SD.
82 |
83 | We've log transformed depth and we're modelling log density as the response. So, a multiplicative change in depth implies a multiplicative change in density. We wouldn't expect such effects to be huge.
84 |
85 | E.g., if depth is doubled, how many times might we expect density to increase? Probably not millions of times on average.
86 |
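As a rough sketch of what a normal(0, 1) slope prior implies (new code here; ignoring the quadratic term): `logdepth` is log depth scaled by its SD, so doubling depth shifts it by `log(2) / sd(log(depth_m))`, and the implied multiplicative effect on density is:

```{r}
sd_logdepth <- sd(log(d$depth_m)) # the scaling used to create logdepth
b_prior <- rnorm(1e4, 0, 1) # prior draws for the logdepth slope
mult <- exp(b_prior * log(2) / sd_logdepth) # density multiplier per doubling
quantile(mult, probs = c(0.025, 0.5, 0.975))
```
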
87 | We're working with an intercept prior that roughly matches the mean of the data just to focus on the curvature aspect.
88 |
89 | ```{r, results="hide"}
90 | priors1 <- brm(
91 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2),
92 | data = d,
93 | iter = 100,
94 | chains = 1,
95 | family = gaussian(),
96 | sample_prior = "only",
97 | seed = 123,
98 | file = "cache/sdm-priors1",
99 | prior = c(
100 | set_prior("normal(-2.82, 5)", class = "b", coef = "Intercept"),
101 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"),
102 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"),
103 | set_prior("student_t(3, 0, 3)", class = "sigma")
104 | )
105 | )
106 | ```
107 |
108 | Let's pick out a species to look at our simulated data:
109 |
110 | ```{r}
111 | red <- filter(d, fspecies == "redbanded rockfish")
112 | obs <- mutate(red, .prediction = log(density), .draw = 0)
113 |
114 | pp <- tidybayes::predicted_draws(priors1, newdata = red, ndraws = 8)
115 |
116 | ggplot(pp, aes(depth_m, .prediction)) + geom_point() +
117 | facet_wrap(~.draw) +
118 | scale_x_log10() +
119 | ggtitle("Prior predictive simulation")
120 | ```
121 |
122 | We can instead look at prior pushforward simulations:
123 |
124 | ```{r}
125 | pp <- tidybayes::epred_draws(priors1, newdata = red, ndraws = 9)
126 | ggplot(pp, aes(depth_m, .epred)) + geom_point() +
127 | facet_wrap(~.draw) +
128 | scale_x_log10() +
129 | ggtitle("Prior pushforward simulation")
130 | ```
131 |
132 | What about if we had much wider priors?
133 |
134 | Here I've included the data to help remember the scale of the original data. Have we gone beyond the realm of possible parameter space?
135 |
136 | ```{r, results="hide", warning=FALSE}
137 | priors2 <- brm(
138 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2),
139 | data = d,
140 | iter = 100,
141 | chains = 1,
142 | family = gaussian(),
143 | sample_prior = "only",
144 | seed = 123,
145 | file = "cache/sdm-priors-wide",
146 | prior = c(
147 | set_prior("normal(0, 100)", class = "b", coef = "Intercept"),
148 | set_prior("normal(0, 100)", class = "b", coef = "logdepth"),
149 | set_prior("normal(0, 100)", class = "b", coef = "IlogdepthE2"),
150 | set_prior("student_t(3, 0, 100)", class = "sigma")
151 | )
152 | )
153 |
154 | pp <- tidybayes::predicted_draws(priors2, newdata = red, ndraws = 8)
155 | pp <- bind_rows(pp, obs) |>
156 | mutate(type = ifelse(.draw != 0, "Simulated", "Observed"))
157 |
158 | ggplot(pp, aes(depth_m, .prediction, colour = type)) + geom_point() +
159 | facet_wrap(~.draw) +
160 | scale_x_log10() +
161 | ggtitle("Redbanded Rockfish: wide priors")
162 | ```
163 |
164 | ## Fitting 3 models
165 |
166 | Now we're going to fit 3 models.
167 |
168 | 1. A quadratic effect of depth.
169 | 2. Let those quadratic curves vary by species.
170 | 3. Also let each species have its own level of observation error.
171 |
172 | In reality, we'd probably start with the simplest, check our model, and then increase the complexity to address issues. We'll fit all 3 at once to keep this example easier to follow.
173 |
174 | First, a simple quadratic linear regression:
175 |
176 | ```{r, results="hide"}
177 | fit1 <- brm(
178 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2),
179 | data = d,
180 | iter = 1000,
181 | chains = 4,
182 | family = gaussian(),
183 | seed = 726328,
184 | file = "cache/sdm-fit1",
185 | prior = c(
186 | set_prior("normal(0, 10)", class = "b", coef = "Intercept"),
187 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"),
188 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"),
189 | set_prior("student_t(3, 0, 3)", class = "sigma")
190 | )
191 | )
192 | ```
193 |
194 | Second, a version that enables each species to have its own curve:
195 |
196 | ```{r, results="hide"}
197 | # figure out the priors to specify
198 | default_prior(
199 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2) +
200 | (logdepth + I(logdepth^2) | fspecies),
201 | data = d
202 | )
203 |
204 | fit2 <- brm(
205 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2) +
206 | (logdepth + I(logdepth^2) | fspecies),
207 | data = d,
208 | iter = 1000,
209 | chains = 4,
210 | family = gaussian(),
211 | seed = 72632,
212 | control = list(adapt_delta = 0.9),
213 | file = "cache/sdm-fit2",
214 | prior = c(
215 | set_prior("normal(0, 10)", class = "b", coef = "Intercept"),
216 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"),
217 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"),
218 | set_prior("student_t(3, 0, 3)", class = "sigma"),
219 | set_prior("lkj_corr_cholesky(1)", class = "L"),
220 | set_prior("student_t(3, 0, 3)", class = "sd")
221 | )
222 | )
223 | ```
224 |
225 | Third, a version where we also let each species have its own observation error variance:
226 |
227 | ```{r, results="hide"}
228 | fit3 <- brm(
229 | bf(
230 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2) +
231 | (logdepth + I(logdepth^2) | fspecies),
232 | sigma ~ fspecies
233 | ),
234 | data = d,
235 | iter = 1000,
236 | chains = 4,
237 | family = gaussian(),
238 | seed = 726328,
239 | control = list(adapt_delta = 0.95),
240 | file = "cache/sdm-fit3",
241 | prior = c(
242 | set_prior("normal(0, 10)", class = "b", coef = "Intercept"),
243 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"),
244 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"),
245 | set_prior("student_t(3, 0, 3)", class = "Intercept", dpar = "sigma"),
246 | set_prior("normal(0, 1)", class = "b", dpar = "sigma"),
247 | set_prior("lkj_corr_cholesky(1)", class = "L"),
248 | set_prior("student_t(3, 0, 3)", class = "sd")
249 | )
250 | )
251 | ```
252 |
253 | ## Checking for convergence
254 |
255 | Look at our models:
256 |
257 | ```{r}
258 | fit1
259 | fit2
260 | fit3
261 | ```
262 |
263 | We should look at traceplots:
264 |
265 | ```{r}
266 | bayesplot::mcmc_trace(fit1, regex_pars = "^b_")
267 | bayesplot::mcmc_trace(fit3, regex_pars = "^b_")
268 | ```
269 |
270 | ## Summarizing the parameters
271 |
272 | And we can summarize the parameter posterior distributions:
273 |
274 | ```{r}
275 | bayesplot::mcmc_dens_chains(fit1, regex_pars = "^b_")
276 | bayesplot::mcmc_areas(fit3, regex_pars = c("^b_", "sigma"))
277 | bayesplot::mcmc_intervals(fit3, regex_pars = c("^r_"))
278 | ```
279 |
280 | If we wanted to make our own plot, we could have used this to get the data:
281 |
282 | ```{r}
283 | bayesplot::mcmc_intervals_data(fit3, regex_pars = c("^b_")) |>
284 | head()
285 | ```
286 |
287 | ## Posterior predictive simulations
288 |
289 | Let's look at some posterior predictive simulations.
290 |
291 | ```{r}
292 | y <- log(d$density)
293 | yrep1 <- posterior_predict(fit1, ndraws = 20)
294 | bayesplot::ppc_dens_overlay(y, yrep1)
295 | bayesplot::ppc_dens_overlay_grouped(y, yrep1, group = d$fspecies)
296 | ```
297 |
298 | Question: how does that look?
299 |
300 | ```{r}
301 | yrep2 <- posterior_predict(fit2, ndraws = 20)
302 | bayesplot::ppc_dens_overlay_grouped(y, yrep2, group = d$fspecies)
303 | ```
304 |
305 | Question: is that better?
306 |
307 | ```{r}
308 | yrep3 <- posterior_predict(fit3, ndraws = 20)
309 | bayesplot::ppc_dens_overlay_grouped(y, yrep3, group = d$fspecies)
310 | ```
311 |
312 | Question: is that better? Which model best generates data that resemble the observations?
313 |
314 | Question: these may still not be perfect. What are some possible reasons for that? How might we expand the model in reality?
315 |
316 | Let's dig into an example species:
317 |
318 | ```{r}
319 | # grab data for one species:
320 | red <- filter(d, fspecies == "redbanded rockfish")
321 | obs <- mutate(red, .prediction = log(density), .draw = 0)
322 |
323 | # make posterior predictions:
324 | pp1 <- tidybayes::predicted_draws(fit1, newdata = red, ndraws = 8)
325 | pp1 <- bind_rows(pp1, obs) |>
326 | mutate(type = ifelse(.draw != 0, "Simulated", "Observed"))
327 |
328 | # make posterior predictions:
329 | pp3 <- tidybayes::predicted_draws(fit3, newdata = red, ndraws = 8)
330 | pp3 <- bind_rows(pp3, obs) |>
331 | mutate(type = ifelse(.draw != 0, "Simulated", "Observed"))
332 | ```
333 |
334 | And plot the output:
335 |
336 | ```{r}
337 | ggplot(pp1, aes(depth_m, .prediction, colour = type)) + geom_point() +
338 | facet_wrap(~.draw) +
339 | scale_x_log10() +
340 | ggtitle("Redbanded Rockfish: shared quadratic")
341 |
342 | ggplot(pp3, aes(depth_m, .prediction, colour = type)) + geom_point() +
343 | facet_wrap(~.draw) +
344 | scale_x_log10() +
345 | ggtitle("Redbanded Rockfish: species-specific quadratic + error")
346 | ```
347 |
348 | Question: can you tell what is off in the posterior predictive simulations for this example in model 1?
349 |
350 | Hint: look at the spread.
351 |
352 | ## Posterior predictive simulations: statistical properties
353 |
354 | We can also visualize various statistical properties of our simulated and real observations. We'll focus on model 1 and 3 for brevity.
355 |
356 | ```{r}
357 | yrep1 <- posterior_predict(fit1)
358 | yrep3 <- posterior_predict(fit3)
359 | ```
360 |
361 | Medians:
362 |
363 | ```{r}
364 | ppc_stat_grouped(y, yrep1, stat = "median", group = d$fspecies)
365 | ppc_stat_grouped(y, yrep3, stat = "median", group = d$fspecies)
366 | ```
367 |
368 | SD:
369 |
370 | ```{r}
371 | ppc_stat_grouped(y, yrep1, stat = "sd", group = d$fspecies)
372 | ppc_stat_grouped(y, yrep3, stat = "sd", group = d$fspecies)
373 | ```
374 |
375 | Question: why is this not a very useful check here?
376 |
377 | Interquartile range:
378 |
379 | ```{r}
380 | iqr <- function(x) {
381 | q75 <- quantile(x, 0.75)
382 | q25 <- quantile(x, 0.25)
383 | q75 - q25
384 | }
385 | ppc_stat_grouped(y, yrep1, stat = "iqr", group = d$fspecies)
386 | ppc_stat_grouped(y, yrep3, stat = "iqr", group = d$fspecies)
387 | ```
388 |
We might also wonder: is this relationship changing through time? Should we have included a year covariate? We can check what our simulations say:
390 |
391 | ```{r}
392 | ppc_stat_grouped(y, yrep1, stat = "median", group = d$year)
393 | ```
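
And the same check for model 3, assuming we want the same year grouping:

```{r}
ppc_stat_grouped(y, yrep3, stat = "median", group = d$year)
```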
394 |
395 | ## Comparing models with ELPD
396 |
397 | We can look at ELPD-LOO:
398 |
399 | ```{r}
400 | loo1 <- loo(fit1)
401 | loo2 <- loo(fit2)
402 | loo3 <- loo(fit3)
403 | loo_compare(loo1, loo2, loo3)
404 | ```
405 |
406 | What does this tell us?
407 |
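Before leaning too heavily on this comparison, it's worth checking the PSIS Pareto-k diagnostics; `print()` summarizes them and `plot()` on a loo object shows the pointwise k values:

```{r}
print(loo3)
plot(loo3)
```
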
408 | We can also look at the pointwise LOO predictive densities themselves:
409 |
410 | ```{r}
411 | elpdi1 <- loo1$pointwise[, "elpd_loo"]
412 | elpdi2 <- loo2$pointwise[, "elpd_loo"]
413 | elpdi3 <- loo3$pointwise[, "elpd_loo"]
414 | ```
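
As a sanity check, these pointwise values sum to each model's total ELPD, so their sums should reproduce the differences reported by `loo_compare()`:

```{r}
sum(elpdi3) - sum(elpdi2) # should match the elpd_diff between models 3 and 2
```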
415 |
416 | And compare how good each model was at predicting each left-out point:
417 |
418 | ```{r}
419 | d$diff32 <- elpdi3 - elpdi2
420 | d$diff21 <- elpdi2 - elpdi1
421 |
422 | ggplot(d, aes(logdepth, diff21, colour = fspecies, fill = fspecies)) +
423 | geom_point() +
424 | facet_wrap(~fspecies) +
425 | geom_hline(yintercept = 0, lty = 2)
426 |
427 | ggplot(d, aes(logdepth, diff32, colour = fspecies, fill = fspecies)) +
428 | geom_point() +
429 | facet_wrap(~fspecies) +
430 | geom_hline(yintercept = 0, lty = 2)
431 | ```
432 |
433 | What can these tell us?
434 |
435 | ## Visualizing the model predictions
436 |
We can plot the model's expected values across a sequence of depths.
438 |
439 | First, we could take a sequence of draws from the posterior:
440 |
441 | ```{r}
442 | nd <- expand.grid(
443 | logdepth = seq(min(d$logdepth), max(d$logdepth), length.out = 100),
444 | fspecies = unique(d$fspecies)
445 | )
446 |
447 | out <- tidybayes::add_epred_draws(newdata = nd, object = fit3, ndraws = 50)
448 | ggplot(out, aes(logdepth, .epred, group = .draw)) +
449 | geom_line(alpha = 0.2) +
450 | facet_wrap(~fspecies, scales = "free_y")
451 | ```
452 |
453 | Or we could summarize the distribution of those values:
454 |
455 | ```{r}
456 | lpred <- brms::posterior_epred(fit3, newdata = nd)
457 | nd$med <- apply(lpred, 2, median)
nd$lwr <- apply(lpred, 2, quantile, probs = 0.1) # lower edge of an 80% interval
nd$upr <- apply(lpred, 2, quantile, probs = 0.9) # upper edge of an 80% interval
460 | ggplot(nd, aes(logdepth, exp(med), colour = fspecies, fill = fspecies)) +
461 | geom_ribbon(aes(ymin = exp(lwr), ymax = exp(upr)), alpha = 0.2, colour = NA) +
462 | geom_line()
463 | ```
464 |
465 | What about the distribution of new observations? Are these more spread out? Why?
466 |
467 | ```{r}
468 | ppred <- brms::posterior_predict(fit3, newdata = nd)
469 | nd$pp_med <- apply(ppred, 2, median)
nd$pp_lwr <- apply(ppred, 2, quantile, probs = 0.1) # lower edge of an 80% interval
nd$pp_upr <- apply(ppred, 2, quantile, probs = 0.9) # upper edge of an 80% interval
472 | ggplot(nd, aes(logdepth, exp(pp_med), colour = fspecies, fill = fspecies)) +
473 | geom_ribbon(aes(ymin = exp(pp_lwr), ymax = exp(pp_upr)), alpha = 0.2, colour = NA) +
474 | geom_line()
475 | ```
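
To see the difference directly, we could overlay the expected-value and new-observation intervals for a single species (a sketch reusing the columns computed in `nd` above):

```{r}
one <- filter(nd, fspecies == unique(nd$fspecies)[1])
ggplot(one, aes(logdepth)) +
  geom_ribbon(aes(ymin = exp(pp_lwr), ymax = exp(pp_upr)), alpha = 0.2) + # new observations
  geom_ribbon(aes(ymin = exp(lwr), ymax = exp(upr)), alpha = 0.4) + # expected value
  geom_line(aes(y = exp(med)))
```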
476 |
## Questions
478 |
479 | - What can we conclude from these data given our models?
480 | - What of the above might we report in a paper?
481 | - What else might you consider adding to this model?
482 | - What are some possible uses for the posterior predictive simulations?
483 |
--------------------------------------------------------------------------------
/13-resources.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Useful resources"
3 | output:
4 | html_document:
5 | toc: true
6 | toc_float: true
7 | ---
8 |
9 | # Stan resources
10 |
11 | The main documentation:
12 |
13 |
14 | Stan Best Practices:
15 |
16 |
17 | Prior Choice Recommendations:
18 |
19 |
20 | Case studies:
21 |
22 |
23 | Tutorials, books, online video courses, and presentations:
24 |
25 |
26 | Forums:
27 |
28 |
29 | Examples from many textbooks converted into Stan:
30 |
31 |
32 | tidybayes:
33 |
34 |
35 | Visualize MCMC algorithms including NUTS:
36 |
37 |
38 | # Online lecture recordings
39 |
40 | Statistical Rethinking Fall 2023 lectures:
41 |
42 |
43 | Recordings from Aki Vehtari's course:
44 |
45 | (scroll down)
46 |
47 | # Papers
48 |
Monnahan, C.C., Thorson, J.T., and Branch, T.A. 2016. Faster estimation of Bayesian models in ecology using Hamiltonian Monte Carlo. Methods Ecol Evol.
50 |
Banner, K.M., Irvine, K.M., and Rodhouse, T. 2020. The Use of Bayesian Priors in Ecology: The Good, The Bad, and The Not Great. Methods Ecol Evol. doi:10.1111/2041-210X.13407.
52 |
53 | Gabry, J., Simpson, D., Vehtari, A., Betancourt, M., and Gelman, A. 2019. Visualization in Bayesian workflow. Journal of the Royal Statistical Society Series A: Statistics in Society 182(2): 389–402.
54 |
55 | Gelman, A., Vehtari, A., Simpson, D., Margossian, C.C., Carpenter, B., Yao, Y., Kennedy, L., Gabry, J., Bürkner, P.-C., and Modrák, M. 2020. Bayesian Workflow. arXiv:2011.01808.
56 |
57 | Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., and Bürkner, P.-C. 2021. Rank-normalization, folding, and localization: an improved $\hat{R}$ for assessing convergence of MCMC (with discussion). Bayesian Analysis 16(2): 667–718. International Society for Bayesian Analysis.
58 |
59 | Vehtari, A., Gelman, A., and Gabry, J. 2017. Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing 27(5): 1413–1432. doi:10.1007/s11222-016-9696-4.
60 |
61 | Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. 2024, March 13. Pareto Smoothed Importance Sampling. arXiv. doi:10.48550/arXiv.1507.02646.
62 |
63 | Betancourt, M. 2017. A Conceptual Introduction to Hamiltonian Monte Carlo. arXiv:1701.02434 [stat].
64 |
Monnahan, C.C. 2024. Toward good practices for Bayesian data-rich fisheries stock assessments using a modern statistical workflow. Fisheries Research 275: 107024.
66 |
67 | # Textbooks
68 |
69 | Hobbs, N.T., and Hooten, M.B. 2015. Bayesian models: a statistical primer for ecologists. Princeton University Press, Princeton, New Jersey.
70 |
71 | - The best textbook I've found on the fundamentals of Bayesian models from an ecologist's perspective. I've never found another book that filled in so many gaps in understanding. An excellent resource on how to read and write Bayesian models. Note that this textbook does not focus on code.
72 |
73 | McElreath, R. 2020. Statistical rethinking: a Bayesian course with examples in R and Stan. 2nd edition. CRC Press/Taylor & Francis Group.
74 |
75 |
- A fantastic textbook that will help you think about Bayesian modeling and modeling in general. Perhaps the one downside is that it uses the author's 'rethinking' R package throughout, which is great from a pedagogical perspective but not great if you want to learn Stan code itself. Still very much worth it, though: Stan code is relatively easy to learn once you understand the concepts, and in many cases there's no need to write the code yourself anyway.
77 | - See this repository that includes most of the examples reworked with the brms package and ggplot2:
78 |
79 | Gelman, A., Hill, J., and Vehtari, A. 2021. Regression and other stories. Cambridge University Press, Cambridge. doi:10.1017/9781139161879.
80 |
81 | - An excellent textbook on regression and GLMs in a Bayesian context. Examples are mostly from the social, political, and health sciences, but applicable to anything. Also deals with causal inference.
82 | - Examples and other material here:
83 |
Gelman, A., Carlin, J.B., Stern, H.S., Dunson, D.B., Vehtari, A., and Rubin, D.B. 2014. Bayesian Data Analysis. Chapman & Hall, Boca Raton, FL.
85 |
86 | - The Bayesian data analysis bible, but definitely not easy reading.
87 |
88 | **Official up-to-date PDF version**:
89 |
90 | # Not specifically about Bayesian modeling, but still very useful
91 |
92 | Schielzeth, H. 2010. Simple means to improve the interpretability of regression coefficients. Methods in Ecology and Evolution 1:103–113.
93 |
94 | Gelman, A. 2008. Scaling regression inputs by dividing by two standard deviations. Statistics in Medicine 27:2865–2873.
95 |
96 | Morey, R. D., Hoekstra, R., Rouder, J., Lee, M. D., and Wagenmakers, E. (2016). The fallacy of placing confidence in confidence intervals. Psychonomic Bulletin & Review. 23(1), 103–123.
97 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | Attribution 4.0 International
2 | =======================================================================
3 |
4 | Creative Commons Corporation ("Creative Commons") is not a law firm and
5 | does not provide legal services or legal advice. Distribution of
6 | Creative Commons public licenses does not create a lawyer-client or
7 | other relationship. Creative Commons makes its licenses and related
8 | information available on an "as-is" basis. Creative Commons gives no
9 | warranties regarding its licenses, any material licensed under their
10 | terms and conditions, or any related information. Creative Commons
11 | disclaims all liability for damages resulting from their use to the
12 | fullest extent possible.
13 |
14 | Using Creative Commons Public Licenses
15 |
16 | Creative Commons public licenses provide a standard set of terms and
17 | conditions that creators and other rights holders may use to share
18 | original works of authorship and other material subject to copyright
19 | and certain other rights specified in the public license below. The
20 | following considerations are for informational purposes only, are not
21 | exhaustive, and do not form part of our licenses.
22 |
23 | Considerations for licensors: Our public licenses are
24 | intended for use by those authorized to give the public
25 | permission to use material in ways otherwise restricted by
26 | copyright and certain other rights. Our licenses are
27 | irrevocable. Licensors should read and understand the terms
28 | and conditions of the license they choose before applying it.
29 | Licensors should also secure all rights necessary before
30 | applying our licenses so that the public can reuse the
31 | material as expected. Licensors should clearly mark any
32 | material not subject to the license. This includes other CC-
33 | licensed material, or material used under an exception or
34 | limitation to copyright. More considerations for licensors:
35 | wiki.creativecommons.org/Considerations_for_licensors
36 |
37 | Considerations for the public: By using one of our public
38 | licenses, a licensor grants the public permission to use the
39 | licensed material under specified terms and conditions. If
40 | the licensor's permission is not necessary for any reason--for
41 | example, because of any applicable exception or limitation to
42 | copyright--then that use is not regulated by the license. Our
43 | licenses grant only permissions under copyright and certain
44 | other rights that a licensor has authority to grant. Use of
45 | the licensed material may still be restricted for other
46 | reasons, including because others have copyright or other
47 | rights in the material. A licensor may make special requests,
48 | such as asking that all changes be marked or described.
49 | Although not required by our licenses, you are encouraged to
50 | respect those requests where reasonable. More_considerations
51 | for the public:
52 | wiki.creativecommons.org/Considerations_for_licensees
53 |
54 | =======================================================================
55 |
56 | Creative Commons Attribution 4.0 International Public License
57 |
58 | By exercising the Licensed Rights (defined below), You accept and agree
59 | to be bound by the terms and conditions of this Creative Commons
60 | Attribution 4.0 International Public License ("Public License"). To the
61 | extent this Public License may be interpreted as a contract, You are
62 | granted the Licensed Rights in consideration of Your acceptance of
63 | these terms and conditions, and the Licensor grants You such rights in
64 | consideration of benefits the Licensor receives from making the
65 | Licensed Material available under these terms and conditions.
66 |
67 |
68 | Section 1 -- Definitions.
69 |
70 | a. Adapted Material means material subject to Copyright and Similar
71 | Rights that is derived from or based upon the Licensed Material
72 | and in which the Licensed Material is translated, altered,
73 | arranged, transformed, or otherwise modified in a manner requiring
74 | permission under the Copyright and Similar Rights held by the
75 | Licensor. For purposes of this Public License, where the Licensed
76 | Material is a musical work, performance, or sound recording,
77 | Adapted Material is always produced where the Licensed Material is
78 | synched in timed relation with a moving image.
79 |
80 | b. Adapter's License means the license You apply to Your Copyright
81 | and Similar Rights in Your contributions to Adapted Material in
82 | accordance with the terms and conditions of this Public License.
83 |
84 | c. Copyright and Similar Rights means copyright and/or similar rights
85 | closely related to copyright including, without limitation,
86 | performance, broadcast, sound recording, and Sui Generis Database
87 | Rights, without regard to how the rights are labeled or
88 | categorized. For purposes of this Public License, the rights
89 | specified in Section 2(b)(1)-(2) are not Copyright and Similar
90 | Rights.
91 |
92 | d. Effective Technological Measures means those measures that, in the
93 | absence of proper authority, may not be circumvented under laws
94 | fulfilling obligations under Article 11 of the WIPO Copyright
95 | Treaty adopted on December 20, 1996, and/or similar international
96 | agreements.
97 |
98 | e. Exceptions and Limitations means fair use, fair dealing, and/or
99 | any other exception or limitation to Copyright and Similar Rights
100 | that applies to Your use of the Licensed Material.
101 |
102 | f. Licensed Material means the artistic or literary work, database,
103 | or other material to which the Licensor applied this Public
104 | License.
105 |
106 | g. Licensed Rights means the rights granted to You subject to the
107 | terms and conditions of this Public License, which are limited to
108 | all Copyright and Similar Rights that apply to Your use of the
109 | Licensed Material and that the Licensor has authority to license.
110 |
111 | h. Licensor means the individual(s) or entity(ies) granting rights
112 | under this Public License.
113 |
114 | i. Share means to provide material to the public by any means or
115 | process that requires permission under the Licensed Rights, such
116 | as reproduction, public display, public performance, distribution,
117 | dissemination, communication, or importation, and to make material
118 | available to the public including in ways that members of the
119 | public may access the material from a place and at a time
120 | individually chosen by them.
121 |
122 | j. Sui Generis Database Rights means rights other than copyright
123 | resulting from Directive 96/9/EC of the European Parliament and of
124 | the Council of 11 March 1996 on the legal protection of databases,
125 | as amended and/or succeeded, as well as other essentially
126 | equivalent rights anywhere in the world.
127 |
128 | k. You means the individual or entity exercising the Licensed Rights
129 | under this Public License. Your has a corresponding meaning.
130 |
131 |
132 | Section 2 -- Scope.
133 |
134 | a. License grant.
135 |
136 | 1. Subject to the terms and conditions of this Public License,
137 | the Licensor hereby grants You a worldwide, royalty-free,
138 | non-sublicensable, non-exclusive, irrevocable license to
139 | exercise the Licensed Rights in the Licensed Material to:
140 |
141 | a. reproduce and Share the Licensed Material, in whole or
142 | in part; and
143 |
144 | b. produce, reproduce, and Share Adapted Material.
145 |
146 | 2. Exceptions and Limitations. For the avoidance of doubt, where
147 | Exceptions and Limitations apply to Your use, this Public
148 | License does not apply, and You do not need to comply with
149 | its terms and conditions.
150 |
151 | 3. Term. The term of this Public License is specified in Section
152 | 6(a).
153 |
154 | 4. Media and formats; technical modifications allowed. The
155 | Licensor authorizes You to exercise the Licensed Rights in
156 | all media and formats whether now known or hereafter created,
157 | and to make technical modifications necessary to do so. The
158 | Licensor waives and/or agrees not to assert any right or
159 | authority to forbid You from making technical modifications
160 | necessary to exercise the Licensed Rights, including
161 | technical modifications necessary to circumvent Effective
162 | Technological Measures. For purposes of this Public License,
163 | simply making modifications authorized by this Section 2(a)
164 | (4) never produces Adapted Material.
165 |
166 | 5. Downstream recipients.
167 |
168 | a. Offer from the Licensor -- Licensed Material. Every
169 | recipient of the Licensed Material automatically
170 | receives an offer from the Licensor to exercise the
171 | Licensed Rights under the terms and conditions of this
172 | Public License.
173 |
174 | b. No downstream restrictions. You may not offer or impose
175 | any additional or different terms or conditions on, or
176 | apply any Effective Technological Measures to, the
177 | Licensed Material if doing so restricts exercise of the
178 | Licensed Rights by any recipient of the Licensed
179 | Material.
180 |
181 | 6. No endorsement. Nothing in this Public License constitutes or
182 | may be construed as permission to assert or imply that You
183 | are, or that Your use of the Licensed Material is, connected
184 | with, or sponsored, endorsed, or granted official status by,
185 | the Licensor or others designated to receive attribution as
186 | provided in Section 3(a)(1)(A)(i).
187 |
188 | b. Other rights.
189 |
190 | 1. Moral rights, such as the right of integrity, are not
191 | licensed under this Public License, nor are publicity,
192 | privacy, and/or other similar personality rights; however, to
193 | the extent possible, the Licensor waives and/or agrees not to
194 | assert any such rights held by the Licensor to the limited
195 | extent necessary to allow You to exercise the Licensed
196 | Rights, but not otherwise.
197 |
198 | 2. Patent and trademark rights are not licensed under this
199 | Public License.
200 |
201 | 3. To the extent possible, the Licensor waives any right to
202 | collect royalties from You for the exercise of the Licensed
203 | Rights, whether directly or through a collecting society
204 | under any voluntary or waivable statutory or compulsory
205 | licensing scheme. In all other cases the Licensor expressly
206 | reserves any right to collect such royalties.
207 |
208 |
209 | Section 3 -- License Conditions.
210 |
211 | Your exercise of the Licensed Rights is expressly made subject to the
212 | following conditions.
213 |
214 | a. Attribution.
215 |
216 | 1. If You Share the Licensed Material (including in modified
217 | form), You must:
218 |
219 | a. retain the following if it is supplied by the Licensor
220 | with the Licensed Material:
221 |
222 | i. identification of the creator(s) of the Licensed
223 | Material and any others designated to receive
224 | attribution, in any reasonable manner requested by
225 | the Licensor (including by pseudonym if
226 | designated);
227 |
228 | ii. a copyright notice;
229 |
230 | iii. a notice that refers to this Public License;
231 |
232 | iv. a notice that refers to the disclaimer of
233 | warranties;
234 |
235 | v. a URI or hyperlink to the Licensed Material to the
236 | extent reasonably practicable;
237 |
238 | b. indicate if You modified the Licensed Material and
239 | retain an indication of any previous modifications; and
240 |
241 | c. indicate the Licensed Material is licensed under this
242 | Public License, and include the text of, or the URI or
243 | hyperlink to, this Public License.
244 |
245 | 2. You may satisfy the conditions in Section 3(a)(1) in any
246 | reasonable manner based on the medium, means, and context in
247 | which You Share the Licensed Material. For example, it may be
248 | reasonable to satisfy the conditions by providing a URI or
249 | hyperlink to a resource that includes the required
250 | information.
251 |
252 | 3. If requested by the Licensor, You must remove any of the
253 | information required by Section 3(a)(1)(A) to the extent
254 | reasonably practicable.
255 |
256 | 4. If You Share Adapted Material You produce, the Adapter's
257 | License You apply must not prevent recipients of the Adapted
258 | Material from complying with this Public License.
259 |
260 |
261 | Section 4 -- Sui Generis Database Rights.
262 |
263 | Where the Licensed Rights include Sui Generis Database Rights that
264 | apply to Your use of the Licensed Material:
265 |
266 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right
267 | to extract, reuse, reproduce, and Share all or a substantial
268 | portion of the contents of the database;
269 |
270 | b. if You include all or a substantial portion of the database
271 | contents in a database in which You have Sui Generis Database
272 | Rights, then the database in which You have Sui Generis Database
273 | Rights (but not its individual contents) is Adapted Material; and
274 |
275 | c. You must comply with the conditions in Section 3(a) if You Share
276 | all or a substantial portion of the contents of the database.
277 |
278 | For the avoidance of doubt, this Section 4 supplements and does not
279 | replace Your obligations under this Public License where the Licensed
280 | Rights include other Copyright and Similar Rights.
281 |
282 |
283 | Section 5 -- Disclaimer of Warranties and Limitation of Liability.
284 |
285 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE
286 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS
287 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF
288 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS,
289 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION,
290 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR
291 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS,
292 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT
293 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT
294 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU.
295 |
296 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE
297 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION,
298 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT,
299 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES,
300 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR
301 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN
302 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR
303 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR
304 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU.
305 |
306 | c. The disclaimer of warranties and limitation of liability provided
307 | above shall be interpreted in a manner that, to the extent
308 | possible, most closely approximates an absolute disclaimer and
309 | waiver of all liability.
310 |
311 |
312 | Section 6 -- Term and Termination.
313 |
314 | a. This Public License applies for the term of the Copyright and
315 | Similar Rights licensed here. However, if You fail to comply with
316 | this Public License, then Your rights under this Public License
317 | terminate automatically.
318 |
319 | b. Where Your right to use the Licensed Material has terminated under
320 | Section 6(a), it reinstates:
321 |
322 | 1. automatically as of the date the violation is cured, provided
323 | it is cured within 30 days of Your discovery of the
324 | violation; or
325 |
326 | 2. upon express reinstatement by the Licensor.
327 |
328 | For the avoidance of doubt, this Section 6(b) does not affect any
329 | right the Licensor may have to seek remedies for Your violations
330 | of this Public License.
331 |
332 | c. For the avoidance of doubt, the Licensor may also offer the
333 | Licensed Material under separate terms or conditions or stop
334 | distributing the Licensed Material at any time; however, doing so
335 | will not terminate this Public License.
336 |
337 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public
338 | License.
339 |
340 |
341 | Section 7 -- Other Terms and Conditions.
342 |
343 | a. The Licensor shall not be bound by any additional or different
344 | terms or conditions communicated by You unless expressly agreed.
345 |
346 | b. Any arrangements, understandings, or agreements regarding the
347 | Licensed Material not stated herein are separate from and
348 | independent of the terms and conditions of this Public License.
349 |
350 |
351 | Section 8 -- Interpretation.
352 |
353 | a. For the avoidance of doubt, this Public License does not, and
354 | shall not be interpreted to, reduce, limit, restrict, or impose
355 | conditions on any use of the Licensed Material that could lawfully
356 | be made without permission under this Public License.
357 |
358 | b. To the extent possible, if any provision of this Public License is
359 | deemed unenforceable, it shall be automatically reformed to the
360 | minimum extent necessary to make it enforceable. If the provision
361 | cannot be reformed, it shall be severed from this Public License
362 | without affecting the enforceability of the remaining terms and
363 | conditions.
364 |
365 | c. No term or condition of this Public License will be waived and no
366 | failure to comply consented to unless expressly agreed to by the
367 | Licensor.
368 |
369 | d. Nothing in this Public License constitutes or may be interpreted
370 | as a limitation upon, or waiver of, any privileges and immunities
371 | that apply to the Licensor or You, including from the legal
372 | processes of any jurisdiction or authority.
373 |
374 |
375 | =======================================================================
376 |
377 | Creative Commons is not a party to its public
378 | licenses. Notwithstanding, Creative Commons may elect to apply one of
379 | its public licenses to material it publishes and in those instances
380 | will be considered the “Licensor.” The text of the Creative Commons
381 | public licenses is dedicated to the public domain under the CC0 Public
382 | Domain Dedication. Except for the limited purpose of indicating that
383 | material is shared under a Creative Commons public license or as
384 | otherwise permitted by the Creative Commons policies published at
385 | creativecommons.org/policies, Creative Commons does not authorize the
386 | use of the trademark "Creative Commons" or any other trademark or logo
387 | of Creative Commons without its prior written consent including,
388 | without limitation, in connection with any unauthorized modifications
389 | to any of its public licenses or any other arrangements,
390 | understandings, or agreements concerning use of licensed material. For
391 | the avoidance of doubt, this paragraph does not form part of the
392 | public licenses.
393 |
394 | Creative Commons may be contacted at creativecommons.org.
395 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # An introduction to applied Bayesian data analysis
2 |
3 | [](http://www.repostatus.org/#wip)
4 |
5 | If you are taking this workshop, start by running the file `00-install.R` in R to install the necessary packages and make sure your computer is set up properly.
6 |
7 | The course draws on material from:
8 |
9 | McElreath, R. 2020. Statistical rethinking: a Bayesian course with examples in R and Stan. Second edition. CRC Press, Boca Raton London New York.
10 |
11 | Gelman, A., Hill, J., and Vehtari, A. 2021. Regression and other stories. Cambridge University Press, Cambridge.
12 |
13 | Hobbs, N.T., and Hooten, M.B. 2015. Bayesian models: a statistical primer for ecologists. Princeton University Press, Princeton, New Jersey.
14 |
15 | To start:
16 |
17 | ```r
18 | # install.packages("usethis")
19 | usethis::use_course("bit.ly/bayes-course")
20 | ```
21 |
--------------------------------------------------------------------------------
/bayes-course.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 | ProjectId: a977309f-797d-457d-a655-318e1edfdb9f
3 |
4 | RestoreWorkspace: Default
5 | SaveWorkspace: Default
6 | AlwaysSaveHistory: Default
7 |
8 | EnableCodeIndexing: Yes
9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 |
13 | RnwWeave: knitr
14 | LaTeX: pdfLaTeX
15 |
16 | AutoAppendNewline: Yes
17 | StripTrailingWhitespace: Yes
18 |
--------------------------------------------------------------------------------
/data-raw/mcmc-diagnostics-make-data.R:
--------------------------------------------------------------------------------
1 | dir.create("slides/figs", showWarnings = FALSE)
2 |
3 | # examples of just MCMC chains to detect issues:
4 | # - a chain is drifting but OK otherwise
5 | # - chains are in separate places
6 | # - chains are highly autocorrelated
7 |
8 | # examples with brms:
9 |
10 | # one example where you just haven't run it long enough
11 |
12 | # one example where there's a pathology that goes away with
13 | # a tighter prior and/or with a higher adapt delta
14 |
15 |
16 | # are these samples consistent with convergence?
17 | # if not, what helps you identify that?
18 | # what might have caused this scenario?
19 | # is this situation likely solvable?
20 | # if so, what might improve the MCMC sampling?
21 |
22 |
23 | # not long enough given autocorrelation:
24 | set.seed(1)
25 | s1 <- function(i) {
26 | x <- arima.sim(n = 500, list(ar = 0.97), sd = 0.3)
27 | x <- matrix(x, ncol = 1)
28 | colnames(x) <- "theta"
29 | x
30 | }
31 |
32 | samples <- purrr::map(1:4, s1)
33 | names(samples) <- paste("Chain", 1:4)
34 | samples_matrix <- do.call(cbind, samples)
35 | bayesplot::mcmc_trace(samples)
36 | rstan::Rhat(samples_matrix)
37 | rstan::ess_bulk(samples_matrix)
38 | rstan::ess_tail(samples_matrix)
39 | saveRDS(samples, "data/mcmc1.rds")
40 |
41 | # drifting:
42 | set.seed(1)
43 | s1 <- function(i) {
44 | N <- 500
45 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.3)
46 | x <- matrix(x, ncol = 1)
47 | colnames(x) <- "theta"
48 | x <- x + 1:500 * 0.0017
49 | x
50 | }
51 |
52 | samples <- purrr::map(1:4, s1)
53 | names(samples) <- paste("Chain", 1:4)
54 | samples_matrix <- do.call(cbind, samples)
55 | saveRDS(samples, "data/mcmc2.rds")
56 |
57 | bayesplot::mcmc_trace(samples)
58 |
59 | rstan::Rhat(samples_matrix)
60 | rstan::ess_bulk(samples_matrix)
61 | rstan::ess_tail(samples_matrix)
62 |
63 | # one chain getting stuck:
64 | set.seed(1)
65 | s1 <- function(i) {
66 | N <- 1000
67 | .ar <- ifelse(i == 1, 0.90, 0.4)
68 | x <- arima.sim(n = N, list(ar = .ar), sd = 0.3)
69 | x <- matrix(x, ncol = 1)
70 | colnames(x) <- "theta"
71 | x
72 | }
73 |
74 | samples <- purrr::map(1:4, s1)
75 | names(samples) <- paste("Chain", 1:4)
76 | samples_matrix <- do.call(cbind, samples)
77 | bayesplot::mcmc_trace(samples)
78 |
79 | rstan::Rhat(samples_matrix)
80 | rstan::ess_bulk(samples_matrix)
81 | rstan::ess_tail(samples_matrix)
82 |
83 | saveRDS(samples, "data/mcmc3.rds")
84 |
85 | # looks great
86 | set.seed(1)
87 | s1 <- function(i) {
88 | N <- 1000
89 | x <- arima.sim(n = N, list(ar = 0.75), sd = 0.3) + 2.2
90 | x <- matrix(x, ncol = 1)
91 | colnames(x) <- "theta"
92 | x
93 | }
94 |
95 | samples <- purrr::map(1:4, s1)
96 | names(samples) <- paste("Chain", 1:4)
97 | samples_matrix <- do.call(cbind, samples)
98 |
99 | bayesplot::mcmc_trace(samples)
100 |
101 | rstan::Rhat(samples_matrix)
102 | rstan::ess_bulk(samples_matrix)
103 | rstan::ess_tail(samples_matrix)
104 |
105 | saveRDS(samples, "data/mcmc4.rds")
106 |
107 | # stuck in slightly different places
108 | set.seed(1)
109 | s1 <- function(i) {
110 | mu <- c(0.4, 0, -0.1, 0.18)[i]
111 | N <- 1000
112 | x <- mu + arima.sim(n = N, list(ar = 0.6), sd = 0.3)
113 | x <- matrix(x, ncol = 1)
114 | colnames(x) <- "theta"
115 | x
116 | }
117 | samples <- purrr::map(1:4, s1)
118 | names(samples) <- paste("Chain", 1:4)
119 | samples_matrix <- do.call(cbind, samples)
120 |
121 | bayesplot::mcmc_trace(samples)
122 |
123 | rstan::Rhat(samples_matrix)
124 | rstan::ess_bulk(samples_matrix)
125 | rstan::ess_tail(samples_matrix)
126 | saveRDS(samples, "data/mcmc5.rds")
127 |
128 | # not enough warmup:
129 | set.seed(1)
130 | s1 <- function(i) {
131 | start <- c(-6, 8, 0, -1)[i]
132 | N <- 700
133 | x <- arima.sim(n = N, list(ar = 0.96), sd = 0.1, n.start = 1, start.innov = rep(start, 1))
134 | x <- matrix(x, ncol = 1)
135 | colnames(x) <- "theta"
136 | x
137 | }
138 |
139 | samples <- purrr::map(1:4, s1)
140 | names(samples) <- paste("Chain", 1:4)
141 | samples_matrix <- do.call(cbind, samples)
142 |
143 | bayesplot::mcmc_trace(samples)
144 |
145 | rstan::Rhat(samples_matrix)
146 | rstan::ess_bulk(samples_matrix)
147 | rstan::ess_tail(samples_matrix)
148 |
149 | saveRDS(samples, "data/mcmc6.rds")
150 |
151 | # slides:
152 |
library(bayesplot)
library(ggplot2) # attach ggplot2: ggsave() below comes from ggplot2, which bayesplot does not attach
154 | color_scheme_set("viridisD")
155 |
156 | # ideal 1 chain
157 | set.seed(1)
158 | s1 <- function(i, n = 1000, mu = 0) {
159 | x <- arima.sim(n = n, list(ar = 0.01), sd = 1) + mu
160 | x <- matrix(x, ncol = 1)
161 | colnames(x) <- "theta"
162 | x
163 | }
164 | samples <- purrr::map(1, s1)
165 | names(samples) <- paste("Chain", 1)
166 | samples_matrix <- do.call(cbind, samples)
167 | bayesplot::mcmc_trace(samples)
168 | ggsave("slides/figs/chains-ideal.png", width = 6, height = 4)
169 |
170 | # getting stuck in multiple modes
171 | samples <- purrr::map(1, s1, n = 300, mu = -2)
172 | samples2 <- purrr::map(1, s1, n = 300, mu = 1)
173 | samples3 <- purrr::map(1, s1, n = 300, mu = -2)
174 | samples <- list(rbind(samples[[1]], samples2[[1]], samples3[[1]]))
175 | names(samples) <- paste("Chain", 1)
176 | samples_matrix <- do.call(rbind, samples)
177 | bayesplot::mcmc_trace(samples)
178 | ggsave("slides/figs/chains-modes.png", width = 6, height = 4)
179 |
180 | # drifting
181 | set.seed(1)
182 | s1 <- function(i) {
183 | N <- 1000
184 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.3)
185 | x <- matrix(x, ncol = 1)
186 | colnames(x) <- "theta"
187 | x <- x + 1:1000 * 0.001
188 | x
189 | }
190 |
191 | samples <- purrr::map(1, s1)
192 | names(samples) <- paste("Chain", 1:1)
193 | samples_matrix <- do.call(cbind, samples)
194 | bayesplot::mcmc_trace(samples)
195 | ggsave("slides/figs/chains-drift.png", width = 6, height = 4)
196 |
197 | # major autocorrelation
198 | set.seed(1)
199 | s1 <- function(i) {
200 | N <- 1000
201 | x <- arima.sim(n = N, list(ar = 0.97), sd = 0.3)
202 | x <- matrix(x, ncol = 1)
203 | colnames(x) <- "theta"
204 | x
205 | }
206 |
207 | samples <- purrr::map(1, s1)
208 | names(samples) <- paste("Chain", 1:1)
209 | samples_matrix <- do.call(cbind, samples)
210 | bayesplot::mcmc_trace(samples)
211 | ggsave("slides/figs/chains-auto.png", width = 6, height = 4)
212 |
213 |
214 | # drifting different ways
215 | set.seed(1)
216 | s1 <- function(i, drift = 0.001) {
217 | N <- 1000
218 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.3)
219 | x <- matrix(x, ncol = 1)
220 | colnames(x) <- "theta"
221 | x <- x + (1:1000 - 500) * drift
222 | x
223 | }
224 |
225 | samples <- purrr::map2(1:2, c(0.0015, -0.0015), \(i, drift) s1(i, drift))
226 | names(samples) <- paste("Chain", 1:2)
227 | samples_matrix <- do.call(cbind, samples)
228 | bayesplot::mcmc_trace(samples)
229 | ggsave("slides/figs/chains-drift2.png", width = 6, height = 4)
230 |
231 | # stuck in different places
232 | set.seed(1)
233 | s1 <- function(i, mu = 0) {
234 | N <- 1000
235 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.4) + mu
236 | x <- matrix(x, ncol = 1)
237 | colnames(x) <- "theta"
238 | x
239 | }
240 |
241 | samples <- purrr::map2(1:2, c(1, -1), \(i, mu) s1(i, mu))
242 | names(samples) <- paste("Chain", 1:2)
243 | samples_matrix <- do.call(cbind, samples)
244 | bayesplot::mcmc_trace(samples)
245 |
246 | ggsave("slides/figs/chains-different-mean.png", width = 6, height = 4)
247 |
248 | # ideal 4 chain
249 | set.seed(1)
250 | s1 <- function(i, n = 1000, mu = 0) {
251 | x <- arima.sim(n = n, list(ar = 0.1), sd = 1) + mu
252 | x <- matrix(x, ncol = 1)
253 | colnames(x) <- "theta"
254 | x
255 | }
256 | samples <- purrr::map(1:4, s1)
257 | names(samples) <- paste("Chain", 1:4)
258 | samples_matrix <- do.call(cbind, samples)
259 | bayesplot::mcmc_trace(samples)
260 | ggsave("slides/figs/chains-ideal4.png", width = 6, height = 4)
261 |
--------------------------------------------------------------------------------
/data-raw/pcod-growth.R:
--------------------------------------------------------------------------------
1 | library(ggplot2)
2 | library(dplyr)
3 |
4 | d <- readRDS("../gfsynopsis-2023/report/data-cache-2024-05/pacific-cod.rds")$survey_samples
5 | # d <- dplyr::filter(d, !is.na(length), !is.na(age)) |> select(length, age)
6 | d <- dplyr::filter(d, !is.na(length), !is.na(age)) |>
7 | filter(survey_abbrev %in% c("SYN HS", "SYN WCVI")) |>
8 | select(length, age, survey = survey_abbrev)
9 |
10 | set.seed(1)
11 | d <- group_by(d, survey) |>
12 | sample_n(500)
13 |
14 | saveRDS(d, "data/pcod-growth.rds")
15 |
--------------------------------------------------------------------------------
/data-raw/postpred-data.R:
--------------------------------------------------------------------------------
1 | # should be quadratic:
2 | x <- rnorm(100)
3 | n <- length(x)
4 | a <- 0.2
5 | b <- 0.2
6 | b2 <- -0.3
7 | sigma <- 0.3
8 | set.seed(2141)
9 | y <- a + b * x + b2 * x^2 + sigma * rnorm(n)
10 | df <- data.frame(x, y)
11 | plot(df)
12 |
13 | library(rstanarm)
14 |
15 | fit <- stan_glm(
16 | y ~ x,
17 | data = df,
18 | iter = 500,
19 | chains = 1, seed = 129
20 | )
21 |
22 |
23 | p <- rstanarm::posterior_predict(fit)
24 |
25 | y <- df$y
26 | yrep <- p[1:20, ]
27 | bayesplot::ppc_dens_overlay(y, yrep)
28 | bayesplot::ppc_error_scatter_avg_vs_x(y, yrep, df$x)
29 | bayesplot::ppc_intervals(y, yrep, x = df$x)
30 |
31 | plot(df$x, y)
32 | plot(df$x, yrep[1,])
33 | plot(df$x, yrep[2,])
34 |
35 | saveRDS(df, "data/ppcheck-df1.rds")
36 | saveRDS(yrep, "data/ppcheck-yrep1.rds")
37 |
38 | # not enough dispersion:
39 | x <- rnorm(100)
40 | n <- length(x)
41 | a <- 1
42 | b <- 0.4
43 | set.seed(2141)
44 | y <- MASS::rnegbin(n, exp(a + b * x), theta = 0.2)
45 | df <- data.frame(x, y)
46 | plot(df)
47 |
48 | fit <- stan_glm(
49 | y ~ x,
50 | family = poisson(),
51 | data = df,
52 | iter = 500,
53 | chains = 1, seed = 291
54 | )
55 |
56 | plot(df)
57 | y <- df$y
58 | p <- rstanarm::posterior_predict(fit)
59 | yrep <- p[1:20, ]
60 | plot(df$x, df$y)
61 | plot(df$x, yrep[1,])
62 | plot(df$x, yrep[2,])
63 | bayesplot::ppc_dens_overlay(y, yrep)
64 |
65 | saveRDS(df, "data/ppcheck-df2.rds")
66 | saveRDS(yrep, "data/ppcheck-yrep2.rds")
67 |
68 | # # missing some major variable
69 | # set.seed(1)
70 | # library(MASS)
71 | # Sigma <- matrix(c(10,3,3,2),2,2)
72 | # Sigma
73 | # x <- mvrnorm(n = 100, rep(0, 2), Sigma)
74 | # plot(x)
75 | # n <- nrow(x)
76 | # a <- 1
77 | # b1 <- 0.2
78 | # b2 <- -2
79 | # set.seed(2141)
80 | # x1 <- x[,1]
81 | # x2 <- x[,2]
82 | # y <- rnorm(n, a + b * x1 + b2 * x2, 0.2)
83 | # df <- data.frame(x1, x2, y)
84 | # plot(df$x1, df$y)
85 | # plot(df$x2, df$y)
86 | #
87 | # fit <- stan_glm(
88 | # y ~ x1,
89 | # data = df,
90 | # iter = 500,
91 | # chains = 1
92 | # )
93 | #
94 | # p <- rstanarm::posterior_predict(fit)
95 | # yrep <- p[1:20, ]
96 | # plot(df$x1, df$y)
97 | # plot(df$x1, yrep[1,])
98 | # plot(df$x1, yrep[2,])
99 | # plot(df$x2, yrep[2,])
100 | # bayesplot::ppc_dens_overlay(y, yrep)
101 | # bayesplot::ppc_error_scatter_avg_vs_x(y, yrep, df$x2)
102 |
103 | # missing group
104 |
105 | set.seed(1)
106 | x <- rnorm(200)
107 | a <- rnorm(5, 1, 2)
108 | b <- 0.9
109 | g <- rep(1:5, each = 40)
y <- rnorm(length(x), a[g] + b * x, 0.2) # use length(x): n above is still 100, but x now has 200 values
111 | df <- data.frame(x, y, g = factor(g))
112 | ggplot(df, aes(x, y, colour = g)) + geom_point()
113 |
114 | fit <- stan_glm(
115 | y ~ x,
116 | data = df,
117 | iter = 500,
118 | chains = 1, seed = 292
119 | )
120 |
121 | p <- rstanarm::posterior_predict(fit)
122 | ggplot(df, aes(x, y)) + geom_point()
123 |
124 | y <- df$y
125 | yrep <- p[1:20, ]
126 |
127 | plot(df$x, yrep[1,])
128 | plot(df$x, yrep[2,])
129 | plot(df$x, yrep[3,])
130 |
131 | bayesplot::ppc_dens_overlay(y, yrep)
132 | bayesplot::ppc_dens_overlay_grouped(y, yrep, df$g)
133 |
134 | df$yrep2 <- yrep[2,]
135 | ggplot(df, aes(x, y, colour = g)) + geom_point()
136 | ggplot(df, aes(x, yrep2, colour = g)) + geom_point()
137 |
138 | saveRDS(df, "data/ppcheck-df3.rds")
139 | saveRDS(yrep, "data/ppcheck-yrep3.rds")
140 |
--------------------------------------------------------------------------------
/data-raw/rockfish-depth.R:
--------------------------------------------------------------------------------
1 | library(dplyr)
2 | library(ggplot2)
3 |
4 | spp <- gfsynopsis::get_spp_names()
5 | rock <- spp |>
6 | filter(grepl("rock", species_common_name))
7 |
8 | out <- list()
9 | for (i in seq_len(nrow(rock))) {
10 | print(rock[i, ])
11 | d0 <- readRDS(paste0("../gfsynopsis-2023/report/data-cache-2024-05/", rock[i, "spp_w_hyphens"], ".rds"))$survey_sets
12 | d <- filter(d0, survey_abbrev %in% c("SYN WCVI"), depth_m < 700, year != 2020, year != 2007)
13 | d <- filter(d, density_kgpm2 > 0)
14 | out[[i]] <- select(d, species_common_name, year, density_kgpm2, depth_m)
15 | }
16 | dat <- bind_rows(out)
17 |
18 | set.seed(1)
19 | dat <- group_by(dat, species_common_name) |>
20 | sample_frac(0.33)
21 |
22 |
23 | ggplot(dat, aes(log(depth_m), log(density_kgpm2))) +
24 | geom_point() +
25 | facet_wrap(~species_common_name) +
26 | geom_smooth(se = FALSE)
27 |
28 | dat <- group_by(dat, species_common_name) |>
29 | mutate(n = n()) |>
30 | filter(n > 100)
31 |
32 | d <- dat
33 | saveRDS(dat, file = "data/rockfish-depth.rds")
34 |
--------------------------------------------------------------------------------
/data/grey-heron.csv:
--------------------------------------------------------------------------------
1 | main_id,sample_year,population_untransformed
2 | 20579,1928,6233
3 | 20579,1929,5104
4 | 20579,1930,5159
5 | 20579,1931,5736
6 | 20579,1932,5428
7 | 20579,1933,6653
8 | 20579,1934,6622
9 | 20579,1935,6148
10 | 20579,1936,6677
11 | 20579,1937,6046
12 | 20579,1938,6346
13 | 20579,1939,6552
14 | 20579,1940,5636
15 | 20579,1941,5612
16 | 20579,1942,4981
17 | 20579,1943,5605
18 | 20579,1944,6190
19 | 20579,1945,5921
20 | 20579,1946,5558
21 | 20579,1947,4192
22 | 20579,1948,4421
23 | 20579,1949,5551
24 | 20579,1950,6191
25 | 20579,1951,6879
26 | 20579,1952,7258
27 | 20579,1953,7171
28 | 20579,1954,7353
29 | 20579,1955,6927
30 | 20579,1956,6730
31 | 20579,1957,7244
32 | 20579,1958,7015
33 | 20579,1959,7560
34 | 20579,1960,7015
35 | 20579,1961,7166
36 | 20579,1962,5815
37 | 20579,1963,3949
38 | 20579,1964,4243
39 | 20579,1965,4678
40 | 20579,1966,5034
41 | 20579,1967,5595
42 | 20579,1968,6069
43 | 20579,1969,6449
44 | 20579,1970,6741
45 | 20579,1971,7255
46 | 20579,1972,7326
47 | 20579,1973,7872
48 | 20579,1974,7998
49 | 20579,1975,8006
50 | 20579,1976,7580
51 | 20579,1977,7675
52 | 20579,1978,7565
53 | 20579,1979,7289
54 | 20579,1980,7763
55 | 20579,1981,7850
56 | 20579,1982,7388
57 | 20579,1983,7321
58 | 20579,1984,7503
59 | 20579,1985,7401
60 | 20579,1986,6714
61 | 20579,1987,6746
62 | 20579,1988,7054
63 | 20579,1989,7876
64 | 20579,1990,8145
65 | 20579,1991,7979
66 | 20579,1992,8121
67 | 20579,1993,8501
68 | 20579,1994,8430
69 | 20579,1995,8809
70 | 20579,1996,8557
71 | 20579,1997,8731
72 | 20579,1998,8826
73 |
--------------------------------------------------------------------------------
/data/house-wren.csv:
--------------------------------------------------------------------------------
1 | main_id,sample_year,population_untransformed
2 | 1163,1940,10
3 | 1163,1941,20
4 | 1163,1942,31
5 | 1163,1943,40
6 | 1163,1944,25
7 | 1163,1945,25
8 | 1163,1946,28
9 | 1163,1947,32
10 | 1163,1948,45
11 | 1163,1949,61
12 | 1163,1950,46
13 | 1163,1951,31
14 | 1163,1952,28
15 | 1163,1953,30
16 | 1163,1954,31
17 | 1163,1955,51
18 | 1163,1956,32
19 | 1163,1957,45
20 | 1163,1958,22
21 | 1163,1959,28
22 | 1163,1960,31
23 | 1163,1961,27
24 | 1163,1962,19
25 | 1163,1963,20
26 | 1163,1964,21
27 | 1163,1965,25
28 | 1163,1966,22
29 | 1163,1967,14
30 | 1163,1968,16
31 | 1163,1969,13
32 | 1163,1970,14
33 | 1163,1971,5
34 | 1163,1972,9
35 | 1163,1973,8
36 | 1163,1974,12
37 | 1163,1975,10
38 | 1163,1976,7
39 |
--------------------------------------------------------------------------------
/data/hughes-etal-2018.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/hughes-etal-2018.rds
--------------------------------------------------------------------------------
/data/kidiq.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/kidiq.rds
--------------------------------------------------------------------------------
/data/mcmc1.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc1.rds
--------------------------------------------------------------------------------
/data/mcmc2.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc2.rds
--------------------------------------------------------------------------------
/data/mcmc3.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc3.rds
--------------------------------------------------------------------------------
/data/mcmc4.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc4.rds
--------------------------------------------------------------------------------
/data/mcmc5.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc5.rds
--------------------------------------------------------------------------------
/data/mcmc6.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc6.rds
--------------------------------------------------------------------------------
/data/pcod-age-length.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/pcod-age-length.rds
--------------------------------------------------------------------------------
/data/pcod-growth.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/pcod-growth.rds
--------------------------------------------------------------------------------
/data/ppcheck-df1.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-df1.rds
--------------------------------------------------------------------------------
/data/ppcheck-df2.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-df2.rds
--------------------------------------------------------------------------------
/data/ppcheck-df3.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-df3.rds
--------------------------------------------------------------------------------
/data/ppcheck-yrep1.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-yrep1.rds
--------------------------------------------------------------------------------
/data/ppcheck-yrep2.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-yrep2.rds
--------------------------------------------------------------------------------
/data/ppcheck-yrep3.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-yrep3.rds
--------------------------------------------------------------------------------
/data/ppcheck1.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck1.rds
--------------------------------------------------------------------------------
/data/ppcheck2.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck2.rds
--------------------------------------------------------------------------------
/data/rockfish-depth.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/rockfish-depth.rds
--------------------------------------------------------------------------------
/extra/distributions.R:
--------------------------------------------------------------------------------
1 | library(ggplot2)
2 | library(dplyr)
3 | fill <- "#00000010"
4 | col <- "grey20"
5 |
6 | x <- seq(-5, 5, length.out = 1000)
7 | ggplot(tibble(x, y = dnorm(x, 0, 1)), aes(x, ymax = y)) +
8 | geom_ribbon(ymin = 0, fill = fill, col = col) +
9 | theme_void()
10 | ggsave("extra/dist-normal.pdf", width = 6, height = 6 * 0.618)
11 |
12 | x <- seq(0, 5, length.out = 1000)
13 | ggplot(tibble(x, y = dnorm(x, 0, 1)), aes(x, ymax = y)) +
14 | geom_ribbon(ymin = 0, fill = fill, col = col) +
15 | ylim(0, NA) +
16 | theme_void()
17 | ggsave("extra/dist-half-normal.pdf", width = 6, height = 6 * 0.618)
18 |
19 | x <- seq(-1, 1, length.out = 1000)
20 | ggplot(tibble(x, y = dnorm(x, 0, 0.7)), aes(x, ymax = y)) +
21 | geom_ribbon(ymin = 0, fill = fill, col = col) +
22 | ylim(0, NA) +
23 | theme_void()
24 | ggsave("extra/dist-truncated-normal.pdf", width = 6, height = 6 * 0.618)
25 |
26 | x <- seq(-5, 5, length.out = 1000)
27 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1),
28 | y = dt(x = x, df = 3)), aes(x, ymax = y)) +
29 | geom_ribbon(ymin = 0, fill = fill, col = col) +
30 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA,
31 | colour = "grey10", lty = 2) +
32 | ylim(0, NA) +
33 | theme_void()
34 | ggsave("extra/dist-t.pdf", width = 6, height = 6 * 0.618)
35 |
36 | x <- seq(0, 5, length.out = 1000)
37 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1),
38 | y = dt(x = x, df = 3)), aes(x, ymax = y)) +
39 | geom_ribbon(ymin = 0, fill = fill, col = col) +
40 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA,
41 | colour = "grey10", lty = 2) +
42 | ylim(0, NA) +
43 | theme_void()
44 | ggsave("extra/dist-half-t.pdf", width = 6, height = 6 * 0.618)
45 |
46 | x <- seq(-10, 10, length.out = 1000)
47 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1),
48 | y = dcauchy(x = x)), aes(x, ymax = y)) +
49 | geom_ribbon(ymin = 0, fill = fill, col = col) +
50 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA,
51 | colour = "grey10", lty = 2) +
52 | ylim(0, NA) +
53 | theme_void()
54 | ggsave("extra/dist-cauchy.pdf", width = 6, height = 6 * 0.618)
55 |
56 | x <- seq(0, 10, length.out = 1000)
57 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1),
58 | y = dcauchy(x = x)), aes(x, ymax = y)) +
59 | geom_ribbon(ymin = 0, fill = fill, col = col) +
60 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA,
61 | colour = "grey10", lty = 2) +
62 | ylim(0, NA) +
63 | theme_void()
64 | ggsave("extra/dist-half-cauchy.pdf", width = 6, height = 6 * 0.618)
65 |
66 | x <- seq(0, 100, length.out = 1000)
67 | ggplot(tibble(x, y = dexp(x = x, rate = 0.1),
68 | y2 = dexp(x = x, rate = 0.2)), aes(x, ymax = y)) +
69 | geom_ribbon(ymin = 0, fill = fill, col = col) +
70 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) +
71 | ylim(0, NA) +
72 | theme_void()
73 | ggsave("extra/dist-exp.pdf", width = 6, height = 6 * 0.618)
74 |
75 | x <- seq(0, 15, length.out = 1000)
76 | ggplot(tibble(x,
77 | y = dgamma(x = x, shape = 5, rate = 1),
78 | y2 = dgamma(x = x, shape = 2, rate = 1),
79 | y3 = dgamma(x = x, shape = 1, rate = 1)),
80 | aes(x, ymax = y)) +
81 | geom_ribbon(ymin = 0, fill = fill, col = col) +
82 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) +
83 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y3)) +
84 | ylim(0, NA) +
85 | theme_void()
86 | ggsave("extra/dist-gamma.pdf", width = 6, height = 6 * 0.618)
87 |
88 | x <- seq(0, 1, length.out = 1000)
89 | ggplot(tibble(x,
90 | y = dbeta(x = x, shape1 = 2, shape2 = 2),
              y2 = dbeta(x = x, shape1 = 0.8, shape2 = 0.8),
              y3 = dbeta(x = x, shape1 = 3, shape2 = 1),
              y4 = dbeta(x = x, shape1 = 1, shape2 = 3)),
94 | aes(x, ymax = y)) +
95 | ylim(0, NA) +
96 | geom_ribbon(ymin = 0, fill = fill, col = col) +
97 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) +
98 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y3)) +
99 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y4)) +
100 | theme_void()
101 | ggsave("extra/dist-beta.pdf", width = 6, height = 6 * 0.618)
102 |
103 | x <- seq(0, 1, length.out = 1000)
104 | ggplot(tibble(x,
105 | y = dbinom(x = 3, size = 4, prob = x),
106 | y2 = dbinom(x = 1, size = 5, prob = x),
107 | y3 = dbinom(x = 2, size = 4, prob = x)),
108 | aes(x, ymax = y)) +
109 | ylim(0, NA) +
110 | geom_ribbon(ymin = 0, fill = fill, col = col) +
111 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) +
112 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y3)) +
113 | # geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y4)) +
114 | theme_void()
115 | ggsave("extra/dist-binom.pdf", width = 6, height = 6 * 0.618)
116 |
117 | x <- seq(0, 20, length.out = 1000)
118 | ggplot(tibble(x,
119 | y = invgamma::dinvgamma(x, shape = 1, rate = 1)),
120 | aes(x, ymax = y)) +
121 | geom_ribbon(ymin = 0, fill = fill, col = col) +
122 | geom_vline(xintercept = 0, lty = 2) +
123 | ylim(0, NA) +
124 | theme_void()
125 | ggsave("extra/dist-inv-gamma.pdf", width = 6, height = 6 * 0.618)
126 |
127 | x <- seq(0, 20, length.out = 1000)
128 | ggplot(tibble(x,
129 | y = dunif(x, 0, 20)),
130 | aes(x, ymax = y)) +
131 | geom_ribbon(ymin = 0, fill = fill, col = col) +
132 | ylim(0, 0.1) +
133 | theme_void()
134 | ggsave("extra/dist-uniform.pdf", width = 6, height = 6 * 0.618)
135 |
136 |
137 | x <- runif(1e6, min = 0, max = 10)
138 | ggplot(tibble(x), aes(x)) +
139 | geom_histogram(fill = fill, col = col, breaks = seq(0, 10, length.out = 40)) +
140 | theme_void()
141 | ggsave("extra/dist-uniform-samples.pdf", width = 6, height = 6 * 0.618)
142 |
143 | ggplot(tibble(x), aes(exp(x))) +
144 | geom_histogram(fill = fill, col = col, breaks = seq(exp(0), exp(10), length.out = 40)) +
145 | theme_void()
146 | ggsave("extra/dist-uniform-exp-samples.pdf", width = 6, height = 6 * 0.618)
147 |
148 | library(dplyr)
149 | library(ggplot2)
150 | library(ggdist)
151 |
152 | # theme_set(theme_ggdist())
153 |
154 | expand.grid(
155 | eta = 1:4,
156 | K = 2:5
157 | ) %>%
158 | ggplot(aes(y = ordered(eta), dist = "lkjcorr_marginal", arg1 = K, arg2 = eta)) +
159 | stat_slab() +
160 | facet_grid(~ paste0(K, "x", K)) +
161 | scale_y_discrete(limits = rev) +
162 | labs(
163 | title = paste0(
164 | "LKJ prior on different matrix sizes"
165 | ),
166 | y = "eta",
167 | x = "Marginal correlation"
168 | )
169 | # theme(axis.title = element_text(hjust = 0))
170 |
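171 | # Added sketch: an analytic check of the uniform -> exp(uniform) demo above.
172 | # By change of variables, if X ~ Uniform(0, 10), then Y = exp(X) has density
173 | # f(y) = 1 / (10 * y) on (1, exp(10)) -- a "flat" prior piles up near the
174 | # lower bound on the transformed scale.
175 | y_grid <- seq(exp(0), exp(10), length.out = 1000)
176 | ggplot(tibble(y = y_grid, dens = 1 / (10 * y_grid)), aes(y, ymax = dens)) +
177 |   geom_ribbon(ymin = 0, fill = fill, col = col) +
178 |   theme_void()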
--------------------------------------------------------------------------------
/extra/equations.tex:
--------------------------------------------------------------------------------
1 | \documentclass[10pt]{article}
2 | \usepackage[usenames]{color} %used for font color
3 | \usepackage{amssymb} %maths
4 | \usepackage{amsmath} %maths
5 | \usepackage[utf8]{inputenc} %useful to type directly diacritic characters
6 | \begin{document}
7 | \begin{align*}[\beta_0, \beta_1, \sigma | \mathbf{y}] \propto \prod_{i=1}^n [y_i | \beta_0, \beta_1, \sigma] [ \beta_0 ] [ \beta_1 ] [ \sigma ]\end{align*}
8 | \end{document}
--------------------------------------------------------------------------------
/extra/equations2.tex:
--------------------------------------------------------------------------------
1 | \documentclass[10pt]{article}
2 | \usepackage[usenames]{color} %used for font color
3 | \usepackage{amssymb} %maths
4 | \usepackage{amsmath} %maths
5 | \usepackage[utf8]{inputenc} %useful to type directly diacritic characters
6 | \begin{document}
7 | \begin{align*}y_i &\sim \mathrm{Normal}(\beta_0 + x_i \cdot \beta_1, \sigma), \ \text{for}\ i = 1, \ldots, n\\
8 | \beta_0 &\sim \mathrm{Normal}(0, 10)\\
9 | \beta_1 &\sim \mathrm{Normal}(0, 2)\\
10 | \sigma &\sim \text{Half-t}(3, 0, 3)\end{align*}
11 | \end{document}
--------------------------------------------------------------------------------
/extra/render.R:
--------------------------------------------------------------------------------
1 | files <- list.files(here::here(), pattern = "\\.Rmd$")
2 | # dir.create("exercises", showWarnings = FALSE)
3 | # remove_exercises <- function(x) {
4 | # f <- readLines(x)
5 | # i <- grep("```", f)[[1]] - 1
6 | # f <- c(f[seq(1, i)],
7 | # "```{r, include=FALSE, eval=TRUE}",
8 | # "knitr::opts_chunk$set(root.dir = '..')",
9 | # "```",
10 | # "",
11 | # f[seq(i+1, length(f))])
12 | # f_ex <- ifelse(grepl("# exercise", f), "# exercise", f)
13 | # f_ex <- ifelse(grepl("", f_ex), "", f_ex)
14 | # writeLines(as.character(f_ex), con = file.path("exercises", x))
15 | # }
16 | # purrr::walk(files, remove_exercises)
17 |
18 | ## knit all exercises serially (slow); the parallel version below avoids rendering everything twice:
19 | # purrr::walk(files, rmarkdown::render, envir = new.env())
20 |
21 | library(future)
22 | plan(multisession)
23 | furrr::future_walk(files, rmarkdown::render, envir = new.env(), .options = furrr::furrr_options(seed = TRUE))
24 | plan(sequential)
25 |
--------------------------------------------------------------------------------
/extra/rstan-growth.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Fitting von Bertalanffy growth curves with error using Stan"
3 | output: html_document
4 | ---
5 |
6 | ```{r}
7 | knitr::opts_chunk$set(
8 | collapse = TRUE,
9 | comment = "#>"
10 | )
11 | ```
12 |
13 |
14 |
15 | ```{r}
16 | library(dplyr)
17 | dd <- readRDS("~/src/gfsynopsis/report/data-cache2/pbs-survey-samples.rds")
18 | dd2 <- readRDS("~/src/gfsynopsis/report/data-cache2/pbs-age-precision.rds")
19 |
20 | d <- filter(dd, species_common_name == "shortraker rockfish", major_stat_area_name == "5E: WEST COAST Q.C. ISLANDS")
21 | d2 <- filter(dd2, species_common_name == "SHORTRAKER ROCKFISH")
22 |
23 | x <- gfplot::tidy_age_precision(d2)
24 | m <- lm(log(prim_age) ~ log(prec_age) - 0, data = x)
25 | summary(m)
26 |
27 | d <- d[!duplicated(select(d, specimen_id)), , drop = FALSE]
28 |
29 | # d <- inner_join(d, select(d2, specimen_id, specimen_age)
30 |
31 | d <- filter(d, !is.na(age), !is.na(length), sex == 2)
32 | nrow(d)
33 | library(ggplot2)
34 | ggplot(d, aes(age, length)) + geom_point(aes(colour = year)) +
35 | viridis::scale_color_viridis()
36 | ```
37 |
38 | ```{r}
39 | newdata <- data.frame(age = seq(min(d$age), max(d$age), length.out = 100))
40 | ```
41 |
42 | ```{r}
43 | library(rstan)
44 | rstan_options(auto_write = TRUE)
45 | options(mc.cores = parallel::detectCores())
46 | vb_fit <- stan("vb.stan",
47 | data =
48 | list(
49 | age = d$age,
50 | length = d$length,
51 | N = length(d$age),
52 | N_pred = nrow(newdata),
53 | age_pred = newdata$age),
54 | iter = 2000, chains = 1)
55 |
56 | pars <- c("k", "linf", "sigma", "t0")
57 | library(bayesplot)
58 | theme_set(theme_light())
59 | vb_fit_array <- as.array(vb_fit)
60 | mcmc_trace(vb_fit_array, pars = pars)
61 | mcmc_hist(vb_fit_array, pars = pars)
62 | mcmc_dens(vb_fit_array, pars = pars)
63 | ```
64 |
65 | ```{r}
66 | vb_fit2 <- stan("vb-fixed-error.stan",
67 | data =
68 | list(
69 | age = d$age,
70 | length = d$length,
71 | N = length(d$age),
72 | N_pred = nrow(newdata),
73 | age_pred = newdata$age,
74 |     error = 0.2), # lognormal SD of 0.2, i.e., an ageing CV of ~0.2
75 | iter = 1000, chains = 1,
76 | pars = c("k", "linf", "sigma", "t0", "length_pred", "posterior_predictions"))
77 | vb_fit2
78 |
79 | vb_fit_array2 <- as.array(vb_fit2)
80 | mcmc_trace(vb_fit_array2, pars = c("k", "linf", "sigma", "t0"))
81 | mcmc_hist(vb_fit_array2, pars = c("k", "linf", "sigma", "t0"))
82 | ```
83 |
84 | ```{r}
85 | vb_fit2 <- stan("vb-fixed-length-error.stan",
86 | data =
87 | list(
88 | age = d$age,
89 | length = d$length,
90 | N = length(d$age),
91 | N_pred = nrow(newdata),
92 | age_pred = newdata$age,
93 |     error = 0.05), # lognormal SD of 0.05, i.e., a length CV of ~0.05
94 | iter = 800, chains = 2, seed = 29348,
95 | control = list(adapt_delta = 0.99, max_treedepth = 20),
96 | pars = c("k", "linf", "sigma", "t0", "length_pred", "posterior_predictions"))
97 | vb_fit2
98 |
99 | vb_fit_array2 <- as.array(vb_fit2)
100 | mcmc_trace(vb_fit_array2, pars = c("k", "linf", "sigma", "t0"))
101 | mcmc_hist(vb_fit_array2, pars = c("k", "linf", "sigma", "t0"))
102 | ```
103 |
104 | ```{r}
105 | e <- extract(vb_fit)
106 | e2 <- extract(vb_fit2)
107 | median(e$k)
108 | median(e2$k)
109 |
110 | quantile(e$k)
111 | quantile(e2$k)
112 |
113 | median(e$linf)
114 | median(e2$linf)
115 |
116 | quantile(e$linf)
117 | quantile(e2$linf)
118 |
119 | median(e$sigma)
120 | median(e2$sigma)
121 | ```
122 |
123 | ```{r}
124 | newdata$length <- apply(e$length_pred, 2, median)
125 | newdata$length2 <- apply(e2$length_pred, 2, median)
126 |
127 | ppc_dens_overlay(d$length, e$posterior_predictions[1:10, ])
128 | ppc_dens_overlay(d$length, e2$posterior_predictions[1:10, ])
129 |
130 | ggplot(d, aes(age, length)) + geom_point(aes(colour = as.factor(major_stat_area_name))) +
131 | geom_line(data = newdata, aes(age, length), alpha = 0.5) +
132 | geom_line(data = newdata, aes(age, length2), alpha = 0.5, lty = 2)
133 | ```
134 |
135 |
136 |
137 |
138 |
139 |
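140 | As a rough check on the `error` values used above: for a lognormal
141 | distribution with log-scale standard deviation $\sigma$, the coefficient of
142 | variation is $\sqrt{\exp(\sigma^2) - 1}$, which is approximately $\sigma$
143 | when $\sigma$ is small.
144 |
145 | ```{r}
146 | lognormal_cv <- function(sigma) sqrt(exp(sigma^2) - 1)
147 | lognormal_cv(0.2)  # ageing error above: CV of about 0.2
148 | lognormal_cv(0.05) # length measurement error above: CV of about 0.05
149 | ```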
--------------------------------------------------------------------------------
/extra/rstanarm-counterfactual.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Fitting a counterfactual model with rstanarm"
3 | output: html_document
4 | ---
5 |
6 | ```{r}
7 | knitr::opts_chunk$set(
8 | collapse = TRUE,
9 | comment = "#>"
10 | )
11 | ```
12 |
13 | ```{r}
14 | library(dplyr)
15 | library(ggplot2)
16 | # library(brms)
17 | library(rstanarm)
18 | theme_set(ggsidekick::theme_sleek())
19 | ```
20 |
21 | ```{r}
22 | dory <- "Paracanthurus hepatus"
23 | cf <- c("Zebrasoma xanthurum",
24 | "Chrysiptera parasema",
25 | "Chrysiptera hemicyanea",
26 | "Acanthurus nigricans",
27 | "Acanthurus sohal",
28 | "Acanthurus japonicus",
29 | "Acanthurus coeruleus",
30 | "Acanthurus leucosternon")
31 | terms <- c(dory, cf)
32 | ```
33 |
34 | ```{r}
35 | if (!file.exists("data-generated/google-dat.rds")) {
36 | dd <- lapply(terms, function(x) {
37 | gtrendsR::gtrends(x, time = "2014-01-01 2017-07-10", gprop = "web",
38 | low_search_volume = TRUE)
39 | })
40 | d <- purrr::map_df(dd, function(x) {
41 | tibble(date = x$interest_over_time$date,
42 | keyword = x$interest_over_time$keyword,
43 | hits = x$interest_over_time$hits)
44 | })
45 |   dir.create("data-generated", showWarnings = FALSE); saveRDS(d, file = "data-generated/google-dat.rds")
46 | } else {
47 | d <- readRDS("data-generated/google-dat.rds")
48 | }
49 | ```
50 |
51 | ```{r}
52 | movie_date <- lubridate::ymd("2016-06-17", tz = "EST")
53 | ggplot(d, aes(date, hits)) +
54 | geom_line(col = "grey20", lwd = 0.5) +
55 | facet_wrap(~keyword) + xlab("") +
56 | ylab("Google searches as percent of maximum") +
57 | coord_cartesian(expand = FALSE) +
58 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2)
59 | ```
60 |
61 | ```{r}
62 | dat <- d
63 | dat <- mutate(dat,
64 | hits = ifelse(hits == 100 & keyword == "Paracanthurus hepatus", 60, hits))
65 | dat <- dat %>% mutate(hits = hits / 100) %>%
66 | reshape2::dcast(date ~ keyword, value.var = "hits") %>%
67 | as_tibble()
68 | names(dat) <- gsub(" $", "", names(dat))
69 | names(dat) <- gsub(" ", "_", names(dat))
70 | ```
71 |
72 | ```{r}
73 | dat <- mutate(dat, before_movie = date < movie_date)
74 | ```
75 |
76 | ```{r}
77 | glimpse(dat)
78 | ```
79 |
80 | ```{r}
81 | # options(mc.cores = parallel::detectCores())
82 | before_dat <- filter(dat, before_movie)
83 | m1 <- stan_betareg(
84 | formula = Paracanthurus_hepatus ~
85 | Chrysiptera_parasema +
86 | Chrysiptera_hemicyanea +
87 | Acanthurus_nigricans +
88 | Acanthurus_sohal +
89 | Acanthurus_japonicus +
90 | Acanthurus_coeruleus +
91 | Acanthurus_leucosternon,
92 | data = before_dat,
93 | prior = normal(0, 2, autoscale = FALSE),
94 | prior_intercept = normal(0, 10, autoscale = FALSE),
95 | prior_phi = student_t(3, 0, 25, autoscale = FALSE))
96 | ```
97 |
98 | ```{r, eval=FALSE}
99 | ?plot.stanreg
100 | ```
101 |
102 | ```{r}
103 | summary(m1)
104 | # plot(m1, pars = "Chrysiptera_parasema")
105 | pp_check(m1, plotfun = "dens_overlay")
106 | pp_check(m1, plotfun = "error_hist")
107 | pp_check(m1, plotfun = "error_scatter")
108 | pp_check(m1, plotfun = "ribbon")
109 | pp_check(m1, plotfun = "ribbon", newdata = dat)
110 |
111 | pp_check(m1, plotfun = "intervals")
112 |
113 | coef_regex <- "^[A-Z]+[a-z_]+|\\(Intercept\\)"
114 | plot(m1, plotfun = "trace")
115 | plot(m1, plotfun = "acf")
116 | plot(m1, plotfun = "areas")
117 | plot(m1, plotfun = "areas", regex_pars = coef_regex)
118 | plot(m1, plotfun = "areas_ridges", regex_pars = coef_regex)
119 | plot(m1, plotfun = "intervals", regex_pars = coef_regex)
120 | ```
121 |
122 | ```{r}
123 | pred <- posterior_predict(m1, newdata = dat)
124 | dim(pred)
125 | num_draws <- 4
126 | pred_long <- reshape2::melt(pred[seq_len(num_draws), ], # n draws from the posterior
127 | varnames = c("mcmc_sample", "time_step"), value.name = "y") %>%
128 | as_tibble() %>%
129 | mutate(date = rep(dat$date, each = num_draws))
130 | ```
131 |
132 | ```{r}
133 | date_start <- lubridate::ymd("2015-04-01", tz = "EST")
134 | ggplot(dat, aes(date, Paracanthurus_hepatus)) +
135 | geom_line(lwd = 0.8, col = "blue") +
136 | xlab("") +
137 | ylab("Google searches as fraction of maximum") +
138 | coord_cartesian(expand = FALSE) +
139 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
140 | geom_line(data = pred_long, aes(y = y, group = mcmc_sample), alpha = 0.3) +
141 | xlim(date_start, NA)
142 | ```
143 |
144 | ```{r}
145 | dat$posterior_median <- apply(pred, 2, median)
146 | dat$posterior_lwr <- apply(pred, 2, quantile, probs = 0.1)
147 | dat$posterior_upr <- apply(pred, 2, quantile, probs = 0.9)
148 | ```
149 |
150 | ```{r}
151 | ggplot(dat, aes(date, Paracanthurus_hepatus)) +
152 | xlab("") +
153 | ylab("Google searches as fraction of maximum") +
154 | coord_cartesian(expand = FALSE) +
155 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
156 | geom_ribbon(aes(ymin = posterior_lwr, ymax = posterior_upr), alpha = 0.2) +
157 | geom_line() +
158 | xlim(date_start, NA)
159 | ```
160 |
161 | ```{r}
162 | pred_diff <- pred
163 | for (i in seq(1, ncol(pred))) {
164 | pred_diff[, i] <- dat$Paracanthurus_hepatus[i] / pred_diff[, i]
165 | }
166 | dat$posterior_ratio_median <- apply(pred_diff, 2, median)
167 | dat$posterior_ratio_lwr <- apply(pred_diff, 2, quantile, probs = 0.1)
168 | dat$posterior_ratio_upr <- apply(pred_diff, 2, quantile, probs = 0.9)
169 | ```
170 |
171 | ```{r}
172 | ggplot(dat, aes(date, posterior_ratio_median)) +
173 | xlab("") +
174 |   ylab("Ratio of observed to counterfactual predicted searches") +
175 | coord_cartesian(expand = FALSE) +
176 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
177 | geom_hline(yintercept = 1, lty = 2, col = "grey60") +
178 |   geom_ribbon(aes(ymin = posterior_ratio_lwr, ymax = posterior_ratio_upr), alpha = 0.4) +
179 | geom_line() +
180 | xlim(date_start, NA) +
181 | scale_y_log10(breaks = c(0.2, 0.5, 1, 2, 5))
182 | ```
183 |
184 | ```{r}
185 | dat$p <- apply(pred_diff, 2, function(x) mean(x > 1))
186 | ggplot(dat, aes(date, p)) + geom_line() + xlim(date_start, NA) +
187 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
188 | coord_cartesian(ylim = c(0, 1), expand = FALSE) +
189 | ylab("Probability of a positive effect")
190 | ```
191 |
192 | ```{r}
193 | dat$p <- apply(pred_diff, 2, function(x) mean(x > 2))
194 | ggplot(dat, aes(date, p)) + geom_line() + xlim(date_start, NA) +
195 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
196 | coord_cartesian(ylim = c(0, 1), expand = FALSE) +
197 | ylab("Probability of at least a 2-fold effect")
198 | ```
199 |
200 | ```{r}
201 | dat$p <- apply(pred_diff, 2, function(x) mean(x > 3))
202 | ggplot(dat, aes(date, p)) + geom_line() + xlim(date_start, NA) +
203 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
204 | coord_cartesian(ylim = c(0, 1), expand = FALSE) +
205 |   ylab("Probability of at least a 3-fold effect")
206 | ```
207 |
208 | # brms
209 |
210 | Bonus:
211 |
212 | ```{r, eval=FALSE}
213 | library(brms)
214 | m1 <- brm(
215 | formula = Paracanthurus_hepatus ~
216 | Chrysiptera_parasema +
217 | Chrysiptera_hemicyanea +
218 | Acanthurus_nigricans +
219 | Acanthurus_sohal +
220 | Acanthurus_japonicus +
221 | Acanthurus_coeruleus +
222 | Acanthurus_leucosternon,
223 | data = filter(dat, before_movie),
224 | family = Beta(link = "logit"),
225 | save_model = "brm-model.stan",
226 | prior = c(
227 | set_prior("normal(0, 2)", class = "b"),
228 | set_prior("normal(0, 10)", class = "Intercept"),
229 | set_prior("student_t(3, 0, 25)", class = "phi"))
230 | )
231 | ```
232 |
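233 | If you run the `brms` version, the same kinds of checks apply; a minimal
234 | sketch, assuming the `brm()` fit above has been run:
235 |
236 | ```{r, eval=FALSE}
237 | summary(m1)
238 | pp_check(m1) # posterior predictive density overlay
239 | plot(m1)     # trace and density plots for the coefficients
240 | ```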
--------------------------------------------------------------------------------
/extra/slides.R:
--------------------------------------------------------------------------------
1 | library(ggplot2) # needed for the plots below
2 | ratio <- 0.68; width <- 8
3 | dat <- expand.grid(x = 1:40, y = 1:25)
4 | dat$colour <- "a"
5 |
6 | ggplot(dat, aes(x, y, colour = colour)) +
7 | geom_point(size = 4) +
8 | theme_void() +
9 | scale_color_manual(values = c("a" = "grey70", "b" = "red")) +
10 |   guides(colour = "none")
11 | ggsave("extra/bayes-dots-0.pdf", width = width, height = ratio * width)
12 |
13 | dat$colour[450] <- "b"
14 | ggplot(dat, aes(x, y, colour = colour)) +
15 | geom_point(size = 4) +
16 | theme_void() +
17 | scale_color_manual(values = c("a" = "grey70", "b" = "red")) +
18 |   guides(colour = "none")
19 | ggsave("extra/bayes-dots-1.pdf", width = width, height = ratio * width)
20 |
21 | set.seed(1)
22 | has_disease <- sample(seq_len(1000), size = 50)
23 | dat$colour[has_disease] <- "c"
24 | ggplot(dat, aes(x, y, colour = colour)) +
25 | geom_point(size = 4) +
26 | theme_void() +
27 | scale_color_manual(values = c("a" = "grey70", "b" = "red", "c"= "black")) +
28 |   guides(colour = "none")
29 | ggsave("extra/bayes-dots-2.pdf", width = width, height = ratio * width)
30 |
31 | dat2 <- data.frame(x = c(1:25, 1:25), y = c(rep(1, 25), rep(2, 25)), colour = "c", stringsAsFactors = FALSE)
32 | dat2$colour[8] <- "b"
33 | ggplot(dat2, aes(x, y, colour = colour)) +
34 | geom_point(size = 8) +
35 | theme_void() +
36 | ylim(-4, 7) +
37 | scale_color_manual(values = c("a" = "grey70", "b" = "red", "c"= "black")) +
38 |   guides(colour = "none")
39 | ggsave("extra/bayes-dots-3.pdf", width = width, height = ratio * width)
40 |
--------------------------------------------------------------------------------
/extra/vb-fixed-error.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | real error;
6 |
7 | int N_pred;
8 | vector[N_pred] age_pred;
9 | }
10 | parameters {
11 |   real<lower=0> k;
12 |   real<lower=0> linf;
13 |   real<lower=0> sigma;
14 |   real t0;
15 |   vector<lower=0>[N] age_true; // this is new; must be positive to take its log
16 | }
17 | model {
18 | // priors:
19 | k ~ normal(0, 2);
20 | linf ~ normal(0, 200);
21 | sigma ~ student_t(3, 0, 2);
22 | t0 ~ normal(0, 20);
23 |
24 | // aging measurement error:
25 | age ~ lognormal(log(age_true), error); // this is new
26 |
27 | // likelihood:
28 | length ~ lognormal(log(linf * (1 - exp(-k * (age_true - t0)))), sigma); // this is modified
29 | }
30 | generated quantities {
31 | vector[N_pred] length_pred;
32 | vector[N] posterior_predictions;
33 | vector[N] age_true_posterior_predict; // this is new
34 |
35 | for (i in 1:N_pred) {
36 | length_pred[i] = linf * (1 - exp(-k * (age_pred[i] - t0)));
37 | }
38 |
39 | for (i in 1:N) {
40 | age_true_posterior_predict[i] = lognormal_rng(log(age[i]), error); // this is new
41 | posterior_predictions[i] =
42 | lognormal_rng(log(linf * (1 - exp(-k * (age_true_posterior_predict[i] - t0)))), sigma); // this is new
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/extra/vb.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 |
6 | int N_pred;
7 | vector[N_pred] age_pred;
8 | }
9 | parameters {
10 |   real<lower=0> k;
11 |   real<lower=0> linf;
12 |   real<lower=0> sigma; // positivity needed for the lognormal mean and SD
13 | real t0;
14 | }
15 | model {
16 | k ~ normal(0, 2);
17 | linf ~ normal(0, 200);
18 | sigma ~ student_t(3, 0, 2);
19 | t0 ~ normal(0, 20);
20 | length ~ lognormal(log(linf * (1 - exp(-k * (age - t0)))), sigma);
21 | }
22 | generated quantities {
23 | vector[N_pred] length_pred;
24 | vector[N] posterior_predictions;
25 |
26 | for (i in 1:N_pred) {
27 | length_pred[i] = linf * (1 - exp(-k * (age_pred[i] - t0)));
28 | }
29 |
30 | for (i in 1:N) {
31 | posterior_predictions[i] = lognormal_rng(log(linf * (1 - exp(-k * (age[i] - t0)))), sigma);
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/slides/loo.R:
--------------------------------------------------------------------------------
1 | library(rstanarm)
2 | library(ggplot2)
3 | theme_set(theme_light())
4 | options(mc.cores = 1) # no parallel: faster for these simple regressions
5 | library(dplyr)
6 | dir.create("slides/figs", showWarnings = FALSE, recursive = TRUE) # figure output dir
7 | x <- 1:20
8 | n <- length(x)
9 | a <- 0.2
10 | b <- 0.3
11 | sigma <- 1
12 | set.seed(2141)
13 | y <- a + b * x + sigma * rnorm(n)
14 | fake <- data.frame(x, y)
15 | fit_all <- stan_glm(y ~ x, data = fake, seed = 2141, chains = 10, refresh = 0)
16 | fit_minus_18 <- stan_glm(y ~ x, data = fake[-18, ], seed = 2141, refresh = 0)
17 | sims <- as.matrix(fit_all)
18 | sims_minus_18 <- as.matrix(fit_minus_18)
19 | condpred <- data.frame(y = seq(0, 9, length.out = 100))
20 | condpred$x <- sapply(condpred$y, \(y)
21 | mean(dnorm(y, sims[, 1] + sims[, 2] * x[18], sims[, 3]) * 6 + 18))
22 | condpredloo <- data.frame(y = seq(0, 9, length.out = 100))
23 | condpredloo$x <- sapply(condpredloo$y, \(y)
24 | mean(dnorm(y, sims_minus_18[, 1] + sims_minus_18[, 2] * x[18], sims_minus_18[, 3]) * 6 + 18))
25 |
26 | ggplot(fake, aes(x = x, y = y)) +
27 | geom_point(color = "white", size = 3) +
28 | geom_point(color = "black", size = 2) +
29 | geom_abline(
30 | intercept = mean(sims[, 1]),
31 | slope = mean(sims[, 2]),
32 | color = "black"
33 | ) +
34 | geom_path(data = condpred, aes(x = x, y = y), color = "black") +
35 | geom_vline(xintercept = 18, linetype = 3, color = "grey") +
36 | geom_point(data = fake[18, ], color = "grey50", size = 5, shape = 1) +
37 | geom_abline(
38 | intercept = mean(sims_minus_18[, 1]),
39 | slope = mean(sims_minus_18[, 2]),
40 | color = "grey50",
41 | linetype = 2
42 | ) +
43 | geom_path(data = condpredloo, aes(x = x, y = y), color = "grey50", linetype = 2)
44 | ggsave("slides/figs/loo-init-eg.png", width = 6, height = 4.5)
45 |
46 | fake$residual <- fake$y - fit_all$fitted.values
47 | fake$looresidual <- fake$y - loo_predict(fit_all)$value
48 | ggplot(fake, aes(x = x, y = residual)) +
49 | geom_point(color = "black", size = 2, shape = 16) +
50 | geom_point(aes(y = looresidual), color = "grey50", size = 2, shape = 1) +
51 | geom_segment(aes(xend = x, y = residual, yend = looresidual)) +
52 | geom_hline(yintercept = 0, linetype = 2)
53 | ggsave("slides/figs/loo-resid-eg.png", width = 6, height = 4.5)
54 |
55 |
56 | S <- 10
57 | y_hat <- matrix(nrow = S, ncol = n)
58 | for (s in 1:S) {
59 | for (i in 1:n) {
60 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i]
61 | }
62 | }
63 |
64 |
65 | y_i <- 10
66 | xx <- seq(0, 7, length.out = 100)
67 | mult <- 6
68 |
69 | gg <- list()
70 | for (s in 1:6) {
71 | yy <- sapply(xx, \(x) dnorm(x, mean = y_hat[s, y_i], sd = sims[s, 3], log = FALSE))
72 | df <- data.frame(x = xx, y = yy)
73 |
74 | df2 <- data.frame(x = y[y_i], y = dnorm(y[y_i], mean = y_hat[s, y_i], sd = sims[s, 3], log = FALSE)*mult + y_i)
75 | df2 <- rbind(data.frame(x = y[y_i], y = y_i), df2)
76 |
77 | gg[[s]] <- data.frame(x = x, y = y_hat[s,]) |>
78 | ggplot(aes(x, y)) +
79 | geom_line() +
80 | geom_point(data = data.frame(Var2 = y_i, value = y[y_i]), mapping = aes(Var2, value), inherit.aes = FALSE) +
81 | geom_path(data = df, mapping = aes(x = y_i + y * mult, y = x), inherit.aes = FALSE) +
82 | geom_path(data = df2, mapping = aes(x = y, y = x), colour = "red") +
83 | xlab("x") + ylab("y") +
84 | geom_point(data = fake, mapping = aes(x, y), inherit.aes = FALSE, pch = 21) +
85 | ggtitle(paste0("MCMC sample ", s))
86 | }
87 | gg[[1]]
88 | ggsave("slides/figs/elpd1.png", width = 6, height = 4.5)
89 | patchwork::wrap_plots(gg)
90 | ggsave("slides/figs/elpd6.png", width = 10, height = 7)
91 |
92 | S <- 50
93 | y_hat <- matrix(nrow = S, ncol = n)
94 | for (s in 1:S) {
95 | for (i in 1:n) {
96 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i]
97 | }
98 | }
99 | reshape2::melt(y_hat) |>
100 | ggplot(aes(Var2, value, group = Var1)) +
101 | geom_line(alpha = 0.3) +
102 | xlab("x") + ylab("y") +
103 | geom_point(data = fake, mapping = aes(x, y), inherit.aes = FALSE, pch = 21)
104 | ggsave("slides/figs/elpd-fits.png", width = 6, height = 4.5)
105 |
106 | S <- 1000
107 | y_hat <- matrix(nrow = S, ncol = n)
108 | for (s in 1:S) {
109 | for (i in 1:n) {
110 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i]
111 | }
112 | }
113 | ll_2 <- matrix(nrow = S, ncol = n)
114 | for (s in 1:S) {
115 | for (i in 1:n) {
116 | ll_2[s, i] <- dnorm(y[i], mean = y_hat[s, i], sd = sims[s, 3], log = TRUE)
117 | }
118 | }
119 |
120 | df <- reshape2::melt(ll_2) |>
121 | filter(Var2 %in% 1:6) |>
122 | mutate(Var2 = paste("Data point", Var2))
123 |
124 | df2 <- group_by(df, Var2) |>
125 | summarise(mean = mean(exp(value)))
126 |
127 | df |>
128 | ggplot(aes(exp(value))) +
129 | facet_wrap(~Var2) +
130 | geom_histogram() + xlab("Likelihood") + ylab("MCMC sample count") +
131 | geom_vline(data = df2, mapping = aes(xintercept = mean), colour = "red") +
132 | coord_cartesian(expand = FALSE)
133 | ggsave("slides/figs/elpd-dist.png", width = 7.5, height = 5)
134 |
135 | lpd_2 <- log(apply(exp(ll_2), 2, mean)) # computationally dangerous!
136 |
137 | sum(lpd_2)
138 |
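139 | # Added sketch: a numerically stable version of the lpd calculation above.
140 | # Averaging exp(log-lik) can underflow for very small likelihoods, so work on
141 | # the log scale with log-sum-exp: log(mean(exp(ll))) = logSumExp(ll) - log(S).
142 | log_sum_exp <- function(ll) {
143 |   max_ll <- max(ll)
144 |   max_ll + log(sum(exp(ll - max_ll)))
145 | }
146 | lpd_2_stable <- apply(ll_2, 2, \(ll) log_sum_exp(ll) - log(length(ll)))
147 | sum(lpd_2_stable) # matches sum(lpd_2) when no underflow occurs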
--------------------------------------------------------------------------------
/stan/8schools.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | vector[N] sigma;
5 | }
6 | parameters {
7 | real mu;
8 |   real<lower=0> sigma_prime; // group-level SD must be positive
9 | vector[N] theta;
10 | }
11 | model {
12 | mu ~ normal(0, 10);
13 | sigma_prime ~ cauchy(0, 10);
14 | theta ~ normal(mu, sigma_prime);
15 | y ~ normal(theta, sigma);
16 | }
17 |
--------------------------------------------------------------------------------
/stan/8schools_noncentered.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | vector[N] sigma;
5 | }
6 | parameters {
7 | real mu;
8 |   real<lower=0> sigma_prime; // group-level SD must be positive
9 | vector[N] eta;
10 | }
11 | transformed parameters {
12 | vector[N] theta;
13 | theta = mu + sigma_prime * eta;
14 | }
15 | model {
16 | mu ~ normal(0, 10);
17 | sigma_prime ~ cauchy(0, 10);
18 | eta ~ normal(0, 1);
19 | y ~ normal(theta, sigma);
20 | }
21 |
--------------------------------------------------------------------------------
/stan/gompertz.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // rows of data
3 | vector[N] y; // vector to hold observations
4 | real nu_rate; // rate parameter for nu exponential prior
5 | }
6 | parameters {
7 | real lambda;
8 | real b;
9 |   real<lower=0> sigma;
10 |   real<lower=0> nu; // Student-t df must be positive
11 | }
12 | model {
13 | // priors
14 | lambda ~ normal(0, 5);
15 | b ~ normal(0, 5);
16 | sigma ~ student_t(3, 0, 3);
17 | nu ~ exponential(nu_rate);
18 |
19 | // likelihood
20 | for (i in 2:N) {
21 | y[i] ~ student_t(nu, lambda + b * y[i-1], sigma);
22 | }
23 | }
24 | generated quantities {
25 | vector[N] pred;
26 | pred[1] = y[1];
27 | for (i in 2:N) {
28 | pred[i] = student_t_rng(nu, lambda + b * y[i-1], sigma);
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/stan/lm-matrix.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | int K; // number of predictors
4 | real y_meas[N]; // measurement of y
5 | matrix[N, K] X; // model predictor matrix
6 | }
7 | parameters {
8 |   vector[K] beta; // vector of coefficients
9 | real alpha; // intercept
10 |   real<lower=0> sigma; // residual sd
11 | }
12 | model {
13 | sigma ~ student_t(3, 0, 2); // prior
14 | alpha ~ normal(0, 10); // prior
15 | beta ~ normal(0, 2); // prior
16 | y_meas ~ normal(alpha + X * beta, sigma); // likelihood
17 | }
18 |
--------------------------------------------------------------------------------
/stan/lm-measure.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | vector[N] y; // response
4 | vector[N] x; // a predictor
5 | real tau; // measurement noise [This is new.]
6 | }
7 | parameters {
8 | real beta; // slope coefficient
9 | real alpha; // intercept coefficient
10 |   real<lower=0> sigma; // residual error standard deviation
11 | vector[N] x_true; // unknown true x value [This is new.]
12 | }
13 | model {
14 | sigma ~ student_t(3, 0, 2); // prior
15 | alpha ~ normal(0, 10); // prior
16 | beta ~ normal(0, 2); // prior
17 |
18 | // [optional prior on x_true could go here]
19 | x ~ normal(x_true, tau); // measurement model [This is new.]
20 | y ~ normal(alpha + x_true * beta, sigma); // data likelihood [This has changed.]
21 | }
22 | generated quantities {
23 | vector[N] posterior_predictions;
24 | for (i in 1:N) {
25 | posterior_predictions[i] = normal_rng(alpha + x_true[i] * beta, sigma);
26 | }
27 | }
28 |
--------------------------------------------------------------------------------
/stan/lm-simple.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | vector[N] y; // response
4 | vector[N] x; // a predictor
5 | }
6 | parameters {
7 | real beta; // slope coefficient
8 | real alpha; // intercept coefficient
9 |   real<lower=0> sigma; // residual error standard deviation
10 | }
11 | model {
12 | sigma ~ student_t(3, 0, 2); // prior
13 | alpha ~ normal(0, 10); // prior
14 | beta ~ normal(0, 2); // prior
15 |
16 | y ~ normal(alpha + x * beta, sigma); // data likelihood
17 | }
18 |
--------------------------------------------------------------------------------
/stan/lm.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | vector[N] y; // response
4 | vector[N] x; // a predictor
5 | }
6 | parameters {
7 | real beta; // slope coefficient
8 | real alpha; // intercept coefficient
9 |   real<lower=0> sigma; // residual error standard deviation
10 | }
11 | model {
12 | sigma ~ student_t(3, 0, 2); // prior
13 | alpha ~ normal(0, 10); // prior
14 | beta ~ normal(0, 2); // prior
15 |
16 | y ~ normal(alpha + x * beta, sigma); // data likelihood
17 | }
18 | generated quantities {
19 | vector[N] posterior_predictions;
20 | vector[N] log_lik;
21 |
22 | for (i in 1:N) {
23 | posterior_predictions[i] = normal_rng(alpha + x[i] * beta, sigma);
24 | log_lik[i] = normal_lpdf(y[i] | alpha + x[i] * beta, sigma);
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/vb/vb_basic.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | }
6 | parameters {
7 | real k;
8 | real linf;
9 |   real<lower=0> sigma;
10 | real t0;
11 | }
12 | model {
13 | k ~ normal(0, 1);
14 | linf ~ normal(0, 100);
15 | t0 ~ normal(0, 1);
16 | sigma ~ student_t(3, 0, 2);
17 | length ~ normal(linf * (1 - exp(-k * (age - t0))), sigma);
18 | }
19 |
--------------------------------------------------------------------------------
/vb/vb_norm.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | vector[4] prior_sds;
6 | int prior_only;
7 | }
8 | parameters {
9 | real k;
10 | real linf;
11 |   real<lower=0> sigma;
12 | real t0;
13 | }
14 | transformed parameters {
15 | vector[N] predicted_length;
16 | for (i in 1:N) {
17 | // record expected length for visualization and to use below:
18 | predicted_length[i] = linf * (1 - exp(-k * (age[i] - t0)));
19 | }
20 | }
21 | model {
22 | // priors:
23 | k ~ normal(0, prior_sds[1]);
24 | linf ~ normal(0, prior_sds[2]);
25 | t0 ~ normal(0, prior_sds[3]);
26 | sigma ~ student_t(3, 0, prior_sds[4]);
27 | // data likelihood:
28 | if (!prior_only) { // enable prior predictive simulations
29 | length ~ normal(predicted_length, sigma);
30 | }
31 | }
32 | generated quantities {
33 | vector[N] length_sim; // for posterior predictive simulations
34 | vector[N] log_lik; // for ELPD calculations
35 | for (i in 1:N) {
36 | length_sim[i] = normal_rng(predicted_length[i], sigma);
37 | log_lik[i] = normal_lpdf(length[i] | predicted_length[i], sigma);
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/vb/vb_norm_regions.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | vector[4] prior_sds;
6 | int prior_only;
7 | int survey_id[N];
8 | int N_surveys;
9 | }
10 | parameters {
11 | real k[N_surveys];
12 | real linf[N_surveys];
13 | real t0[N_surveys];
14 |   real<lower=0> sigma;
15 | }
16 | transformed parameters {
17 | vector[N] predicted_length;
18 | for (i in 1:N) {
19 | // record expected length for visualization and to use below:
20 | predicted_length[i] = linf[survey_id[i]] *
21 | (1 - exp(-k[survey_id[i]] * (age[i] - t0[survey_id[i]])));
22 | }
23 | }
24 | model {
25 | // priors:
26 | k ~ normal(0, prior_sds[1]);
27 | linf ~ normal(0, prior_sds[2]);
28 | t0 ~ normal(0, prior_sds[3]);
29 | sigma ~ student_t(3, 0, prior_sds[4]);
30 | // data likelihood:
31 | if (!prior_only) { // enable prior predictive simulations
32 | length ~ normal(predicted_length, sigma);
33 | }
34 | }
35 | generated quantities {
36 | vector[N] length_sim; // for posterior predictive simulations
37 | vector[N] log_lik; // for ELPD calculations
38 | for (i in 1:N) {
39 | length_sim[i] = normal_rng(predicted_length[i], sigma);
40 | log_lik[i] = normal_lpdf(length[i] | predicted_length[i], sigma);
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/vb/vb_norm_regions_sigma.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | vector[4] prior_sds;
6 | int prior_only;
7 | int survey_id[N];
8 | int N_surveys;
9 | }
10 | parameters {
11 | real k[N_surveys];
12 | real linf[N_surveys];
13 | real t0[N_surveys];
14 |   real<lower=0> sigma[N_surveys]; // per-survey residual SD
15 | }
16 | transformed parameters {
17 | vector[N] predicted_length;
18 | for (i in 1:N) {
19 | // record expected length for visualization and to use below:
20 | predicted_length[i] = linf[survey_id[i]] *
21 | (1 - exp(-k[survey_id[i]] * (age[i] - t0[survey_id[i]])));
22 | }
23 | }
24 | model {
25 | // priors:
26 | k ~ normal(0, prior_sds[1]);
27 | linf ~ normal(0, prior_sds[2]);
28 | t0 ~ normal(0, prior_sds[3]);
29 | sigma ~ student_t(3, 0, prior_sds[4]);
30 | // data likelihood:
31 | if (!prior_only) { // enable prior predictive simulations
32 |     for (i in 1:N) length[i] ~ normal(predicted_length[i], sigma[survey_id[i]]);
33 | }
34 | }
35 | generated quantities {
36 | vector[N] length_sim; // for posterior predictive simulations
37 | vector[N] log_lik; // for ELPD calculations
38 | for (i in 1:N) {
39 |     length_sim[i] = normal_rng(predicted_length[i], sigma[survey_id[i]]);
40 |     log_lik[i] = normal_lpdf(length[i] | predicted_length[i], sigma[survey_id[i]]);
41 | }
42 | }
43 |
--------------------------------------------------------------------------------