├── .gitignore
├── 00-install.R
├── 00-make-exercises.R
├── 00-plan.md
├── 01-small-world.Rmd
├── 02-mcmc-demo.Rmd
├── 03-mcmc-diagnostics-exercise.Rmd
├── 04-priors-explorer.R
├── 05-brms-basic.Rmd
├── 06-posterior-predictive.Rmd
├── 07-priors-brms.Rmd
├── 08-stan-programming-basic.Rmd
├── 09-stan-coding-growth.Rmd
├── 10-loo.Rmd
├── 11-divergent-transitions.Rmd
├── 12-workflow-sdm.Rmd
├── 13-resources.Rmd
├── LICENSE.txt
├── README.md
├── bayes-course.Rproj
├── data-raw
│   ├── mcmc-diagnostics-make-data.R
│   ├── pcod-growth.R
│   ├── postpred-data.R
│   └── rockfish-depth.R
├── data
│   ├── grey-heron.csv
│   ├── house-wren.csv
│   ├── hughes-etal-2018.rds
│   ├── kidiq.rds
│   ├── mcmc1.rds
│   ├── mcmc2.rds
│   ├── mcmc3.rds
│   ├── mcmc4.rds
│   ├── mcmc5.rds
│   ├── mcmc6.rds
│   ├── pcod-age-length.rds
│   ├── pcod-growth.rds
│   ├── ppcheck-df1.rds
│   ├── ppcheck-df2.rds
│   ├── ppcheck-df3.rds
│   ├── ppcheck-yrep1.rds
│   ├── ppcheck-yrep2.rds
│   ├── ppcheck-yrep3.rds
│   ├── ppcheck1.rds
│   ├── ppcheck2.rds
│   └── rockfish-depth.rds
├── extra
│   ├── distributions.R
│   ├── equations.tex
│   ├── equations2.tex
│   ├── render.R
│   ├── rstan-growth.Rmd
│   ├── rstanarm-counterfactual.Rmd
│   ├── slides.R
│   ├── vb-fixed-error.stan
│   └── vb.stan
├── slides
│   └── loo.R
├── stan
│   ├── 8schools.stan
│   ├── 8schools_noncentered.stan
│   ├── gompertz.stan
│   ├── lm-matrix.stan
│   ├── lm-measure.stan
│   ├── lm-simple.stan
│   └── lm.stan
└── vb
    ├── vb_basic.stan
    ├── vb_norm.stan
    ├── vb_norm_regions.stan
    └── vb_norm_regions_sigma.stan

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------

# History files
.Rhistory
.Rapp.history

# Session Data files
.RData

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
/*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md


*.pdf
*.rda
*.rds
*.png
*.html
.Rproj.user

original-files/
*.key
admin/
extra/

01-small-world_files/*
dates.R

exercise-files/*

--------------------------------------------------------------------------------
/00-install.R:
--------------------------------------------------------------------------------

# The first half is adapted from Hadley Wickham's install script.

# A polite helper for installing packages ---------------------------------

please_install <- function(pkgs, install_fun = install.packages) {
  if (length(pkgs) == 0) {
    return(invisible())
  }
  if (!interactive()) {
    stop("Please run in interactive session", call. = FALSE)
  }

  title <- paste0(
    "Ok to install these packages?\n",
    paste("* ", pkgs, collapse = "\n")
  )
  ok <- menu(c("Yes", "No"), title = title) == 1

  if (!ok) {
    return(invisible())
  }

  install_fun(pkgs)
}

# Do you have all the needed packages? ------------------------------------

pkgs <- c(
  "tidyverse", "rstan", "rstanarm", "brms", "rmarkdown",
  "manipulate", "shiny", "usethis", "bayesplot", "loo",
  "devtools", "gridExtra", "patchwork", "extraDistr", "coda"
)
have <- rownames(installed.packages())
needed <- setdiff(pkgs, have)

please_install(needed)

# Do you have build tools? ---------------------------------------
devtools::has_devel()

# If not:

# On a Mac:
# Open the Terminal.app (see /Applications/Utilities/Terminal.app)
# Run:
# xcode-select --install

# On a PC:
# Install the Rtools version that matches your version of R
# DFO users can find this in the Software Center

# Restart R

# Did it work?
devtools::has_devel()

# Stan working? ---------------------------------------

library("rstan")
scode <- "
parameters {
  real y[2];
}
model {
  y[1] ~ normal(0, 1);
  y[2] ~ double_exponential(0, 2);
}
"
cat("Please wait a minute while the model compiles.\n")
fit1 <- stan(model_code = scode, iter = 50, verbose = FALSE, chains = 1)

if (identical(class(fit1)[[1]], "stanfit")) {
  cat("Stan is working. Congratulations! You're done. You can ignore any warnings about 'R-hat' and 'Effective Sample Size (ESS)' above\n")
} else {
  cat("Stan is *not* working. Please ask a coworker or contact Sean.\n")
}

# Check brms -------------------------------------------

library(brms)
fit1 <- brm(
  count ~ zBase * Trt + (1|patient),
  data = epilepsy, family = poisson(),
  chains = 1, iter = 1000,
  prior = prior(normal(0, 1), class = b) +
    prior(cauchy(0, 1), class = sd)
)
summary(fit1)
# Ignore any warnings about R-hat or ESS

if (identical(class(fit1)[[1]], "brmsfit")) {
  cat("brms is working. You can ignore any warnings about 'R-hat' and 'Effective Sample Size (ESS)' above\n")
} else {
  cat("brms is *not* working. Please ask a coworker or contact Sean.\n")
}

--------------------------------------------------------------------------------
/00-make-exercises.R:
--------------------------------------------------------------------------------

files <- list.files(".", pattern = "*.Rmd$|*.R$")

dir.create("exercise-files", showWarnings = FALSE)
remove_exercises <- function(x) {
  file.copy(x, "exercise-files")
  f <- readLines(x)
  f_ex <- ifelse(grepl("# exercise", f), "# exercise", f)
  f_ex <- ifelse(grepl("<!-- exercise -->", f_ex), "<!-- exercise -->", f_ex)
  f_ex <- ifelse(grepl("^Answer: ", f_ex), "Answer: ", f_ex)
  writeLines(as.character(f_ex), con = file.path("exercise-files", x))
}
purrr::walk(files, remove_exercises)

--------------------------------------------------------------------------------
/00-plan.md:
--------------------------------------------------------------------------------

---
title: "An introduction to applied Bayesian data analysis for ecologists"
output:
  html_document:
    toc: true
---

# Objectives

1. Develop an intuition for Bayes' theorem, how a Bayesian approach fundamentally differs from a frequentist approach, and when using a Bayesian approach is particularly advantageous.

2. Understand the principle behind MCMC (Markov chain Monte Carlo) sampling. Become familiar with the concepts of chain convergence and MCMC tuning.
Develop a high-level understanding of Hamiltonian MCMC.

3. Learn to fit pre-packaged Bayesian regression models with brms. Become familiar with the concepts of posterior predictive checking and manipulating posterior samples to calculate posterior probabilities.

4. Learn how to assess the relative contribution of priors vs. the data. Learn the difference between weakly informative and informative priors. Learn what some common choices of weakly informative priors are. Become familiar with prior predictive checking.

5. Learn the basics of Stan model syntax and how to interact with Stan in R.

6. Become familiar with how the pieces fit together into a Bayesian workflow.

7. Throughout, gain some familiarity with several useful tools built around Stan and R (e.g., rstan, brms, bayesplot, loo, tidybayes).

8. Leave with some ideas for where to find more information.

# Plan

## Day 1

1. Introduction to probability, Bayes' theorem, when to go Bayesian
   - Slides: an introduction to Bayes' theorem and Bayesian updating
   - Slides: frequentist vs. Bayesian inference interpretation
   - Slides: when to go Bayes: advantages and disadvantages
   - *All together exercise*: small world Bayesian updating
   - Rmd: manipulating and summarizing the posterior various ways

2. Demystifying MCMC (group exercises and online demo)
   - Slides: MCMC intro
   - Rmd: run through Metropolis MCMC in R and plot the chain together
   - *Individual exercise*: tuning MCMC
   - Hamiltonian and NUTS slides
   - *Individual exercise*: play with online demo of Hamiltonian and NUTS MCMC

3. Convergence and MCMC diagnostics
   - Slides: MCMC diagnostics part 1
   - *Small groups exercise*: MCMC diagnostics

4. Priors (interactive code, slides, and discussion)
   - Slides: goals of priors, types of priors
   - *Small groups exercise*: experiment with an interactive prior demo

## Day 2

Recap

5. Posterior and prior predictive checking
   - Slides: predictive checking
   - *Small groups exercise*: posterior predictive checking

6. Introduction to applied Bayesian modeling
   - Rmd: brms-basic.Rmd
     - fit a regression model with brms
     - inspect MCMC chains for convergence
     - summarize MCMC chains to quantify the posterior
     - first intro to posterior predictive checking
     - making probabilistic statements by manipulating the posterior samples

7. Rmd: brms priors and prior predictive checks

## Day 3

8. Introduction to Stan code and rstan
   - Slides: Stan model syntax
   - Rmd: regression with a Stan model
     - understand when/why you might use brms vs. custom Stan code
     - look at the syntax and the code sections of a Stan model
     - call the Stan model from R
     - extract the posterior samples and make similar plots as before
     - fit a length-age growth model to groundfish data and summarize the output

9. Leave-one-out cross validation, log scores, and ELPD
   - Slides: cross-validation concepts and terms
   - Rmd: ELPD + LOO

## Day 4

10. Divergent transitions
    - Slides: divergent transitions
    - Rmd: divergent transitions

11. Putting it all together: Bayesian workflow
    - discuss why and what the suggested steps are
    - *All together exercise*: work through an example as a group
    - *Small groups exercise*: work through an example as an exercise

12. Applied Bayesian modeling standards, words of wisdom, and resources (slides)
    - standards for iterations, warmup, chains, and assessing convergence
    - Stan warnings to watch out for
    - how to describe the models
    - what to report in a paper
    - good books and online resources

--------------------------------------------------------------------------------
/01-small-world.Rmd:
--------------------------------------------------------------------------------

---
title: "An introduction to Bayesian updating with a grid search approach"
output:
  html_document:
    toc: true
    toc_float: true
---

# Goals

- Develop an intuition for Bayes' theorem and Bayesian updating
- See the inner workings of how easy it is to apply Bayes' rule with a 'grid search' approach (for very simple problems)
- Become familiar with the various ways we can summarize samples from a posterior distribution

# Setup

```{r, message=FALSE, warning=FALSE}
library(ggplot2)
library(dplyr)
theme_set(theme_light())
```

# Bayesian updating of the posterior

We will start by performing an experiment where we "sample" from a spinning globe and record whether a given finger lands on land `0` or water `1`.

In person: launch the beach ball!

Online:

The goal is to estimate what proportion of the globe is covered in water.

This example has some built-in data from a previous version of that experiment. In person, we will replace it with the data we collect.

```{r}
dat <- c(1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1) # data we collected

# dat <- sample(0:1, size = 24, replace = T)
p <- seq(0, 1, length.out = 200) # a sequence of 'proportions water' to evaluate

# --------------------
# try different priors:
prior <- dunif(p, 0, 1)
# prior <- dbinom(2, size = 3, prob = p)
# prior <- dbinom(7, size = 10, prob = p)
# prior <- dbinom(71, size = 100, prob = p)
# prior <- dbinom(5, size = 10, prob = p)
# prior <- dnorm(p, mean = 0.5, sd = 0.3)
# prior <- dbeta(p, shape1 = 3, shape2 = 2)
# prior <- dnorm(p, mean = 0.5, sd = 0.25);prior[1:100] <- 0
# --------------------

prior <- prior / sum(prior) # make the prior sum to 1

out <- list()
for (i in seq_along(dat)) {
  likelihood <- dbinom(sum(dat[i]), size = 1, prob = p) # data likelihood
  posterior <- likelihood * prior # Bayes rule numerator
  posterior <- posterior / sum(posterior) # make it sum to 1
  out[[i]] <- data.frame(p, prior, posterior, toss = i) # save it
  prior <- posterior # for next time around
}
out <- bind_rows(out)

ggplot(out, aes(x = p, ymax = posterior, ymin = 0, group = toss)) +
  geom_ribbon(alpha = 0.1, fill = "red") +
  geom_ribbon(aes(x = p, ymax = prior, ymin = 0), alpha = 0.1) +
  coord_cartesian(expand = FALSE) +
  xlab("Proportion water") +
  ylab("Probability density") +
  facet_wrap(~toss)
```

In the above figure, the grey represents the prior distribution and red represents the posterior distribution.
In each panel, we add one observation to our data, take the posterior from the previous calculation and turn it into our new prior, multiply the prior by the data likelihood, and divide that by the sum of the likelihood times the prior across all values of `p` to make sure our posterior sums to 1.

Questions:

1. What effect does changing the prior have on the posterior for the first few data points?
2. Does the posterior end up in a qualitatively different place with the different priors above?

# Fitting all the data at once

In the last example, we updated our posterior with each additional piece of data. Let's do our calculations in a way that is more consistent with how we would probably do this experiment. We'll calculate the posterior distribution including all the data at once along with our initial prior.

Again, try playing with the prior.

```{r}
N <- 1000 # bigger for smoother histogram plots below
p <- seq(0, 1, length.out = N)

prior <- dunif(p, 0, 1) / length(p)
# prior <- dbinom(2, size = 3, prob = p)
# prior <- dnorm(p, mean = 0.5, sd = 0.25)
# prior <- dbeta(p, shape1 = 3, shape2 = 2)
# prior <- dnorm(p, mean = 0.5, sd = 0.25);prior[1:100] <- 0
# prior <- c(1:250, 250:1)
prior <- prior / sum(prior)

likelihood <- dbinom(sum(dat), size = length(dat), prob = p)
posterior <- likelihood * prior
posterior <- posterior / sum(posterior)

ggplot(tibble(posterior, prior),
  aes(x = p, ymax = posterior, ymin = 0)) +
  geom_ribbon(alpha = 0.2, fill = "red") +
  geom_ribbon(aes(x = p, ymax = prior, ymin = 0), alpha = 0.2) +
  coord_cartesian(expand = FALSE, ylim = c(0, 1.03 * max(c(posterior, prior)))) +
  xlab("Proportion water") +
  ylab("Probability density")
```

Does that look the same as the final panel of the previous plot?

# Summarizing the posterior with samples

If we sample from the values of `p` in proportion to their probability, we will be drawing samples from the posterior.

This makes it easy to summarize the posterior however we would like: it's just a matter of manipulating the samples.

This is a helpful warmup practice because next we will be using Markov chain Monte Carlo (MCMC), which will also return samples from the posterior.

```{r}
post_samples <- sample(p, prob = posterior, size = 2000, replace = TRUE)
```

Now we can make a histogram of the posterior distribution samples.

```{r}
g <- ggplot(tibble(post_samples), aes(post_samples)) +
  geom_histogram(bins = 30, alpha = 0.4, fill = "red") +
  coord_cartesian(expand = FALSE)
g
```

We can look at the mean or the median of the posterior as one way to summarize it.

```{r}
q <- mean(post_samples)
g + geom_vline(xintercept = q)

q_median <- median(post_samples)
g + geom_vline(xintercept = q) + geom_vline(xintercept = q_median, lty = 2)
```

We can also calculate quantile credible intervals. For this, we just use the `quantile()` function to figure out the appropriate thresholds for a given probability. Here we will calculate an 80% credible interval.
```{r}
q <- quantile(post_samples, probs = c(0.1, 0.9))
g + geom_vline(xintercept = q)
```

The Highest Posterior Density (HPD) interval is the shortest possible credible interval with the appropriate probability coverage. It's harder to calculate, so we will rely on an existing function. Quantile-based credible intervals are usually more common.

```{r}
q_hpd <- coda::HPDinterval(coda::as.mcmc(post_samples), prob = 0.8)
q_hpd <- as.numeric(q_hpd)
q_hpd
g + geom_vline(xintercept = q) + geom_vline(xintercept = q_hpd, lty = 2)
```

We can also calculate one-sided credible intervals. For example, we could say that there is a 0.8 probability of there being [blank] or more proportion water on the globe (conditional on our model and the data we collected). We will sort the samples and find out the value just past the 0.2 quantile:

```{r}
threshold_sample <- floor(0.2 * length(post_samples))
length(post_samples)
threshold_sample
q <- sort(post_samples)[threshold_sample]
q
g + geom_vline(xintercept = q)
```

Or instead of picking a given probability threshold, we could calculate the probability above some meaningful value. For example, we can calculate how much probability density is above 0.5. This tells us the probability that there is more than 50% water on the globe (conditional on our model and data).

```{r}
sum(post_samples > 0.5) / length(post_samples)
mean(post_samples > 0.5)
g + geom_vline(xintercept = 0.5)
```

We can also easily calculate how much probability there is between specific values. For example, what is the probability that there is between 60% and 80% water?

```{r}
mean(post_samples > 0.6 & post_samples < 0.8)
g + geom_vline(xintercept = c(0.6, 0.8))
```

--------------------------------------------------------------------------------
/02-mcmc-demo.Rmd:
--------------------------------------------------------------------------------

---
title: "The Metropolis MCMC algorithm in R"
output:
  html_document:
    toc: true
    toc_float: true
---

# Goals

- Demystify MCMC sampling by coding one ourselves in R and manually updating it
- Gain an intuition for the need to tune MCMC algorithms with interactive visualization

# Setup and data simulation

Run this next code chunk once to generate our data.

We are simulating data with a given mean and standard deviation. We'll effectively be estimating an intercept-only model with a known level of observation error.

```{r}
set.seed(123)
mu <- 1
sigma <- 2
dat <- rnorm(50, mean = mu, sd = sigma) # our simulated data
plot(dat)
```

Then this chunk will create a blank plot that we will fill in with our MCMC samples.

```{r}
plot(
  1, xlim = c(0, 30), ylim = c(-2, 2), type = "n",
  ylab = expression(mu), xlab = "Chain iteration"
)
jump_sd <- 1 # our chosen jumping standard deviation
i <- 1 # starting iteration
previous_proposal <- 0 # initial proposal value
set.seed(123)
```

# Manually running our MCMC algorithm

Run this next code chunk repeatedly; each MCMC sample will get added to the open plot.

Things to be aware of:

- We're going to choose a prior of Normal(0, 5^2), i.e. `dnorm(mean = 0, sd = 5)`.
- We're going to assume we know the error standard deviation `sigma` so we can keep this example simple and have only one parameter.

Step through this carefully:

```{r, eval=FALSE}
# our proposed next value in the chain:
(proposal <- rnorm(1, previous_proposal, jump_sd))
# ----------------------------------------------------------------

# data likelihood at our proposal:
(log_like_proposal <- sum(dnorm(dat, mean = proposal, sd = sigma, log = TRUE)))
(log_like_previous <- sum(dnorm(dat, mean = previous_proposal, sd = sigma, log = TRUE)))
# ----------------------------------------------------------------

# prior likelihood at our proposal:
(log_like_prior_proposal <- dnorm(proposal, mean = 0, sd = 5, log = TRUE))
(log_like_prior_previous <- dnorm(previous_proposal, mean = 0, sd = 5, log = TRUE))
# ----------------------------------------------------------------

# Combine the log-prior-likelihood with the log-data-likelihood to get a value
# that is proportional to the posterior probability of that parameter value
# given the data.
# Since we're in log space, we can add:
(log_posterior_proposal <- log_like_prior_proposal + log_like_proposal)
(log_posterior_previous <- log_like_prior_previous + log_like_previous)
# ----------------------------------------------------------------

# Calculate the ratio of the proposal and previous probabilities.
# Doing it in log space is computationally safer (avoids very small numbers).
# (prob_ratio <- exp(log_posterior_proposal) / exp(log_posterior_previous))
(prob_ratio <- exp(log_posterior_proposal - log_posterior_previous))
# ----------------------------------------------------------------

# If the probability ratio is > 1, then always accept the new parameter value(s).
# If the probability ratio is < 1, then accept the new parameter values in
# proportion to the ratio of proposal probability / previous probability
if (runif(1) < prob_ratio) { # fancy trick to do the above
  print("Accept new proposal")
  (previous_proposal <- proposal)
} else {
  print("Retain previous value")
  (previous_proposal <- previous_proposal)
}
# ----------------------------------------------------------------

points(i, previous_proposal) # plot our chosen value for this iteration
i <- i + 1 # update counter for next proposal
# now repeat!
```

You now have an MCMC chain! The distribution of those values (if you take enough of them) will reflect the posterior distribution of the parameter(s).

```{r, eval=FALSE}
abline(h = mu, lty = 2) # our known true value
```

# MCMC tuning demo

The following code chunk contains the same code but embedded in a function so that we can call it repeatedly with different argument values.

```{r, eval=FALSE}
mcmc_example <- function(
  mu = 4, # the true mean; we will estimate this
  sigma = 2, # the true residual SD; we will assume we know this for simplicity
  .n = 30, # number of data points to simulate
  prior_mu = 0, # our prior distribution mean
  prior_mu_sd = 10, # our prior distribution SD on the mean

  # Next, the SD of our jump function.
  # Too small a value and the chain might take a
  # very long time to get to the right parameter space or might get stuck in the
  # wrong area of the parameter space.
  # Too large a value and the proposed values will be often rejected, and again,
  # the chain may get stuck and take a very long time to converge on an
  # appropriate answer.
  jump_sd = 5,
  reps = 10000, # the length of our MCMC chain
  seed = sample.int(.Machine$integer.max, 1)
) {
  dat <- rnorm(.n, mean = mu, sd = sigma) # our simulated data

  # We will ensure that our data has exactly our specified mean.
  # (This is just to make the simulation easier to follow.)
  dat <- dat - (mean(dat) - mu)

  # A vector to hold our MCMC chain output:
  out <- vector(length = reps, mode = "numeric")

  # We'll start at an initial value of 0:
  out[1] <- 0

  # Now we'll loop through our MCMC chain:
  for (i in seq(2, length(out))) {

    # Propose a new value given the previous value and the jump SD:
    proposal <- rnorm(1, out[i - 1], jump_sd)

    # Calculate the log-likelihood of the data given the proposed parameter value
    # and the previous parameter value:
    log_like_proposal <- sum(dnorm(dat, mean = proposal, sd = sigma, log = TRUE))
    log_like_previous <- sum(dnorm(dat, mean = out[i - 1], sd = sigma, log = TRUE))

    # Get the log-probability of the proposed and previous parameter values given
    # the prior:
    log_prior_proposal <- dnorm(proposal, mean = prior_mu, sd = prior_mu_sd, log = TRUE)
    log_prior_previous <- dnorm(out[i - 1], mean = prior_mu, sd = prior_mu_sd, log = TRUE)

    # Combine the log-prior with the log-likelihood to get a value that is
    # proportional to the posterior probability of that parameter value given the
    # data:
    log_posterior_proposal <- log_prior_proposal + log_like_proposal
    log_posterior_previous <- log_prior_previous + log_like_previous

    # Calculate the ratio of the proposal and previous probabilities:
    prob_ratio <- exp(log_posterior_proposal - log_posterior_previous)

    # If the probability ratio is > 1, then always accept the new parameter
    # values.
    # If the probability ratio is < 1, then accept the new parameter values in
    # proportion to the ratio.
    if (runif(1, min = 0, max = 1) < prob_ratio) {
      out[i] <- proposal # use the proposed parameter value
    } else {
      out[i] <- out[i - 1] # keep the previous parameter value
    }
  }

  par(mfrow = c(1, 3))
  plot(dat, main = "Observed data")
  plot(seq_along(out), out,
    type = "l", xlab = "Chain index",
    ylab = expression(widehat(mu)), col = "#00000050",
    main = "Traceplot"
  ) # the MCMC chain
  abline(h = mu, col = "red", lty = 1, lwd = 2) # the true mean
  hist(out,
    xlim = c(-2, 7), breaks = 30,
    main = "Prior (line)\nand Posterior (histogram)", xlab = "mu"
  )
  abline(v = mu, col = "red", lty = 1, lwd = 2)
  xx <- seq(-10, 10, length.out = 200)
  yy <- dnorm(xx, mean = prior_mu, sd = prior_mu_sd)
  par(new = TRUE)
  plot(xx, yy,
    ylim = c(0, max(yy)), type = "l", axes = FALSE, ann = FALSE,
    xlim = c(-2, 7) # match the histogram's x-axis so the overlay lines up
  )
  invisible(out)
}
```

Now, we'll call our function, which by default will take 10,000 MCMC samples:

```{r, eval=FALSE}
mcmc_example()
```

We can play with our function using the manipulate package:

```{r, eval=FALSE}
library(manipulate)
manipulate(
  mcmc_example(
    mu = mu,
    sigma = sigma,
    .n = .n,
    prior_mu = 0,
    prior_mu_sd = 10,
    jump_sd = jump_sd,
    reps = reps,
    seed = 42
  ),
  mu = slider(0, 5, 1, step = 0.1),
  sigma = slider(0.1, 10, 5, step = 0.1),
  .n = slider(5, 1000, 25, step = 5),
  jump_sd = slider(0.05, 40, 5, step = 0.1),
  reps = slider(50, 20000, 1000, step = 50),
  seed = slider(1, 50, 42, step = 1)
)
```

# Questions:

1. What happens to the posterior as the number of data points (`.n`) gets large?

2. What happens to the posterior as the number of data points (`.n`) gets small?

3. What happens to the posterior as the observation error (`sigma`) gets larger?

4. What happens if the jump distance gets very big?

5. What happens if the jump distance gets very small?

--------------------------------------------------------------------------------
/03-mcmc-diagnostics-exercise.Rmd:
--------------------------------------------------------------------------------

---
title: MCMC diagnostics
output:
  html_document:
    toc: true
    toc_float: true
---

# Goals

- Gain experience diagnosing MCMC chains visually, with Rhat, and with ESS.

# Exercise

For each of the following sets of saved MCMC chains, answer the following:

1. Do these samples appear consistent with convergence?
2. What helped you come to that conclusion?

If inconsistent with convergence, continue:

3. What might have caused this scenario?
4. Can the MCMC sampling likely be improved without adjusting the model? How?

```{r mcmc-chains1}
samples <- readRDS(here::here("data/mcmc1.rds"))
samples_matrix <- do.call(cbind, samples)

bayesplot::mcmc_trace(samples)
rstan::Rhat(samples_matrix)
rstan::ess_bulk(samples_matrix)
rstan::ess_tail(samples_matrix)
```

1. No
2. Rhat a bit high, ESS a bit low, strong autocorrelation apparent in chains (see the `acf()` sketch below)
3. Chains not run for long enough
4. Possibly just running for longer; no thinning needed!
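
To quantify the autocorrelation mentioned in answer 2 rather than only eyeballing the traceplot, base R's `acf()` works on a single chain. A minimal sketch, assuming `samples_matrix` from the chunk above holds one chain per column:

```{r}
# autocorrelation of the first chain; a slow decay means many iterations
# are needed per effective (roughly independent) sample
acf(samples_matrix[, 1])
```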
```{r mcmc-chains2}
samples <- readRDS(here::here("data/mcmc2.rds"))
samples_matrix <- do.call(cbind, samples)

bayesplot::mcmc_trace(samples)
rstan::Rhat(samples_matrix)
rstan::ess_bulk(samples_matrix)
rstan::ess_tail(samples_matrix)
```

1. No
2. Chains drifting, Rhat high, ESS low
3. Possibly not enough warmup and starting points not dispersed enough
4. Longer warmup?

```{r mcmc-chains3}
samples <- readRDS(here::here("data/mcmc3.rds"))
samples_matrix <- do.call(cbind, samples)

bayesplot::mcmc_trace(samples)
bayesplot::mcmc_dens_overlay(samples)
rstan::Rhat(samples_matrix)
rstan::ess_bulk(samples_matrix)
rstan::ess_tail(samples_matrix)
```

1. No
2. One chain has larger variance
3. Some chains may be getting stuck and not exploring the posterior
4. May be harder. Tighter priors, re-parameterized model, simplified model?

```{r mcmc-chains4}
samples <- readRDS(here::here("data/mcmc4.rds"))
samples_matrix <- do.call(cbind, samples)

bayesplot::mcmc_trace(samples)
bayesplot::mcmc_dens_overlay(samples)
rstan::Rhat(samples_matrix)
rstan::ess_bulk(samples_matrix)
rstan::ess_tail(samples_matrix)
```

1. Yes
2. ESS > 400, Rhat < 1.01, chains look good
3. NA
4. NA

```{r mcmc-chains5}
samples <- readRDS(here::here("data/mcmc5.rds"))
samples_matrix <- do.call(cbind, samples)

bayesplot::mcmc_trace(samples)
rstan::Rhat(samples_matrix)
rstan::ess_bulk(samples_matrix)
rstan::ess_tail(samples_matrix)
bayesplot::mcmc_dens_overlay(samples)
```

1. No
2. Chains have different means
3. Might still reflect dispersed starting points without enough warmup? Model might be weakly identified?
4. Longer warmup? Simplify model? More data?

```{r mcmc-chains6}
samples <- readRDS(here::here("data/mcmc6.rds"))
samples_matrix <- do.call(cbind, samples)

bayesplot::mcmc_trace(samples)
rstan::Rhat(samples_matrix)
rstan::ess_bulk(samples_matrix)
rstan::ess_tail(samples_matrix)
```

1. No
2. Chain behaviour at the beginning looks different; autocorrelation may be high
3. Might have forgotten to remove the warmup or not run the warmup for long enough?
4. Discard the warmup, run the warmup for longer, ...

--------------------------------------------------------------------------------
/04-priors-explorer.R:
--------------------------------------------------------------------------------

# Demo of the influence of priors in regression

# Goals
# - Gain some intuition about the influence of priors on a Bayesian regression

# In the plots, the gray histograms represent the posterior samples, the blue
# lines represent the priors, and the red lines represent the true values. The
# last panel represents the simulated data (dots), the true relationship
# between y and x (red line), and 50 draws from the posterior distribution.

# Try running the following code chunk and adjusting the argument values. If
# you adjust the `seed` slider, you will change the random number generator
# draw.

# Remember that the choice of scale or standard deviation value for the priors
# depends on the scale of the predictor and response variables.
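
# As a sketch of that point (with made-up numbers): a normal(0, 1) prior on a
# slope is weakly informative when the predictor is standardized, but very
# informative if the predictor is measured in, say, millimetres. Standardizing
# first makes one default prior broadly reasonable, e.g.:
# x_scaled <- as.numeric(scale(x)) # mean 0, SD 1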
# You may consider asking questions like:
#
# - What happens if you have a tight prior on the slope coefficient that
#   differs from the true value?
# - How does this change when you have a lot of data vs. a little?
# - How much data do I need before the data overwhelm the prior for a given
#   discrepancy between the prior and data?
# - What happens with a very diffuse prior on the slope coefficient and very
#   few data points?
# - Can having an informative prior on the slope coefficient help if you've
#   collected very little data? When might you have this kind of information?

library(rstanarm)
library(ggplot2)
theme_set(theme_light())
library(shiny)

mcmc_example <- function(
  seed = 1,
  intercept = 3,
  slope = 0.7,
  sigma = 2, # the true residual SD
  .n = 30, # number of data points to simulate
  prior_slope_mean = 0,
  prior_slope_sd = 3,
  prior_intercept_sd = 10,
  prior_sigma_sd = 3,
  reps = 800 # the length of each MCMC chain
) {
  set.seed(seed)
  x <- rnorm(.n, 0, .5)
  d <- data.frame(x = x, y = rnorm(.n, mean = intercept + slope * x, sd = sigma))

  suppressWarnings(
    m <- rstanarm::stan_glm(y ~ x, d,
      iter = reps, chains = 1,
      family = gaussian(link = "identity"), refresh = 0,
      prior = normal(prior_slope_mean, prior_slope_sd, autoscale = FALSE),
      prior_intercept = normal(0, prior_intercept_sd, autoscale = FALSE),
      prior_aux = normal(0, prior_sigma_sd, autoscale = FALSE),
      seed = seed
    )
  )

  e <- as.data.frame(m)
  xx <- seq(-6, 6, length.out = 100)

  slope_prior <- data.frame(
    x = xx,
    y = dnorm(xx, mean = prior_slope_mean, sd = prior_slope_sd)
  )

  intercept_prior <- data.frame(
    x = xx,
    y = dnorm(xx, mean = 0, sd = prior_intercept_sd)
  )

  xx0 <- seq(0, 6, length.out = 100)
  sigma_prior <- data.frame(
    x = xx0,
    y = extraDistr::dhnorm(xx0, sigma = prior_sigma_sd)
  )

  .range <- c(-4, 4)

  g1 <- ggplot(e, aes(`(Intercept)`, after_stat(density))) +
    geom_histogram(bins = 50) +
    geom_line(data = intercept_prior, aes(x, y), col = "blue") +
    coord_cartesian(xlim = .range) +
    xlab("Intercept") +
    geom_vline(xintercept = intercept, col = "red")

  g2 <- ggplot(e, aes(x, after_stat(density))) +
    geom_histogram(bins = 50) +
    geom_line(data = slope_prior, aes(x, y), col = "blue") +
    coord_cartesian(xlim = .range) +
    xlab("Slope coefficient") +
    geom_vline(xintercept = slope, col = "red")

  g3 <- ggplot(e, aes(sigma, after_stat(density))) +
    geom_histogram(bins = 50) +
    geom_line(data = sigma_prior, aes(x, y), col = "blue") +
    coord_cartesian(xlim = c(0, max(.range))) +
    xlab("Observation error SD") +
    geom_vline(xintercept = sigma, col = "red")

  nd <- data.frame(x = seq(-1, 1, length.out = 2))
  set.seed(seed)
  pp <- posterior_linpred(m, newdata = nd, draws = 50)
  pp2 <- reshape2::melt(pp)
  pp2$x <- rep(nd$x, each = 50)

  g4 <- ggplot(d, aes(x = x, y = y)) +
    geom_point() +
    geom_line(
      data = pp2, aes(x, value, group = iterations), inherit.aes = FALSE,
      alpha = 0.5, col = "grey30"
    ) +
    geom_abline(
      slope = slope, intercept = intercept,
      col = "red"
    )

  patchwork::wrap_plots(g1, g2, g3, g4, ncol = 2)
}

ui <- fluidPage(
  pageWithSidebar(
    titlePanel("Regression prior explorer"),
    sidebarPanel(
      sliderInput("seed", "Random seed value", value = 1, min = 1, max = 200, step = 1),
      sliderInput("slope", "True slope coefficient", value = 0.6, min = -2, max = 2, step = 0.2),
      sliderInput("sigma", "True observation error SD (sigma)", value = 1, min = 0.1, max = 8, step = 0.1),
      sliderInput(".n", "Number of observations", value = 50, min = 2, max = 1000, step = 2),
      sliderInput("prior_slope_mean", "Slope prior mean", value = 0, min = -5, max = 5, step = 0.5),
      sliderInput("prior_slope_sd", "Slope prior SD", value = 1, min = 0.1, max = 100, step = .1),
      sliderInput("prior_sigma_sd", "Sigma prior SD", value = 1, min = 0.1, max = 50, step = 1)
    ),
    mainPanel(plotOutput("gg"))
  )
)
server <- function(input, output, session) {
  output$gg <- renderPlot(
    {
      mcmc_example(
        seed = input$seed,
        intercept = 0,
        slope = input$slope,
        sigma = input$sigma,
        .n = input$.n,
        prior_slope_mean = input$prior_slope_mean,
        prior_slope_sd = input$prior_slope_sd,
        prior_sigma_sd = input$prior_sigma_sd
      )
    },
    width = 600,
    height = 500
  )
}

shinyApp(ui, server)

--------------------------------------------------------------------------------
/05-brms-basic.Rmd:
--------------------------------------------------------------------------------

---
title: "An introduction to applied Bayesian regression using brms"
output:
  html_document:
    toc: true
    toc_float: true
---

# Goals

- Learn to fit pre-packaged Bayesian regression models with brms.
- Gain initial exposure to posterior predictive checking and manipulating posterior samples to calculate posterior probabilities.

# Setup

Let's load dplyr, ggplot2, and brms.

```{r, warning=FALSE, message=FALSE}
library(dplyr)
library(ggplot2)
library(brms)
theme_set(theme_light())
dir.create("cache", showWarnings = FALSE)
options(brms.file_refit = "on_change") # re-fit cached models if changes
```

Any time we use rstan (or a package that relies on rstan, such as brms or rstanarm), we can set an R option to use parallel processing with all available cores: `options(mc.cores = parallel::detectCores())`. This example should run so quickly that it will likely run faster on a single core, so you may choose to skip this or explicitly set it to 1 core.

```{r, eval=FALSE}
# options(mc.cores = parallel::detectCores())
# options(mc.cores = 1)
```

# Data

We are going to work with data from:

Hughes, B.B., Lummis, S.C., Anderson, S.C., and Kroeker, K.J. 2018. Unexpected resilience of a seagrass system exposed to global stressors. Glob. Change Biol. 24(1): 224–234.

The data come from a mesocosm experiment by Brent Hughes where he manipulated water pH and whether or not nutrients were added (to represent nutrient loads in eelgrass beds) in 14 200-L barrels. He measured several variables, but the response variable we are going to work with here is the increase in mass of seahares (*Phyllaplysia taylori*), a type of sea slug, after 24 days.
```{r}
d <- readRDS(here::here("data/hughes-etal-2018.rds")) |>
  filter(label == "Change in seahare mass (g FW)") |>
  rename(change_seahare_mass_g_fw = value) |>
  dplyr::select(-label, -figure_panel, -nutrients_text, -response)
glimpse(d)
```

```{r}
ggplot(d,
  aes(ph, change_seahare_mass_g_fw, colour = as.factor(nutrients))) +
  geom_point()
```

Let's rescale (center and possibly divide by the SD) the predictors. This is important

1. so that we have some idea of what reasonable prior values will be,
2. so that our coefficients are on a reasonable scale for interpretation and for Stan, and
3. so that we can add a quadratic effect and have one coefficient represent the slope and the other the curvature.

```{r}
d <- mutate(d,
  ph_scaled = as.numeric(scale(ph))
)
```

Let's look at the data:

```{r}
ggplot(d,
  aes(ph_scaled, change_seahare_mass_g_fw,
    colour = as.factor(nutrients))) +
  geom_point()
```

# Fitting a model

We are going to fit this model with the `brms::brm()` function.

```{r, results='hide', warning=FALSE}
fit <- brm(
  log(change_seahare_mass_g_fw) ~ ph_scaled + I(ph_scaled^2) + nutrients,
  data = d,
  iter = 2000,
  chains = 4,
  file = "cache/seahare",
  prior = c(
    set_prior("normal(0, 5)", class = "b"),
    set_prior("normal(0, 10)", class = "Intercept"),
    set_prior("student_t(3, 0, 3)", class = "sigma")
  )
)
```

There are a variety of functions available to inspect our model, including the usual print or summary function:

```{r}
summary(fit)
```

Take a look at the output and make sure you understand everything there.

## Questions:

Open the help file `?summary.brmsfit` and answer the following questions:

1. What does the `Estimate` column represent here? Hint: read `?summary.brmsfit`.
2. What does the `Est.Error` column represent here?
3. What is `sigma` here?
4. Do the `Rhat` and `ESS` columns look reasonable?

Bonus questions:

5. Can you get `summary.brmsfit()` to return medians? What does the `Est.Error` column now mean?
6. Can you get `summary.brmsfit()` to return information on the priors?
7. Can you get `summary.brmsfit()` to return 89% CIs?

```{r}
summary(fit, robust = TRUE) # exercise
summary(fit, priors = TRUE) # exercise
summary(fit, prob = 0.89) # exercise
```

There are a lot of helper functions in brms to explore. Here are a few useful ones:

```{r}
brms::prior_summary(fit)
brms::stancode(fit) # not easy to read!
brms::standata(fit)
```

# Inspecting the chains for convergence

We are going to use the plotting functions from the package bayesplot, which is also developed by the Stan developers. These plotting functions will work with any kind of MCMC output, not just the output from brms, rstanarm, or rstan, as long as you format the samples correctly.

There are many available plotting functions in the bayesplot package. Before we start exploring them, we need to make sure that our chains are consistent with convergence. To start with, we already checked the effective sample size and Rhat values, but there's no substitute for visually inspecting the chains!
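
If you want those numerical diagnostics on their own rather than reading them off `summary()`, brms exposes helpers for this. A quick sketch, assuming a reasonably recent brms version that exports these:

```{r}
brms::rhat(fit) # potential scale reduction factor for each parameter
brms::neff_ratio(fit) # effective sample size relative to total draws
```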
The primary way of looking at MCMC chains is as overlaid time series:

```{r}
bayesplot::mcmc_trace(fit, pars = "b_nutrients")
bayesplot::mcmc_trace(fit, regex_pars = "^b_|sigma")
```

How does that look to you?

Another thing to check is the autocorrelation in the chains:

```{r}
bayesplot::mcmc_acf(fit)
```

## Question:

1. Is autocorrelation in the chains a problem in itself?

# Posterior predictive checks

Posterior predictive checking is a powerful concept in Bayesian statistics.

The basic idea is to simulate new observations from the model several times and then compare those simulated data sets to the data that we observed. We can then slice and dice that comparison creatively to make sure that our Bayesian probability model is a good representation of the process that generated the observed data. They should be indistinguishable in any way you can think of to compare them.

We could do this manually, although the bayesplot package already has a large number of helpful plots available. We will use the built-in `pp_check()` shortcuts for the rest of this exercise, but know that these are just calling the bayesplot functions, and you can use the bayesplot functions with MCMC output from any Bayesian models sampled with MCMC methods.

Here are all the available posterior predictive checking functions in the bayesplot package:

```{r}
bayesplot::available_ppc()
```

brms can call these posterior predictive functions directly, although it will generate new simulations each time. E.g.:

```{r}
brms::pp_check(fit, ndraws = 50)
```

## Questions:

1. What are we looking at here?
2. Are the draws from the posterior consistent with the data that we observed?

To speed things up, we can instead take one set of posterior predictive draws and then plot them in various ways using bayesplot. E.g.,

```{r}
y <- log(d$change_seahare_mass_g_fw)
yrep <- posterior_predict(fit, ndraws = 50)
bayesplot::ppc_dens_overlay(y, yrep)
```

is the same as:

```{r}
brms::pp_check(fit, ndraws = 50, type = "dens_overlay")
```

where we found `dens_overlay` by running `bayesplot::available_ppc()` and removing the `ppc_` part.

Read about the various available plotting functions at:

```{r, eval=FALSE}
?bayesplot::`PPC-overview`
```

### Your turn

Experiment with the available posterior predictive checking functions to evaluate our model.
```{r}
pp_check(fit, type = "hist") # exercise
pp_check(fit, type = "error_scatter") # exercise
pp_check(fit, type = "scatter") # exercise
pp_check(fit, type = "scatter_avg") # exercise
pp_check(fit, type = "scatter_avg_grouped", group = "nutrients") # exercise
pp_check(fit, type = "ecdf_overlay") # exercise
pp_check(fit, type = "intervals") # exercise
pp_check(fit, type = "intervals", x = "change_seahare_mass_g_fw") # exercise
pp_check(fit, type = "intervals", x = "nutrients") # exercise
pp_check(fit, type = "intervals", x = "ph_scaled") # exercise
```

# Summarizing the posterior samples graphically

Again, we can look at trace plots like this:

```{r}
bayesplot::mcmc_trace(fit)
```

These are the available plotting functions:

```{r}
bayesplot::available_mcmc()
```

### Your turn

Experiment with the available plotting functions to summarize the posterior probabilities of the parameters in our model. Which do you find most useful here?

```{r}
bayesplot::mcmc_areas(fit, regex_pars = "^b_|sigma") # exercise
bayesplot::mcmc_intervals(fit, regex_pars = "^b_|sigma") # exercise
bayesplot::mcmc_combo(fit, regex_pars = "^b_|sigma") # exercise
bayesplot::mcmc_areas_ridges(fit, regex_pars = "^b_|sigma") # exercise
```

- What does the `(Intercept)` coefficient represent?
- What does the `nutrients` coefficient represent?
- What does the `ph_scaled` coefficient represent?
- What does the `I(ph_scaled^2)` coefficient represent?
- What does the `sigma` coefficient represent?

We can easily extract the credible intervals with:

```{r}
posterior_interval(fit)
posterior_interval(fit, prob = 0.9)
posterior_interval(fit, prob = 0.89) # see 'Statistical Rethinking'
posterior_interval(fit, prob = 0.5)
```

Why might we prefer 90% or 50% or even 89% credible intervals over the usual 95%?

# Checking the priors

We are going to talk about priors more extensively soon.

It's helpful to know that you can extract details on the priors from a brms model with the `prior_summary()` function. In this case we specified all the priors explicitly, *which is a good practice*. This function is a good way to check that the priors were interpreted correctly, and is also a good way to discover parameters that you might have forgotten to set priors on explicitly.

```{r}
brms::prior_summary(fit)
```

Let's compare the full posterior distribution for a parameter to its prior as an example.

For models fit with brms, we can extract the posterior samples with `as.data.frame()` or `as.matrix()`. Let's use the data frame version. We'll also convert to a tibble, just so that it prints nicely.

```{r}
post <- as.data.frame(fit) |> as_tibble()
post
```

What does each column in this data frame represent?
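
A quick way to start answering that is to look at the dimensions and the column names — a short sketch:

```{r}
dim(post) # one row per posterior draw, one column per quantity
names(post)
```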
Our prior on the `ph_scaled^2` coefficient (the same normal(0, 5) prior applies to all the `b` coefficients):

```{r}
prior <- tibble(
  b_ph_scaled = seq(-10, 10, length.out = 300),
  density = dnorm(b_ph_scaled, 0, 5)
)
prior
```

Plot them both:

```{r}
# note `after_stat(density)` to get probability densities not counts
# see `?geom_histogram()`
ggplot() +
  geom_histogram(data = post, aes(b_Iph_scaledE2, after_stat(density)),
    bins = 80) +
  geom_ribbon(data = prior, aes(x = b_ph_scaled,
    ymax = density, ymin = 0), fill = "blue", alpha = 0.5) +
  coord_cartesian(xlim = c(-5, 2.5), expand = FALSE) # no gap below 0
```

One thing we haven't done is test the sensitivity of our posterior samples to the choice of priors. How could we go about testing that?

# Shiny Stan

The shinystan package is a one-stop shop for inspecting a Stan model. For a model fit with brms, rstanarm, or rstan we can launch it with:

```{r, eval=FALSE}
shinystan::launch_shinystan(fit)
```

# Plotting the posterior distribution of the linear predictor

```{r}
newdata <- expand.grid(
  ph_scaled = seq(min(d$ph_scaled), max(d$ph_scaled), length.out = 500),
  nutrients = c(0, 1)
)
head(newdata)
```

We can extract samples from the linear predictor with the `posterior_epred()` function (for this Gaussian model with an identity link, `posterior_linpred()` would return the same thing). These are samples from the posterior without observation error. In other words, these are similar in concept to the confidence interval you would get out of `predict.glm()` or `predict.lm()`.

```{r}
posterior_linear <- posterior_epred(fit, newdata = newdata)
dim(posterior_linear)
```

So we now have a matrix that is 4000 rows long and 1000 columns wide. Where do the 4000 and 1000 come from?

We can summarize the samples however we would like. I'm going to suggest we use the median and the 25% and 75% quantiles. We could also choose to use the mean and any other quantiles we wanted.

```{r}
newdata$est <- apply(posterior_linear, 2, median)
newdata$lwr <- apply(posterior_linear, 2, quantile, probs = 0.25)
newdata$upr <- apply(posterior_linear, 2, quantile, probs = 0.75)
```

```{r}
pp <- posterior_predict(fit, newdata = newdata)
newdata$lwr_pp <- apply(pp, 2, quantile, probs = 0.25)
newdata$upr_pp <- apply(pp, 2, quantile, probs = 0.75)

ggplot(newdata, aes(ph_scaled, exp(est),
  group = nutrients, ymin = exp(lwr), ymax = exp(upr),
  fill = as.factor(nutrients))) +
  geom_ribbon(alpha = 0.2) +
  geom_ribbon(alpha = 0.2, aes(ymin = exp(lwr_pp), ymax = exp(upr_pp))) +
  geom_line(lwd = 1, aes(colour = as.factor(nutrients))) +
  geom_point(data = d, aes(ph_scaled, change_seahare_mass_g_fw,
    colour = as.factor(nutrients)), inherit.aes = FALSE) +
  ylab("Change in seahare mass (g FW)")
```

Note that I exponentiated the predictions to put our plot back on the original natural scale.

# Summarizing the posterior distribution multiple ways

One of the nice things about Bayesian statistical models is that we can quantify the probability of nearly any comparison we can imagine. All you have to do is add, subtract, multiply, or divide the samples. Let's try some examples.
As a reminder, `post` comes from:

```{r}
post <- as.data.frame(fit) |> as_tibble()
post
```

What if we wanted to know the probability that there is a negative (frowning) quadratic shape (vs. a positive (smiling) quadratic shape) to the relationship? We can get that from the ph^2 term since we centered our ph predictor before fitting.

```{r}
ph2_samples <- post$b_Iph_scaledE2
mean(ph2_samples < 0) # prob. frowny
mean(ph2_samples > 0) # prob. smiley
```

We're taking advantage of the fact that R treats `TRUE` and `FALSE` as 1 and 0. So by taking the mean, we are doing the same thing as:

```{r}
sum(ph2_samples < 0) / length(ph2_samples)
```

What is the probability that the change in seahare mass is greater in the case where nutrients were not added?

```{r}
mean(post$b_nutrients < 0)
```

A major benefit to MCMC sampling of Bayesian models is how easy it is to quantify any comparison you want to make.

For example, how much greater would you expect the change in seahare mass to be under conditions of the lowest pH tested without nutrients compared to the average pH condition with nutrients?

I.e., compare the pink posterior in the lower left to the blue posterior in the middle of the last plot.

```{r}
min_ph <- min(d$ph_scaled)
mean_ph <- mean(d$ph_scaled)

condition1 <- data.frame(
  ph_scaled = min_ph,
  nutrients = c(0))
pp1 <- posterior_linpred(fit, newdata = condition1)[,1]

condition2 <- data.frame(
  ph_scaled = mean_ph,
  nutrients = c(1))
pp2 <- posterior_linpred(fit, newdata = condition2)[,1]

ratio <- exp(pp2) / exp(pp1)
ggplot(tibble(ratio = ratio), aes(ratio)) +
  geom_histogram() +
  scale_x_log10() +
  geom_vline(xintercept = 1)

quantile(ratio, probs = c(0.11, 0.5, 0.89))
mean(ratio > 1)
```

What's the probability this ratio is greater than 1.5?

```{r}
mean(ratio > 1.5)
```

If you can think it you can quantify it. And all you have to do is manipulate the MCMC samples. Add, subtract, multiply, or divide as needed.

--------------------------------------------------------------------------------
/06-posterior-predictive.Rmd:
--------------------------------------------------------------------------------

---
title: "Posterior predictive checking exercise"
output:
  html_document:
    toc: true
    toc_float: true
---

# Goals

- Experiment with using posterior predictive checks to discover issues with a Bayesian probability model.

# Setup

I've simulated and fit models to 3 data sets. Each model has some issue, and the probability model does not represent the data well. See if you can figure out what the problem is by using posterior predictive checks.

For each exercise, you'll read in two objects:

- `df`: a data frame with a predictor `x` and observed values `y`. In some cases there may be an additional column defining groups `g`.
- `yrep`: a matrix of posterior predictive simulated observations returned by `brms::posterior_predict()`. There are 20 rows (20 samples) by 100 or 200 columns (number of data points).

Use `bayesplot::ppc_*` functions (or make the plots yourself) to find the issues.
I've included the code here so you can focus on the interpretation.

Answer the following questions:

1. What is the visualization showing?
2. Do the predictive simulations have similar properties to the observed data? I.e., could the model have generated the observed data?
3. If not, what is the issue, what might have caused it, and how might you fix it? (You don't actually have to fix it for this exercise!)

# Exercise 1

```{r}
df <- readRDS(here::here("data/ppcheck-df1.rds"))
yrep <- readRDS(here::here("data/ppcheck-yrep1.rds"))
```

```{r}
y <- df$y
bayesplot::ppc_dens_overlay(y, yrep)
bayesplot::ppc_error_scatter_avg_vs_x(y, yrep, df$x)
bayesplot::ppc_intervals(y, yrep, x = df$x)

par(mfrow = c(1, 3))
plot(df$x, y)
plot(df$x, yrep[1,])
plot(df$x, yrep[2,])
```

Answer: the model isn't creating the curvature in the values of `y` with respect to `x`. The model was fit as `y ~ x` but is missing a quadratic term: `y ~ x + I(x^2)` or `y ~ x + poly(x, 2)`.

# Exercise 2

```{r}
df <- readRDS(here::here("data/ppcheck-df2.rds"))
yrep <- readRDS(here::here("data/ppcheck-yrep2.rds"))
```

```{r}
y <- df$y
par(mfrow = c(1, 3))
plot(df$x, df$y)
plot(df$x, yrep[1,])
plot(df$x, yrep[2,])
bayesplot::ppc_dens_overlay(y, yrep)
```

Answer: the model isn't creating enough spread in the data for larger values of `x`. The observation error distribution assumptions look off. These data were generated from a negative binomial model but were fit with a Poisson likelihood.

# Exercise 3

```{r}
df <- readRDS(here::here("data/ppcheck-df3.rds"))
yrep <- readRDS(here::here("data/ppcheck-yrep3.rds"))
```

Hint: try visualizing the posterior predictive simulations grouped or coloured by column `g`.

```{r}
y <- df$y
par(mfrow = c(1, 3))
plot(df$x, df$y)
plot(df$x, yrep[1,])
plot(df$x, yrep[2,])

bayesplot::ppc_dens_overlay(y, yrep)
bayesplot::ppc_dens_overlay_grouped(y, yrep, df$g)

df$yrep1 <- yrep[1,]

library(ggplot2)
ggplot(df, aes(x, y, colour = g)) + geom_point()
ggplot(df, aes(x, yrep1, colour = g)) + geom_point()
```

Answer: the model is lacking a random intercept by group `g`. The posterior simulations therefore lack the clumping of observations seen in the observed data.

--------------------------------------------------------------------------------
/07-priors-brms.Rmd:
--------------------------------------------------------------------------------

---
title: "Priors in brms"
output:
  html_document:
    toc: true
    toc_float: true
---

# Goals:

- Learn how to find what priors are needed in a given brms model and what the defaults are.
- Learn how to specify priors in brms.
- Learn how to see the influence of priors on the posterior.
- Learn how to do a prior predictive check.
- Learn how brms treats intercepts.
16 | 17 | # Exercise: 18 | 19 | ```{r} 20 | library(ggplot2) 21 | library(dplyr) 22 | theme_set(theme_light()) 23 | library(brms) 24 | dir.create("cache", showWarnings = FALSE) 25 | options(brms.file_refit = "on_change") # re-fit cached models if changes 26 | ``` 27 | 28 | Let's create some fake data: 29 | 30 | ```{r} 31 | x <- 1:20 32 | n <- length(x) 33 | a <- 0.2 34 | b <- 0.3 35 | sigma <- 1 36 | set.seed(2141) 37 | y <- a + b * x + sigma * rnorm(n) 38 | fake <- data.frame(x, y) 39 | ``` 40 | 41 | ```{r} 42 | ggplot(fake, aes(x, y)) + geom_point() 43 | ``` 44 | 45 | If we wanted to fit a simple linear regression of y on x, we could find the default priors for the parameters with: 46 | 47 | ```{r} 48 | brms::default_prior( 49 | y ~ x, data = fake 50 | ) 51 | ``` 52 | 53 | If we fit our model with the default priors: 54 | 55 | ```{r} 56 | mod1 <- brm(y ~ x, data = fake, family = gaussian()) 57 | ``` 58 | 59 | We could find out what priors we used with: 60 | 61 | ```{r} 62 | get_prior(mod1) 63 | ``` 64 | 65 | We should instead specify our own priors. We can do this with the `prior` argument. 66 | 67 | Note that by default, brms parameterizes the intercept as the intercept when the predictors are at their mean: 68 | 69 | ```{r, eval=FALSE} 70 | mod1 <- brm(y ~ x, data = fake, family = gaussian(), 71 | file = "cache/priors1", 72 | prior = c( 73 | set_prior("normal(0, 5)", class = "Intercept"), 74 | set_prior("normal(0, 1)", class = "b"), 75 | set_prior("student_t(3, 0, 3)", class = "sigma") 76 | ), 77 | sample_prior = "yes" 78 | ) 79 | ``` 80 | 81 | If instead we wanted to parameterize the intercept as the value of y when our predictors are 0, we would need to use the `0 + Intercept` syntax: 82 | 83 | ```{r} 84 | mod2 <- brm(y ~ 0 + Intercept + x, data = fake, family = gaussian(), 85 | file = "cache/priors2", 86 | prior = c( 87 | set_prior("normal(0, 5)", class = "b", coef = "Intercept"), 88 | set_prior("normal(0, 1)", class = "b", coef = "x"), 89 | set_prior("student_t(3, 0, 3)", class = "sigma") 90 | ), 91 | sample_prior = "yes" 92 | ) 93 | ``` 94 | 95 | We can look at what's happening in the Stan code: 96 | 97 | ```{r} 98 | stancode(mod1) 99 | stancode(mod2) 100 | ``` 101 | 102 | ```{r} 103 | sims1 <- as_draws_df(mod1) 104 | mean(sims1$b_Intercept) 105 | mean(sims1$Intercept) 106 | ``` 107 | 108 | ```{r} 109 | sims2 <- as_draws_df(mod2) 110 | mean(sims2$b_Intercept) 111 | ``` 112 | 113 | ```{r} 114 | p2 <- brms::prior_draws(mod2) # sample_prior = "yes" 115 | hist(p2$b_x, breaks = 50) 116 | hist(p2$sigma, breaks = 50) 117 | ``` 118 | 119 | ```{r} 120 | mod_prior_only <- brm( 121 | y ~ 0 + Intercept + x, data = fake, family = gaussian(), 122 | file = "cache/priors3", 123 | prior = c( 124 | set_prior("normal(0, 5)", class = "b", coef = "Intercept"), 125 | set_prior("normal(0, 1)", class = "b", coef = "x"), 126 | set_prior("student_t(3, 0, 3)", class = "sigma") 127 | ), 128 | sample_prior = "only", #< 129 | seed = 2028, iter = 100, chains = 1 130 | ) 131 | ``` 132 | 133 | ```{r} 134 | prior_pushforward <- brms::posterior_epred(mod_prior_only) 135 | prior_predictive <- brms::posterior_predict(mod_prior_only) 136 | ``` 137 | 138 | ```{r} 139 | fake$pf1 <- prior_pushforward[1,] 140 | fake$pf2 <- prior_pushforward[2,] 141 | fake$pf3 <- prior_pushforward[3,] 142 | ggplot(fake, aes(x, pf1)) + geom_point() 143 | ggplot(fake, aes(x, pf2)) + geom_point() 144 | ggplot(fake, aes(x, pf3)) + geom_point() 145 | ``` 146 | 147 | ```{r} 148 | fake$pp1 <- prior_predictive[2,] 149 | fake$pp2 <-
prior_predictive[3,] 150 | fake$pp3 <- prior_predictive[4,] 151 | ggplot(fake, aes(x, pp1)) + geom_point() 152 | ggplot(fake, aes(x, pp2)) + geom_point() 153 | ggplot(fake, aes(x, pp3)) + geom_point() 154 | ``` 155 | 156 | ```{r} 157 | mod_prior_only_wide <- brm( 158 | y ~ 0 + Intercept + x, data = fake, family = gaussian(), 159 | file = "cache/priors4", 160 | prior = c( 161 | set_prior("normal(0, 100)", class = "b", coef = "Intercept"), 162 | set_prior("normal(0, 100)", class = "b", coef = "x"), 163 | set_prior("student_t(3, 0, 100)", class = "sigma") 164 | ), 165 | sample_prior = "only", 166 | seed = 2028, iter = 100, chains = 1 167 | ) 168 | ``` 169 | 170 | ```{r} 171 | prior_predictive <- brms::posterior_predict(mod_prior_only_wide) 172 | fake$pp1 <- prior_predictive[1,] 173 | fake$pp2 <- prior_predictive[18,] 174 | fake$pp3 <- prior_predictive[42,] 175 | ggplot(fake, aes(x, pp1)) + geom_point() 176 | ggplot(fake, aes(x, pp2)) + geom_point() 177 | ggplot(fake, aes(x, pp3)) + geom_point() 178 | ``` 179 | -------------------------------------------------------------------------------- /08-stan-programming-basic.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "An introduction to Stan programming" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: true 7 | --- 8 | 9 | # Goals 10 | 11 | - Learn the basics of Stan model syntax and how to interact with Stan in R 12 | - Learn to generate our own posterior predictions from a Stan model 13 | - Learn how to calculate LOOIC from a Stan model we wrote ourselves 14 | 15 | # Setup 16 | 17 | ```{r, echo=FALSE} 18 | knitr::opts_chunk$set( 19 | collapse = TRUE, 20 | comment = "#>", 21 | fig.width = 6, 22 | fig.asp = 0.618, 23 | fig.align = "center" 24 | ) 25 | show_file <- function(file) { 26 | cat(paste(readLines(file), collapse = "\n")) 27 | } 28 | ``` 29 | 30 | ```{r, warning=FALSE, message=FALSE} 31 | library(ggplot2) 32 | library(dplyr) 33 | theme_set(theme_light()) 34 | ``` 35 | 36 | # Exercise 37 | 38 | Let's load the rstan R package, which lets us interact with Stan from R. 39 | 40 | As an aside, the cmdstanr package is another option for interacting with Stan from R and has several advantages. 41 | 42 | ```{r} 43 | library(rstan) 44 | ``` 45 | 46 | If we wanted, we could set an option to use parallel processing. This would be useful with more complicated models or more data. For this simple exercise, it will probably be fastest with a single core. 47 | 48 | ```{r} 49 | # options(mc.cores = parallel::detectCores()) 50 | options(mc.cores = 1) # default 51 | ``` 52 | 53 | Another important option we can set with rstan is whether we want the compiled model to be saved so that we don't have to recompile it every time we run it. You probably want to set this because the compilation can take a while. 54 | 55 | ```{r} 56 | rstan_options(auto_write = TRUE) 57 | ``` 58 | 59 | # Simulating data 60 | 61 | Let's simulate some data to fit a very simple Stan model to. We will focus on a linear model with normally distributed errors. 62 | 63 | ```{r} 64 | set.seed(42) 65 | N <- 30 66 | x <- rnorm(N, 0, 0.5) 67 | alpha <- -0.2 68 | beta <- 0.4 69 | sigma <- 0.3 70 | y <- rnorm(N, mean = alpha + x * beta, sd = sigma) 71 | dat <- tibble(x = x, y = y) 72 | ``` 73 | 74 | ```{r} 75 | ggplot(dat, aes(x, y)) + geom_point() 76 | ``` 77 | 78 | # The Stan model 79 | 80 | Take a look at the following Stan model that I have set up. 81 | 82 | We'll talk about the various sections as a group.
83 | 84 | ```{r} 85 | show_file("stan/lm-simple.stan") 86 | ``` 87 | 88 | This next version also has a generated quantities section. This is the one we will actually run. The generated quantities block makes it easier to do some things afterwards, like making posterior predictions. 89 | 90 | We'll talk about these additions as a group. 91 | 92 | ```{r} 93 | show_file("stan/lm.stan") 94 | ``` 95 | 96 | # Fitting the model 97 | 98 | The first time we run the next code chunk, Stan will translate our model into C++ and compile it. This will take a little while. After that, assuming we set `rstan_options(auto_write = TRUE)`, Stan will avoid recompiling the model unless something in the model code changes. 99 | 100 | Let's sample from the model now: 101 | 102 | ```{r, message=FALSE, results='hide'} 103 | fit <- stan(here::here("stan/lm.stan"), chains = 4, iter = 2000, seed = 39382, 104 | data = list(x = dat$x, y = dat$y, N = length(dat$y))) 105 | ``` 106 | 107 | Congratulations --- you fit your first handwritten Stan model! We can do everything with the posterior samples that we could do with the samples from an rstanarm model or a brms model. 108 | 109 | Some of the built-in helper functions from those packages won't work with our model though. 110 | 111 | ```{r, eval=FALSE} 112 | fit 113 | ``` 114 | 115 | Notice all of the extra lines of reported results for our generated quantities? We can focus just on the parameters we want with: 116 | 117 | ```{r} 118 | pars <- c("alpha", "beta", "sigma") 119 | print(fit, pars = pars) 120 | ``` 121 | 122 | We can use shinystan if we want to look at the model. 123 | 124 | ```{r, eval=FALSE} 125 | shinystan::launch_shinystan(fit) 126 | ``` 127 | 128 | The default plot method shows point estimates and credible intervals. 129 | 130 | ```{r} 131 | plot(fit, pars = pars) 132 | ``` 133 | 134 | There are other options. 135 | 136 | ```{r, eval=FALSE} 137 | ?rstan::`rstan-plotting-functions` 138 | stan_dens(fit, pars = pars) 139 | ``` 140 | 141 | Alternatively, and probably preferably, we can use the bayesplot package. 142 | 143 | Experiment with inspecting the posterior chains using the bayesplot package. 144 | 145 | ```{r} 146 | fit_array <- as.array(fit) 147 | bayesplot::mcmc_trace(fit_array, pars = pars) 148 | bayesplot::mcmc_dens_overlay(fit_array, pars = pars) # exercise 149 | ``` 150 | 151 | # Manipulating the posterior samples ourselves 152 | 153 | ```{r} 154 | post <- rstan::extract(fit) 155 | ``` 156 | 157 | The output from `rstan::extract()` is a named list. Each element of the list is a numeric vector of samples if that parameter had one dimension (as all of our parameters did this time), or a matrix of samples if, say, beta had represented multiple slope parameters. 158 | 159 | ```{r} 160 | names(post) 161 | dim(post$beta) 162 | ``` 163 | 164 | Let's overlay some model fits from the posterior on the data: 165 | 166 | ```{r} 167 | N <- 200 168 | ggplot(dat, aes(x, y)) + geom_point() + 169 | geom_abline( 170 | intercept = post$alpha[1:N], 171 | slope = post$beta[1:N], 172 | alpha = 0.1) 173 | ``` 174 | 175 | Is that starting to look like the confidence/credible intervals you are used to looking at?
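If we wanted an explicit interval rather than spaghetti lines, we could summarize the posterior of the expected value over a grid of x values ourselves. Here's a quick sketch (the 90% interval and 50-point grid are arbitrary choices):

```{r}
x_grid <- seq(min(dat$x), max(dat$x), length.out = 50)
# expected value for every posterior draw at every x in the grid:
mu <- sapply(x_grid, function(xi) post$alpha + post$beta * xi)
# 5%, 50%, and 95% quantiles of the expected value at each x:
ci <- apply(mu, 2, quantile, probs = c(0.05, 0.5, 0.95))
ci_df <- tibble(x = x_grid, lwr = ci[1, ], med = ci[2, ], upr = ci[3, ])
ggplot(dat, aes(x, y)) +
  geom_point() +
  geom_ribbon(data = ci_df, aes(x = x, ymin = lwr, ymax = upr),
    inherit.aes = FALSE, alpha = 0.3) +
  geom_line(data = ci_df, aes(x, med), inherit.aes = FALSE)
```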
176 | 177 | # Posterior predictive simulations 178 | 179 | We can experiment with inspecting the posterior predictive distribution using the bayesplot package: 180 | 181 | ```{r} 182 | post <- rstan::extract(fit) # extract the posterior simulations as a list 183 | # grab the `posterior_predictions` from our `generated quantities` section: 184 | pp <- post$posterior_predictions 185 | bayesplot::ppc_dens_overlay(y = dat$y, yrep = pp[1:25, ]) 186 | ``` 187 | 188 | Let's do it in R to see how we could create them afterwards. The result is the same. The question is whether it is simpler to compute the posterior simulations in the Stan code or in R. 189 | 190 | We need to form the prediction for each data point and add observation error. Let's manually create 8 draws from the posterior predictive distribution and compare them to our data. 191 | 192 | There are many ways you could do this. The following is one way. We will create a list with posterior predictions within data frames and then bind the elements of the list into a big data frame. We are doing this so it is easy to plot the output with ggplot. 193 | 194 | ```{r} 195 | set.seed(1) 196 | n_sim <- 8 197 | out <- list() 198 | for (i in seq_len(n_sim)) { 199 | out[[i]] <- tibble(x = x) 200 | out[[i]]$i <- i 201 | out[[i]]$y_pp <- 202 | rnorm( 203 | n = length(x), 204 | mean = post$alpha[i] + post$beta[i] * x, 205 | sd = post$sigma[i] 206 | ) 207 | } 208 | out <- dplyr::bind_rows(out) # turn the list into a data.frame 209 | out$type <- "posterior prediction" 210 | 211 | # add the real data as the last panel: 212 | out <- dplyr::bind_rows( 213 | out, 214 | tibble(x = x, y_pp = y, type = "observed", i = 9) 215 | ) 216 | 217 | ggplot(out, aes(x, y_pp, colour = type)) + 218 | geom_point() + 219 | facet_wrap(~i) 220 | ``` 221 | 222 | ## Questions: 223 | 224 | Which approach do you prefer in this case (Stan generated quantities vs. R code)? 225 | 226 | When might you prefer one over the other approach? 227 | -------------------------------------------------------------------------------- /09-stan-coding-growth.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "More coding in Stan: a non-linear model example" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: true 7 | --- 8 | 9 | # Goals: 10 | 11 | - Gain exposure to a non-linear model written in Stan that's more complicated than the simple linear regression. 12 | - See how we can do prior predictive simulations in a custom Stan model. 13 | - Gain an initial exposure to Bayesian model comparison. 14 | 15 | # Setup 16 | 17 | ```{r, message=FALSE, warning=FALSE} 18 | library(rstan) 19 | library(dplyr) 20 | library(ggplot2) 21 | options(mc.cores = parallel::detectCores()) 22 | rstan_options(auto_write = TRUE) 23 | show_file <- function(file) { 24 | cat(paste(readLines(file), collapse = "\n")) 25 | } 26 | ``` 27 | 28 | # Exercise 29 | 30 | These are Pacific Cod age and length data from two DFO synoptic trawl surveys in BC for Hecate Strait and West Coast Vancouver Island. 31 | 32 | ```{r} 33 | d <- readRDS(here::here("data/pcod-growth.rds")) 34 | d$i <- seq_len(nrow(d)) 35 | ggplot(d, aes(age, length)) + geom_point() + 36 | facet_wrap(~survey) 37 | ``` 38 | 39 | We will fit a von Bertalanffy growth model to these data. Here's a simple version: 40 | 41 | ```{r} 42 | show_file("vb/vb_basic.stan") 43 | ``` 44 | 45 | We will assume normally distributed observation error for simplicity (it's also relatively common).
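In equation form, the model we're fitting is roughly the following (a sketch using the parameter names from the Stan code, with sigma a standard deviation, following Stan's convention):

$$\mathrm{length}_i \sim \operatorname{Normal}\!\left(l_\infty \left(1 - e^{-k \, (\mathrm{age}_i - t_0)}\right),\; \sigma\right)$$

where $l_\infty$ (`linf`) is the asymptotic length, $k$ controls how quickly lengths approach that asymptote, and $t_0$ is the theoretical age at length zero.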
46 | 47 | Why is this technically not an ideal choice? 48 | 49 | What would be some alternatives? 50 | 51 | This version has some more bells and whistles: 52 | 53 | ```{r} 54 | show_file("vb/vb_norm.stan") 55 | ``` 56 | 57 | Form our data list for rstan: 58 | 59 | ```{r} 60 | dat <- list( 61 | N = nrow(d), 62 | length = d$length, 63 | age = d$age, 64 | prior_sds = c(k = 1, linf = 200, t0 = 1, sigma = 10), 65 | prior_only = 0 66 | ) 67 | ``` 68 | 69 | Sample from our model: 70 | 71 | ```{r, results="hide", message=FALSE} 72 | fit1 <- stan(here::here("vb/vb_norm.stan"), data = dat, iter = 1000, chains = 4, seed = 9129) 73 | ``` 74 | 75 | Note that we set the `seed` here. Why might we want to do that? 76 | 77 | Print the parameters of interest: 78 | 79 | Why am I specifying the parameters explicitly here? 80 | 81 | ```{r} 82 | print(fit1, pars = c("k", "linf", "t0", "sigma")) 83 | ``` 84 | 85 | There are many formats we can extract posterior samples in: 86 | 87 | ```{r} 88 | sims_list <- extract(fit1) 89 | names(sims_list) 90 | 91 | sims_matrix <- as.matrix(fit1) 92 | dim(sims_matrix) 93 | colnames(sims_matrix)[1:4] 94 | 95 | sims_array <- as.array(fit1) 96 | dim(sims_array) 97 | colnames(sims_array[1,,])[1:4] 98 | 99 | sims_df <- as.data.frame(fit1) 100 | head(sims_df[,1:4]) 101 | ``` 102 | 103 | Look at MCMC trace plots: 104 | 105 | ```{r} 106 | dim(sims_array) 107 | bayesplot::mcmc_trace(sims_array[,,1:4]) 108 | ``` 109 | 110 | Grab posterior predictions of new observations: 111 | 112 | We'll use `tidybayes::gather_draws()`, which conveniently gathers 20 MCMC samples for `length_sim` and forms a "long-format" data frame that is easy to work with in dplyr or ggplot. `[i]` tells the function to index each observation as `i` in the data frame. 113 | 114 | Again, I set the seed here. Why is that? 115 | 116 | ```{r} 117 | post <- tidybayes::gather_draws(fit1, length_sim[i], ndraws = 20, seed = 9283) 118 | head(post) 119 | 120 | # column 'i' was previously added to our data above so we can join on it: 121 | post <- left_join(post, d) 122 | ggplot(post, aes(age, .value)) + geom_point(alpha = 0.2) 123 | ``` 124 | 125 | Question: what are we looking at? 126 | 127 | Expected length values: 128 | 129 | ```{r} 130 | post <- tidybayes::gather_draws(fit1, predicted_length[i], ndraws = 20, seed = 9283) 131 | head(post) 132 | 133 | post <- left_join(post, d) 134 | ggplot(post, aes(age, .value, group = .draw)) + geom_line(alpha = 0.2) + 135 | geom_point(data = d, mapping = aes(age, length), inherit.aes = FALSE, alpha = 0.2) 136 | ``` 137 | 138 | Question: what are we looking at? 139 | 140 | The parameter posteriors: 141 | 142 | ```{r} 143 | post <- tidybayes::gather_draws(fit1, c(k, linf, t0, sigma)) 144 | head(post) 145 | ggplot(post, aes(.value)) + geom_histogram() + 146 | facet_wrap(~.variable, scales = "free_x") 147 | ``` 148 | 149 | We can calculate ELPD (expected log predictive density) with a leave-one-out approximation because we included `log_lik` in our `generated quantities` section. It's not all that useful yet though, since we've only fit one model. We'll dive more deeply into this elsewhere. 150 | 151 | ```{r} 152 | loo1 <- loo(fit1) 153 | loo1 154 | ``` 155 | 156 | # Extending our model 157 | 158 | We're now going to take the above model and modify it to allow for separate k, linf, and t0 by survey region (HS vs. WCVI). We want to ask whether there is evidence in the growth curve that we should be treating these regions as separate stocks.
159 | 160 | ```{r} 161 | show_file("vb/vb_norm_regions.stan") 162 | ``` 163 | 164 | ```{r, results="hide"} 165 | dat2 <- dat 166 | levels(factor(d$survey)) 167 | unique(as.integer(factor(d$survey))) 168 | dat2$survey_id <- as.integer(factor(d$survey)) 169 | dat2$N_surveys <- 2 170 | 171 | fit2 <- stan(here::here("vb/vb_norm_regions.stan"), data = dat2, iter = 1000, chains = 4, seed = 9129) 172 | ``` 173 | 174 | ```{r} 175 | print(fit2, pars = c("k", "linf", "t0", "sigma")) 176 | ``` 177 | 178 | ```{r} 179 | sims2 <- extract(fit2) 180 | ``` 181 | 182 | Plot the 80% quantile credible interval of expected values and the posterior predictive distribution: 183 | 184 | ```{r} 185 | dim(sims2$length_sim) 186 | d$upr <- apply(sims2$length_sim, 2, quantile, probs = 0.9) 187 | d$med <- apply(sims2$length_sim, 2, quantile, probs = 0.5) 188 | d$lwr <- apply(sims2$length_sim, 2, quantile, probs = 0.1) 189 | 190 | d$e_upr <- apply(sims2$predicted_length, 2, quantile, probs = 0.9) 191 | d$e_med <- apply(sims2$predicted_length, 2, quantile, probs = 0.5) 192 | d$e_lwr <- apply(sims2$predicted_length, 2, quantile, probs = 0.1) 193 | 194 | ggplot(d, aes(age, med, colour = survey, fill = survey)) + 195 | geom_line() + 196 | geom_line(aes(y = e_med)) + 197 | geom_ribbon(aes(ymin = e_lwr, ymax = e_upr), alpha = 0.5, colour = NA) + 198 | geom_ribbon(aes(ymin = lwr, ymax = upr), alpha = 0.2, colour = NA) + 199 | geom_point(data = d, mapping = aes(age, length), inherit.aes = FALSE, alpha = 0.2) + 200 | ylab("Length") + xlab("Age") 201 | ``` 202 | 203 | ```{r} 204 | y <- d$length 205 | sims1 <- extract(fit1) 206 | sims2 <- extract(fit2) 207 | set.seed(1) 208 | i <- sample(1:nrow(sims1$length_sim), size = 20) 209 | yrep1 <- sims1$length_sim[i,] 210 | yrep2 <- sims2$length_sim[i,] 211 | bayesplot::ppc_dens_overlay(y, yrep1) 212 | bayesplot::ppc_dens_overlay(y, yrep2) 213 | 214 | bayesplot::ppc_dens_overlay_grouped(y, yrep1, group = d$survey) 215 | bayesplot::ppc_dens_overlay_grouped(y, yrep2, group = d$survey) 216 | ``` 217 | 218 | The `^`s in the following "regular expressions" simply mean the parameters must start with each of these patterns. 219 | 220 | 221 | ```{r} 222 | sims_array2 <- as.array(fit2)[,,1:10] 223 | bayesplot::mcmc_trace(sims_array2, regex_pars = c("^k", "^linf", "^t0", "^sigma")) 224 | ``` 225 | 226 | What does ELPD based on LOO cross validation tell us about the two models? 227 | 228 | ```{r} 229 | loo2 <- loo(fit2) 230 | loo::loo_compare(loo1, loo2) 231 | ``` 232 | 233 | We can compare the posterior distributions of parameters from the two surveys. 234 | 235 | We will calculate differences for each of the parameters: 236 | 237 | ```{r} 238 | sims <- extract(fit2) 239 | dim(sims$k) 240 | k_diff <- sims$k[,2] - sims$k[,1] 241 | linf_diff <- sims$linf[,2] - sims$linf[,1] 242 | t0_diff <- sims$t0[,2] - sims$t0[,1] 243 | ``` 244 | 245 | ```{r} 246 | hist(k_diff) 247 | hist(linf_diff) 248 | hist(t0_diff) 249 | ``` 250 | 251 | # Exercise: 252 | 253 | Using those posterior draws, answer the following questions: 254 | 255 | What is the probability that linf is greater in WCVI than in HS? 256 | 257 | ```{r} 258 | mean(linf_diff > 0) # exercise 259 | ``` 260 | 261 | What is the probability that linf is at least 5 cm greater in WCVI than in HS? 262 | 263 | ```{r} 264 | mean(linf_diff > 5) # exercise 265 | ``` 266 | 267 | What is the probability that linf in WCVI is different from linf in HS by more than 4cm? 
268 | 269 | ```{r} 270 | mean(linf_diff > 4 | linf_diff < -4) # exercise 271 | ``` 272 | -------------------------------------------------------------------------------- /10-loo.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Leave-one-out cross validation, log scores, and ELPD" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: true 7 | --- 8 | 9 | # Goals: 10 | 11 | - Understand ELPD and LOO concepts 12 | - Understand the 'loo' R package approximation 13 | 14 | # Background 15 | 16 | This material builds on code from the book: 17 | 18 | Gelman, A., Hill, J., and Vehtari, A. 2021. Regression and other stories. Cambridge University Press, Cambridge. doi:10.1017/9781139161879. 19 | 20 | We're going to use rstanarm instead of brms here just because rstanarm doesn't require us to compile the models, so the code will run much faster. 21 | 22 | Let's get some acronyms out of the way first: 23 | 24 | - ELPD: expected log (pointwise) predictive density (typically on left-out data) 25 | - LOO: leave-one-out 26 | - LOOIC: leave-one-out information criterion 27 | 28 | ```{r, message=FALSE, warning=FALSE} 29 | library(rstanarm) 30 | library(ggplot2) 31 | theme_set(theme_light()) 32 | options(mc.cores = 1) # no parallel: faster for these simple regressions 33 | ``` 34 | 35 | Let's simulate some data for a linear regression: 36 | 37 | ```{r } 38 | x <- 1:20 39 | n <- length(x) 40 | a <- 0.2 41 | b <- 0.3 42 | sigma <- 1 43 | set.seed(2141) 44 | y <- a + b * x + sigma * rnorm(n) 45 | fake <- data.frame(x, y) 46 | ``` 47 | 48 | Let's fit a linear model. `rstanarm::stan_glm()` here is equivalent to `brms::brm()`. 49 | 50 | ```{r results='hide'} 51 | fit_all <- stan_glm(y ~ x, data = fake, seed = 2141, chains = 10, refresh = 0) 52 | ``` 53 | 54 | # The concept of leave-one-out prediction (LOO) 55 | 56 | Now, let's fit a linear model without the 18th observation: 57 | 58 | ```{r } 59 | fit_minus_18 <- stan_glm(y ~ x, data = fake[-18, ], seed = 2141, refresh = 0) 60 | ``` 61 | 62 | Extract posterior draws: 63 | 64 | ```{r } 65 | sims <- as.matrix(fit_all) 66 | sims_minus_18 <- as.matrix(fit_minus_18) 67 | ``` 68 | 69 | We can compute the posterior predictive distribution given x=18: 70 | 71 | ```{r } 72 | condpred <- data.frame(y = seq(0, 9, length.out = 100)) 73 | condpred$x <- sapply(condpred$y, \(y) 74 | mean(dnorm(y, sims[, 1] + sims[, 2] * x[18], sims[, 3]) * 6 + 18)) 75 | # the * 6 + 18 here is just for plotting purposes below 76 | ``` 77 | 78 | Compute LOO (leave-one-out) posterior predictive distribution given x=18: 79 | 80 | ```{r } 81 | condpredloo <- data.frame(y = seq(0, 9, length.out = 100)) 82 | condpredloo$x <- sapply(condpredloo$y, \(y) 83 | mean(dnorm(y, sims_minus_18[, 1] + sims_minus_18[, 2] * x[18], sims_minus_18[, 3]) * 6 + 18)) 84 | ``` 85 | 86 | Create a plot with the posterior mean and posterior predictive distribution: 87 | 88 | ```{r } 89 | ggplot(fake, aes(x = x, y = y)) + 90 | geom_point(color = "white", size = 3) + 91 | geom_point(color = "black", size = 2) + 92 | geom_abline( 93 | intercept = mean(sims[, 1]), 94 | slope = mean(sims[, 2]), 95 | color = "black" 96 | ) + 97 | geom_path(data = condpred, aes(x = x, y = y), color = "black") + 98 | geom_vline(xintercept = 18, linetype = 3, color = "grey") + 99 | geom_point(data = fake[18, ], color = "grey50", size = 5, shape = 1) + 100 | geom_abline( 101 | intercept = mean(sims_minus_18[, 1]), 102 | slope = mean(sims_minus_18[, 2]), 103 | color = "grey50", 104 | linetype
= 2 105 | ) + 106 | geom_path(data = condpredloo, aes(x = x, y = y), color = "grey50", linetype = 2) 107 | ``` 108 | 109 | Note how dropping data point 18 shifts the posterior predictive distribution for the left-out point. 110 | 111 | Now, let's compute posterior and LOO residuals. `loo_predict()` computes the mean of the LOO predictive distribution: 112 | 113 | ```{r, message=FALSE} 114 | fake$residual <- fake$y - fit_all$fitted.values 115 | fake$looresidual <- fake$y - loo_predict(fit_all)$value 116 | ``` 117 | 118 | Plot posterior and LOO residuals: 119 | 120 | ```{r } 121 | ggplot(fake, aes(x = x, y = residual)) + 122 | geom_point(color = "black", size = 2, shape = 16) + 123 | geom_point(aes(y = looresidual), color = "grey50", size = 2, shape = 1) + 124 | geom_segment(aes(xend = x, y = residual, yend = looresidual)) + 125 | geom_hline(yintercept = 0, linetype = 2) 126 | ``` 127 | 128 | Note how the LOO residuals are all larger in size than the regular residuals. The model is pulled slightly towards each data point if it's included. 129 | 130 | We can also see this by looking at the standard deviations of posterior and LOO residuals: 131 | 132 | ```{r } 133 | round(sd(fake$residual), 2) 134 | round(sd(fake$looresidual), 2) 135 | ``` 136 | 137 | Variance of residuals is connected to R^2, which can be defined as 1-var(res)/var(y): 138 | 139 | ```{r } 140 | round(1 - var(fake$residual) / var(y), 2) 141 | round(1 - var(fake$looresidual) / var(y), 2) 142 | ``` 143 | 144 | # The concept of log scores and ELPD 145 | 146 | We can compute log predictive densities. This results in a matrix with a value for each MCMC sample and each data point: 147 | 148 | ```{r } 149 | ll_1 <- log_lik(fit_all) 150 | ``` 151 | 152 | Compute the average log pointwise posterior density (LPD) in a computationally stable way. This is also known as the log score (although these values are typically multiplied by -1). 153 | 154 | ```{r } 155 | fake$lpd_post <- matrixStats::colLogSumExps(ll_1) - log(nrow(ll_1)) 156 | ``` 157 | 158 | Let's do that by hand in R to make sure we know what just went on. 159 | 160 | Calculate the expectation for each observation for each MCMC sample: 161 | 162 | ```{r} 163 | y_hat <- matrix(nrow = nrow(sims), ncol = n) 164 | for (s in 1:nrow(sims)) { 165 | for (i in 1:n) { 166 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i] 167 | } 168 | } 169 | ``` 170 | 171 | Now, calculate the log density for each observation and each MCMC sample. This is the log score. 172 | 173 | ```{r} 174 | ll_2 <- matrix(nrow = nrow(sims), ncol = n) 175 | for (s in 1:nrow(sims)) { 176 | for (i in 1:n) { 177 | ll_2[s, i] <- dnorm(y[i], mean = y_hat[s, i], sd = sims[s, 3], log = TRUE) 178 | } 179 | } 180 | ``` 181 | 182 | Now, take the average density for each data point across MCMC samples: 183 | 184 | Note we're averaging over the likelihood, *not* the *log* likelihood. 185 | 186 | ```{r} 187 | lpd_2 <- log(apply(exp(ll_2), 2, mean)) # computationally dangerous! 188 | # lpd_2 <- matrixStats::colLogSumExps(ll_2) - log(nrow(ll_2)) # safer 189 | plot(fake$lpd_post, lpd_2) 190 | abline(0, 1) 191 | ``` 192 | 193 | These match. The sum of these log predictive densities is called the 'ELPD': the expected log predictive density.
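In symbols, what we just computed for each data point $i$ is (with $S$ the number of MCMC samples and $\theta^{(s)}$ the $s$th posterior draw):

$$\widehat{\mathrm{lpd}}_i = \log\left(\frac{1}{S}\sum_{s=1}^{S} p(y_i \mid \theta^{(s)})\right), \qquad \widehat{\mathrm{elpd}} = \sum_{i=1}^{n} \widehat{\mathrm{lpd}}_i.$$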
194 | 195 | We can check that our hand calculation matches the calculation from the loo package: 196 | 197 | ```{r} 198 | elpd_2 <- sum(lpd_2) 199 | elpd_2 200 | loo::elpd(log_lik(fit_all)) 201 | ``` 202 | 203 | # Combining LOO with ELPD 204 | 205 | So far, we have calculated ELPD on the data predicted from a model fit to all the data. But, we know this is an overly optimistic perspective on predictive ability for new data. Instead, we can compute log LOO predictive densities, which is typically how ELPD is used. 206 | 207 | `loo::loo()` uses fast approximate leave-one-out cross-validation to do this: 208 | 209 | ```{r } 210 | loo_1 <- loo(fit_all) 211 | loo_1 212 | fake$lpd_loo <- loo_1$pointwise[, "elpd_loo"] 213 | ``` 214 | 215 | This approximation (demonstrated at the end) is equivalent to (but much faster than) this: 216 | 217 | ```{r, results="hide"} 218 | lpd_loo <- numeric(n) 219 | for (i in 1:n) { 220 | cat(i, "\n") 221 | this_dat <- fake[-i, ] 222 | fit_minus_i <- rstanarm::stan_glm(y ~ x, data = this_dat, seed = 2141, iter = 2000, chains = 4, cores = 1, refresh = 0) 223 | draws <- as.matrix(fit_minus_i) 224 | y_hat <- matrix(nrow = nrow(draws), ncol = 1) 225 | yhat <- numeric(nrow(draws)) 226 | ll <- numeric(nrow(draws)) 227 | for (s in 1:nrow(draws)) { 228 | y_hat[s] <- draws[s, 1] + draws[s, 2] * x[i] 229 | ll[s] <- dnorm(y[i], mean = y_hat[s], sd = draws[s, 3], log = TRUE) 230 | } 231 | lpd_loo[i] <- log(mean(exp(ll))) # computationally dangerous 232 | # lpd_loo[i] <- matrixStats::logSumExp(ll) - log(length(ll)) # safer 233 | } 234 | ``` 235 | 236 | ```{r} 237 | sum(lpd_loo) 238 | sum(fake$lpd_loo) 239 | ``` 240 | 241 | We can compare the ELPD vs. the LOO ELPD values: 242 | 243 | ```{r } 244 | ggplot(fake, aes(x = x, y = lpd_post)) + 245 | geom_point(color = "black", size = 2, shape = 16) + 246 | geom_point(aes(y = lpd_loo), color = "grey50", size = 2, shape = 1) + 247 | geom_segment(aes(xend = x, y = lpd_post, yend = lpd_loo)) + 248 | ylab("log predictive density") 249 | ``` 250 | 251 | # LOOIC 252 | 253 | LOOIC is defined as `-2 * elpd_loo`, i.e., converted to the 'deviance' scale as in AIC. 254 | 255 | ```{r} 256 | -2 * sum(lpd_loo) 257 | loo::loo(fit_all) 258 | ``` 259 | 260 | There's no reason we have to use that. We can also just work with ELPD. Note that more positive ELPD is 'better'. 261 | 262 | # Model comparison with LOO ELPD 263 | 264 | We will work with a regression on a dataset of child IQ scores, mom IQ scores, and other covariates such as whether the mom finished high school. It's from the Regression and Other Stories book and originally from the National Longitudinal Survey of Youth.
266 | 267 | ```{r } 268 | kidiq <- readRDS(here::here("data/kidiq.rds")) 269 | ``` 270 | 271 | Linear regression with mom high school and IQ as predictors of child IQ: 272 | 273 | ```{r } 274 | fit_3 <- stan_glm(kid_score ~ mom_hs + mom_iq, 275 | data = kidiq, 276 | seed = 19203, refresh = 0 277 | ) 278 | fit_3 279 | ``` 280 | 281 | Compute R^2 and LOO-R^2 manually: 282 | 283 | ```{r, message=FALSE} 284 | respost <- kidiq$kid_score - fit_3$fitted.values 285 | resloo <- kidiq$kid_score - loo_predict(fit_3)$value 286 | round(R2 <- 1 - var(respost) / var(kidiq$kid_score), 3) 287 | round(R2loo <- 1 - var(resloo) / var(kidiq$kid_score), 3) 288 | ``` 289 | 290 | Add five pure noise predictors to the data: 291 | 292 | ```{r } 293 | set.seed(1) 294 | n <- nrow(kidiq) 295 | kidiqr <- kidiq 296 | kidiqr$noise <- array(rnorm(5 * n), c(n, 5)) 297 | ``` 298 | 299 | Linear regression with additional noise predictors: 300 | 301 | ```{r } 302 | fit_3n <- stan_glm(kid_score ~ mom_hs + mom_iq + noise, 303 | data = kidiqr, 304 | seed = 19203, refresh = 0 305 | ) 306 | fit_3n 307 | ``` 308 | 309 | Compute R^2 and LOO-R^2 manually: 310 | 311 | ```{r, message=FALSE} 312 | respostn <- kidiq$kid_score - fit_3n$fitted 313 | resloon <- kidiq$kid_score - loo_predict(fit_3n)$value 314 | round(R2n <- 1 - var(respostn) / var(kidiq$kid_score), 3) 315 | round(R2loon <- 1 - var(resloon) / var(kidiq$kid_score), 3) 316 | ``` 317 | 318 | R^2 got better! LOO-R^2 got worse. 319 | 320 | Each pure noise predictor is expected to add 0.5 to the in-sample ELPD and subtract 0.5 from the LOO-ELPD. 321 | 322 | ```{r} 323 | loo_3 <- loo(fit_3) 324 | loo_3n <- loo(fit_3n) 325 | loo_compare(loo_3, loo_3n) 326 | ``` 327 | 328 | LOO ELPD favours the model without the noise predictors, but the difference isn't large. Presumably we'd pick the simpler model. 329 | 330 | "Regression and Other Stories" (p. 178) suggests that an ELPD difference of > 4 is a reliable way to distinguish models when the number of observations is > 100 and the models are well specified. Otherwise, the models are hard to distinguish. 331 | 332 | Let's try a model using only the maternal high school indicator: 333 | 334 | ```{r } 335 | fit_1 <- stan_glm(kid_score ~ mom_hs, data = kidiq, refresh = 0) 336 | loo_1 <- loo(fit_1) 337 | ``` 338 | 339 | Compare models using LOO log score (ELPD): 340 | 341 | ```{r } 342 | loo_compare(loo_3, loo_1) 343 | ``` 344 | 345 | We can also compare how individual data points are predicted: 346 | 347 | ```{r} 348 | elpdi1 <- loo_1$pointwise[, "elpd_loo"] 349 | elpdi3 <- loo_3$pointwise[, "elpd_loo"] 350 | 351 | kidiq$diff31 <- elpdi3 - elpdi1 352 | kidiq$i <- 1:nrow(kidiq) 353 | 354 | ggplot(kidiq, aes(i, diff31)) + 355 | geom_point() + 356 | geom_hline(yintercept = 0, lty = 2) + 357 | ylab("ELPD mod3 - ELPD mod 1\npositive favours mod3") 358 | ``` 359 | 360 | Leave-one-out data above the zero line are better predicted by model 3. 361 | 362 | # Understanding the LOO ELPD approximation 363 | 364 | Let's do the loo approximation by hand without the smoothing part. 365 | 366 | The basic idea is that excluding a data point is equivalent to subtracting the log density of that data point (assuming independence) or, equivalently, dividing the total posterior density by the density for that one data point. We can use this property to form weights to sample from our existing posterior samples to approximate the posterior if we had dropped that data point. This is a form of "importance sampling".
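In symbols (a sketch, assuming independent observations): because

$$p(\theta \mid y_{-i}) \propto \frac{p(\theta \mid y)}{p(y_i \mid \theta)},$$

draws from the full posterior can stand in for draws from the leave-one-out posterior if we give each draw $\theta^{(s)}$ a weight $w_i^{(s)} \propto 1 / p(y_i \mid \theta^{(s)})$. That is exactly what the `1/exp(log_dens[, i])` line below computes.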
366 | 367 | Let's work through an example with the original simulated dataset: 368 | 369 | We previously did this to come up with our expectations for each data point: 370 | 371 | ```{r} 372 | y_hat <- matrix(nrow = nrow(sims), ncol = n) 373 | for (s in 1:nrow(sims)) { 374 | for (i in 1:n) { 375 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i] 376 | } 377 | } 378 | ``` 379 | 380 | And we calculated the log density for each observation and each MCMC sample: 381 | 382 | ```{r} 383 | log_dens <- matrix(nrow = nrow(sims), ncol = n) 384 | for (s in 1:nrow(sims)) { 385 | for (i in 1:n) { 386 | log_dens[s, i] <- dnorm(y[i], mean = y_hat[s, i], sd = sims[s, 3], log = TRUE) 387 | } 388 | } 389 | ``` 390 | 391 | Now weight the MCMC samples by weights of 1 / density or equivalently, `1/exp(log_dens)`: 392 | 393 | ```{r} 394 | weighted_sims <- matrix(nrow = nrow(sims), ncol = ncol(sims)) 395 | row_ids <- seq_len(nrow(weighted_sims)) 396 | 397 | lpd_loo1 <- numeric(length(x)) 398 | set.seed(123) 399 | for (i in 1:length(x)) { 400 | weights <- 1/exp(log_dens[,i]) 401 | weights <- weights / sum(weights) 402 | sampled_rows <- sample(row_ids, 5000, prob = weights, replace = TRUE) 403 | lpd_loo1[i] <- log(mean(exp(log_dens[sampled_rows, i]))) 404 | } 405 | loo::loo(fit_all) 406 | sum(lpd_loo1) # about the same 407 | ``` 408 | 409 | The only difference with the calculations in the loo package is that the package does some smoothing on the weights since the distribution of the weights can have very heavy tails with extremely unlikely data points. 410 | 411 | # Additional resources 412 | 413 | Vehtari, A., Gelman, A., & Gabry, J. 2017. Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing, 27(5), 1413–1432. 414 | 415 | Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. 2024, March 13. Pareto Smoothed Importance Sampling. arXiv. doi:10.48550/arXiv.1507.02646. 416 | 417 | 418 | 419 | 420 | -------------------------------------------------------------------------------- /11-divergent-transitions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Divergent transitions" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: true 7 | --- 8 | 9 | Divergent transitions are an important diagnostic with the NUTS MCMC algorithm that Stan uses. **We should be wary of any divergent transitions.** They can be an important diagnostic of a coding error, an inefficient implementation of a model, or a model that is too complex for a given dataset. They can result in biased parameter estimates. 10 | 11 | We'll demonstrate divergent transitions using one of the most classic datasets in Bayesian statistics. The data are from Rubin (1981). More on the 8-schools data: 12 | 13 | The code in this document has been adapted from: 14 | 15 | 16 | ```{r, message=FALSE, warning=FALSE} 17 | library(rstan) 18 | rstan_options(auto_write = TRUE) 19 | ``` 20 | 21 | This dataset represents a test of a coaching program on SAT scores across 8 schools. `y` is the average improvement (or decline) in test score for a given school and `sigma` is the standard error on that average improvement. 22 | 23 | ```{r} 24 | dat <- list( 25 | N = 8, 26 | y = c(28, 8, -3, 7, -1, 1, 18, 12), 27 | sigma = c(15, 10, 16, 11, 9, 11, 10, 18) 28 | ) 29 | ``` 30 | 31 | The idea is we can fit a hierarchical model that partially pools information across schools.
The model has a 'true' latent test score change per school with some variance that results in the observed values. 32 | 33 | ```stan 34 | data { 35 | int N; // number of schools 36 | vector[N] y; // mean score change by school 37 | vector[N] sigma; // standard error on score change by school 38 | } 39 | parameters { 40 | real mu; // across-school average improvement 41 | real<lower=0> sigma_prime; // SD of latent score values; must be positive 42 | vector[N] theta; // latent score values by school 43 | } 44 | model { 45 | mu ~ normal(0, 10); // prior 46 | sigma_prime ~ cauchy(0, 10); // prior 47 | theta ~ normal(mu, sigma_prime); // latent score values 48 | y ~ normal(theta, sigma); // data likelihood 49 | } 50 | ``` 51 | 52 | Fit the model with Stan: 53 | 54 | ```{r, message=FALSE, warning=FALSE, results='hide'} 55 | mod1 <- stan(here::here("stan/8schools.stan"), data = dat, seed = 9283) 56 | ``` 57 | 58 | ```{r} 59 | mod1 60 | ``` 61 | 62 | Check divergences: 63 | 64 | ```{r} 65 | rstan::get_num_divergent(mod1) 66 | ``` 67 | 68 | ```{r, eval=FALSE} 69 | shinystan::launch_shinystan(mod1) 70 | ``` 71 | 72 | We can visualize what's going on. The sampler isn't going all the way down the 'funnel' of the SD of the latent values `sigma_prime` and any specific school latent `theta`: 73 | 74 | ```{r} 75 | bayesplot::mcmc_scatter( 76 | as.array(mod1), 77 | pars = c("theta[1]", "sigma_prime"), 78 | transform = list(sigma_prime = "log"), 79 | np = bayesplot::nuts_params(mod1) 80 | ) 81 | ``` 82 | 83 | Note the red dots clustered towards the bottom of the funnel. 84 | 85 | We can instead fit a version that is parameterized differently: 86 | 87 | ```stan 88 | data { 89 | int N; 90 | vector[N] y; 91 | vector[N] sigma; 92 | } 93 | parameters { 94 | real mu; 95 | real<lower=0> sigma_prime; 96 | vector[N] eta; // temporary variable: Normal(0, 1) 97 | } 98 | transformed parameters { 99 | vector[N] theta; // define theta before we use it 100 | // now we form `theta` here based on sigma_prime * eta: 101 | theta = mu + sigma_prime * eta; 102 | } 103 | model { 104 | mu ~ normal(0, 10); 105 | sigma_prime ~ cauchy(0, 10); 106 | eta ~ normal(0, 1); // here's our temporary Normal(0, 1) variable 107 | y ~ normal(theta, sigma); 108 | } 109 | ``` 110 | 111 | Everything not commented is the same as before. We now have a Normal(0, 1) variable `eta` that we multiply by `sigma_prime`. We have the same probability model, but now the sampler is tracking `sigma_prime` and `eta`, which are uncorrelated, so it has less of a problem exploring the full posterior. This approach is often called a 'non-centered parameterization'. 112 | 113 | ```{r, message=FALSE, warning=FALSE, results='hide'} 114 | mod2 <- stan(here::here("stan/8schools_noncentered.stan"), data = dat, seed = 9283) 115 | ``` 116 | 117 | ```{r} 118 | mod2 119 | ``` 120 | 121 | ```{r} 122 | bayesplot::mcmc_scatter( 123 | as.array(mod2), 124 | pars = c("theta[1]", "sigma_prime"), 125 | transform = list(sigma_prime = "log"), 126 | np = bayesplot::nuts_params(mod2) 127 | ) 128 | ``` 129 | 130 | That's much better and our divergences do not appear to be systematically where we might worry about them (although that's harder to say with more complicated models).
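We can count them to confirm, just as we did for the first model (a few may still appear on any given run):

```{r}
rstan::get_num_divergent(mod2)
```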
We can improve this further by increasing the `adapt_delta` from its default 0.8 towards 1: 131 | 132 | ```{r, message=FALSE, warning=FALSE, results='hide'} 133 | mod3 <- stan( 134 | here::here("stan/8schools_noncentered.stan"), data = dat, seed = 9283, 135 | control = list(adapt_delta = 0.95) 136 | ) 137 | ``` 138 | 139 | This tells Stan to adjust the NUTS algorithm to take smaller steps. That slows things down but can reduce divergent transitions. 140 | 141 | Alternative solutions here would have been to tighten the priors or collect more data. 142 | 143 | ```{r} 144 | bayesplot::mcmc_scatter( 145 | as.array(mod3), 146 | pars = c("theta[1]", "sigma_prime"), 147 | transform = list(sigma_prime = "log"), 148 | np = bayesplot::nuts_params(mod3) 149 | ) 150 | ``` 151 | 152 | We can check what effect ignoring those divergent transitions would have had on our estimate of `sigma_prime`: 153 | 154 | ```{r} 155 | p1 <- extract(mod1) 156 | p3 <- extract(mod3) 157 | ``` 158 | 159 | ```{r} 160 | par(mfrow = c(2, 1)) 161 | hist(log(p1$sigma_prime), xlim = c(-10, 5)) 162 | hist(log(p3$sigma_prime), xlim = c(-10, 5)) 163 | 164 | mean(log(p1$sigma_prime)) 165 | mean(log(p3$sigma_prime)) 166 | ``` 167 | 168 | So, we would have lost the lower tail of the parameter if we had ignored the divergent transitions and ended up with some bias in the parameter posterior. 169 | 170 | ### Other resources 171 | 172 | - 173 | - 174 | - 175 | - 176 | -------------------------------------------------------------------------------- /12-workflow-sdm.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Bayesian workflow example" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: true 7 | --- 8 | 9 | ## Goals: 10 | 11 | - Work through the steps of a Bayesian workflow for an applied example. 12 | - Gain more experience with brms 13 | - Practice prior and posterior predictive checking 14 | - Gain brief exposure to a mixed effects model in brms with hierarchical variances 15 | 16 | ```{r, message=FALSE, warning=FALSE} 17 | library(dplyr) 18 | library(ggplot2) 19 | theme_set(theme_light()) 20 | library(bayesplot) 21 | library(brms) 22 | options(mc.cores = parallel::detectCores()) # parallel chains 23 | options(brms.file_refit = "on_change") # re-fit cached models if changes 24 | dir.create("cache", showWarnings = FALSE) 25 | ``` 26 | 27 | ## The data and question of interest 28 | 29 | These are rockfish densities from DFO's synoptic bottom trawl surveys off BC. I've only included densities for tows where that rockfish was caught. I.e., I've removed the zeros so we can work with a simple Gaussian model of log-transformed rockfish densities. 30 | 31 | ```{r} 32 | d <- readRDS(here::here("data/rockfish-depth.rds")) 33 | d$logdepth <- as.numeric(scale(log(d$depth_m))) 34 | d$fyear <- factor(d$year) 35 | d$fspecies <- factor(d$species_common_name) 36 | d$density <- d$density_kgpm2 * 1000 37 | ``` 38 | 39 | We can plot the raw data to see what the patterns look like. 40 | 41 | We're interested in predicting rockfish density in a tow as a function of depth. A question is whether we can treat these species as coming from the same depth-density relationship or whether they need their own curves. 42 | 43 | Question: why work with log density as the response? 44 | Question: why work with log depth as the predictor?
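One quick way to build intuition for the first question (an illustrative check, not part of the original exercise): positive-valued densities like these are typically strongly right-skewed, and a log transformation usually makes them much more symmetric:

```{r}
par(mfrow = c(1, 2))
hist(d$density, breaks = 50)
hist(log(d$density), breaks = 50)
```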
45 | 46 | ## Basic data exploration 47 | 48 | ```{r} 49 | ggplot(d, aes(log(depth_m), log(density_kgpm2))) + 50 | geom_point() + 51 | geom_smooth(se = FALSE) 52 | 53 | ggplot(d, aes(log(depth_m), log(density_kgpm2), colour = species_common_name)) + 54 | geom_point() + 55 | geom_smooth(se = FALSE) 56 | 57 | ggplot(d, aes(log(depth_m), log(density_kgpm2))) + 58 | geom_point() + 59 | facet_wrap(~species_common_name) + 60 | geom_smooth(se = FALSE) 61 | ``` 62 | 63 | ## Prior predictive checks 64 | 65 | We'll start with some prior predictive checks. 66 | 67 | We can find out what priors we need to specify with brms with `default_prior()`: 68 | 69 | ```{r} 70 | default_prior( 71 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2), 72 | data = d, 73 | family = gaussian() 74 | ) 75 | ``` 76 | 77 | We're using the `Intercept` option in the formula so that brms doesn't transform the intercept to reflect its value when the other predictors are at their mean. It just simplifies our interpretation here. 78 | 79 | Let's sample from these priors. 80 | 81 | We'll start with N(0, 1) priors on the slope and quadratic coefficients and a half-Student-t(3, 0, 3) prior on the observation error SD. 82 | 83 | We've log-transformed depth and we're modelling log density as the response. So, a multiplicative increase in depth causes a multiplicative increase in density. We wouldn't expect such effects to be huge. 84 | 85 | E.g., if depth is doubled, how many times might we expect density to increase? Probably not millions of times on average. 86 | 87 | We're working with an intercept prior that roughly matches the mean of the data just to focus on the curvature aspect. 88 | 89 | ```{r, results="hide"} 90 | priors1 <- brm( 91 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2), 92 | data = d, 93 | iter = 100, 94 | chains = 1, 95 | family = gaussian(), 96 | sample_prior = "only", 97 | seed = 123, 98 | file = "cache/sdm-priors1", 99 | prior = c( 100 | set_prior("normal(-2.82, 5)", class = "b", coef = "Intercept"), 101 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"), 102 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"), 103 | set_prior("student_t(3, 0, 3)", class = "sigma") 104 | ) 105 | ) 106 | ``` 107 | 108 | Let's pick out a species to look at our simulated data: 109 | 110 | ```{r} 111 | red <- filter(d, fspecies == "redbanded rockfish") 112 | obs <- mutate(red, .prediction = log(density), .draw = 0) 113 | 114 | pp <- tidybayes::predicted_draws(priors1, newdata = red, ndraws = 8) 115 | 116 | ggplot(pp, aes(depth_m, .prediction)) + geom_point() + 117 | facet_wrap(~.draw) + 118 | scale_x_log10() + 119 | ggtitle("Prior predictive simulation") 120 | ``` 121 | 122 | We can instead look at prior pushforward simulations: 123 | 124 | ```{r} 125 | pp <- tidybayes::epred_draws(priors1, newdata = red, ndraws = 9) 126 | ggplot(pp, aes(depth_m, .epred)) + geom_point() + 127 | facet_wrap(~.draw) + 128 | scale_x_log10() + 129 | ggtitle("Prior pushforward simulation") 130 | ``` 131 | 132 | What about if we had much wider priors? 133 | 134 | Here I've included the data to help remember the scale of the original data. Have we gone beyond the realm of possible parameter space?
135 | 136 | ```{r, results="hide", warning=FALSE} 137 | priors2 <- brm( 138 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2), 139 | data = d, 140 | iter = 100, 141 | chains = 1, 142 | family = gaussian(), 143 | sample_prior = "only", 144 | seed = 123, 145 | file = "cache/sdm-priors-wide", 146 | prior = c( 147 | set_prior("normal(0, 100)", class = "b", coef = "Intercept"), 148 | set_prior("normal(0, 100)", class = "b", coef = "logdepth"), 149 | set_prior("normal(0, 100)", class = "b", coef = "IlogdepthE2"), 150 | set_prior("student_t(3, 0, 100)", class = "sigma") 151 | ) 152 | ) 153 | 154 | pp <- tidybayes::predicted_draws(priors2, newdata = red, ndraws = 8) 155 | pp <- bind_rows(pp, obs) |> 156 | mutate(type = ifelse(.draw != 0, "Simulated", "Observed")) 157 | 158 | ggplot(pp, aes(depth_m, .prediction, colour = type)) + geom_point() + 159 | facet_wrap(~.draw) + 160 | scale_x_log10() + 161 | ggtitle("Redbanded Rockfish: wide priors") 162 | ``` 163 | 164 | ## Fitting 3 models 165 | 166 | Now we're going to fit 3 models. 167 | 168 | 1. A quadratic effect of depth. 169 | 2. Let those quadratic curves vary by species. 170 | 3. Also let each species have its own level of observation error. 171 | 172 | In reality, we'd probably start with the simplest, check our model, and then increase the complexity to address issues. We'll fit all 3 at once to keep this example easier to follow. 173 | 174 | First, a simple quadratic linear regression: 175 | 176 | ```{r, results="hide"} 177 | fit1 <- brm( 178 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2), 179 | data = d, 180 | iter = 1000, 181 | chains = 4, 182 | family = gaussian(), 183 | seed = 726328, 184 | file = "cache/sdm-fit1", 185 | prior = c( 186 | set_prior("normal(0, 10)", class = "b", coef = "Intercept"), 187 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"), 188 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"), 189 | set_prior("student_t(3, 0, 3)", class = "sigma") 190 | ) 191 | ) 192 | ``` 193 | 194 | Second, a version that enables each species to have its own curve: 195 | 196 | ```{r, results="hide"} 197 | # figure out the priors to specify 198 | default_prior( 199 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2) + 200 | (logdepth + I(logdepth^2) | fspecies), 201 | data = d 202 | ) 203 | 204 | fit2 <- brm( 205 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2) + 206 | (logdepth + I(logdepth^2) | fspecies), 207 | data = d, 208 | iter = 1000, 209 | chains = 4, 210 | family = gaussian(), 211 | seed = 72632, 212 | control = list(adapt_delta = 0.9), 213 | file = "cache/sdm-fit2", 214 | prior = c( 215 | set_prior("normal(0, 10)", class = "b", coef = "Intercept"), 216 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"), 217 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"), 218 | set_prior("student_t(3, 0, 3)", class = "sigma"), 219 | set_prior("lkj_corr_cholesky(1)", class = "L"), 220 | set_prior("student_t(3, 0, 3)", class = "sd") 221 | ) 222 | ) 223 | ``` 224 | 225 | Third, a version where we also let each species have its own observation error variance: 226 | 227 | ```{r, results="hide"} 228 | fit3 <- brm( 229 | bf( 230 | log(density) ~ 0 + Intercept + logdepth + I(logdepth^2) + 231 | (logdepth + I(logdepth^2) | fspecies), 232 | sigma ~ fspecies 233 | ), 234 | data = d, 235 | iter = 1000, 236 | chains = 4, 237 | family = gaussian(), 238 | seed = 726328, 239 | control = list(adapt_delta = 0.95), 240 | file = "cache/sdm-fit3", 241 | prior = c( 242 | set_prior("normal(0,
10)", class = "b", coef = "Intercept"), 243 | set_prior("normal(0, 1)", class = "b", coef = "logdepth"), 244 | set_prior("normal(0, 1)", class = "b", coef = "IlogdepthE2"), 245 | set_prior("student_t(3, 0, 3)", class = "Intercept", dpar = "sigma"), 246 | set_prior("normal(0, 1)", class = "b", dpar = "sigma"), 247 | set_prior("lkj_corr_cholesky(1)", class = "L"), 248 | set_prior("student_t(3, 0, 3)", class = "sd") 249 | ) 250 | ) 251 | ``` 252 | 253 | ## Checking for convergence 254 | 255 | Look at our models: 256 | 257 | ```{r} 258 | fit1 259 | fit2 260 | fit3 261 | ``` 262 | 263 | We should look at traceplots: 264 | 265 | ```{r} 266 | bayesplot::mcmc_trace(fit1, regex_pars = "^b_") 267 | bayesplot::mcmc_trace(fit3, regex_pars = "^b_") 268 | ``` 269 | 270 | ## Summarizing the parameters 271 | 272 | And we can summarize the parameter posterior distributions: 273 | 274 | ```{r} 275 | bayesplot::mcmc_dens_chains(fit1, regex_pars = "^b_") 276 | bayesplot::mcmc_areas(fit3, regex_pars = c("^b_", "sigma")) 277 | bayesplot::mcmc_intervals(fit3, regex_pars = c("^r_")) 278 | ``` 279 | 280 | If we wanted to make our own plot, we could have used this to get the data: 281 | 282 | ```{r} 283 | bayesplot::mcmc_intervals_data(fit3, regex_pars = c("^b_")) |> 284 | head() 285 | ``` 286 | 287 | ## Posterior predictive simulations 288 | 289 | Let's look at some posterior predictive simulations. 290 | 291 | ```{r} 292 | y <- log(d$density) 293 | yrep1 <- posterior_predict(fit1, ndraws = 20) 294 | bayesplot::ppc_dens_overlay(y, yrep1) 295 | bayesplot::ppc_dens_overlay_grouped(y, yrep1, group = d$fspecies) 296 | ``` 297 | 298 | Question: how does that look? 299 | 300 | ```{r} 301 | yrep2 <- posterior_predict(fit2, ndraws = 20) 302 | bayesplot::ppc_dens_overlay_grouped(y, yrep2, group = d$fspecies) 303 | ``` 304 | 305 | Question: is that better? 306 | 307 | ```{r} 308 | yrep3 <- posterior_predict(fit3, ndraws = 20) 309 | bayesplot::ppc_dens_overlay_grouped(y, yrep3, group = d$fspecies) 310 | ``` 311 | 312 | Question: is that better? Which model best generates data that resemble the observations? 313 | 314 | Question: these maybe still aren't perfect. What are some possible reasons for that? How might we expand the model in reality? 315 | 316 | Let's dig into an example species: 317 | 318 | ```{r} 319 | # grab data for one species: 320 | red <- filter(d, fspecies == "redbanded rockfish") 321 | obs <- mutate(red, .prediction = log(density), .draw = 0) 322 | 323 | # make posterior predictions: 324 | pp1 <- tidybayes::predicted_draws(fit1, newdata = red, ndraws = 8) 325 | pp1 <- bind_rows(pp1, obs) |> 326 | mutate(type = ifelse(.draw != 0, "Simulated", "Observed")) 327 | 328 | # make posterior predictions: 329 | pp3 <- tidybayes::predicted_draws(fit3, newdata = red, ndraws = 8) 330 | pp3 <- bind_rows(pp3, obs) |> 331 | mutate(type = ifelse(.draw != 0, "Simulated", "Observed")) 332 | ``` 333 | 334 | And plot the output: 335 | 336 | ```{r} 337 | ggplot(pp1, aes(depth_m, .prediction, colour = type)) + geom_point() + 338 | facet_wrap(~.draw) + 339 | scale_x_log10() + 340 | ggtitle("Redbanded Rockfish: shared quadratic") 341 | 342 | ggplot(pp3, aes(depth_m, .prediction, colour = type)) + geom_point() + 343 | facet_wrap(~.draw) + 344 | scale_x_log10() + 345 | ggtitle("Redbanded Rockfish: species-specific quadratic + error") 346 | ``` 347 | 348 | Question: can you tell what is off in the posterior predictive simulations for this example in model 1? 349 | 350 | Hint: look at the spread. 
351 | 352 | ## Posterior predictive simulations: statistical properties 353 | 354 | We can also visualize various statistical properties of our simulated and real observations. We'll focus on model 1 and 3 for brevity. 355 | 356 | ```{r} 357 | yrep1 <- posterior_predict(fit1) 358 | yrep3 <- posterior_predict(fit3) 359 | ``` 360 | 361 | Medians: 362 | 363 | ```{r} 364 | ppc_stat_grouped(y, yrep1, stat = "median", group = d$fspecies) 365 | ppc_stat_grouped(y, yrep3, stat = "median", group = d$fspecies) 366 | ``` 367 | 368 | SD: 369 | 370 | ```{r} 371 | ppc_stat_grouped(y, yrep1, stat = "sd", group = d$fspecies) 372 | ppc_stat_grouped(y, yrep3, stat = "sd", group = d$fspecies) 373 | ``` 374 | 375 | Question: why is this not a very useful check here? 376 | 377 | Interquartile range: 378 | 379 | ```{r} 380 | iqr <- function(x) { 381 | q75 <- quantile(x, 0.75) 382 | q25 <- quantile(x, 0.25) 383 | q75 - q25 384 | } 385 | ppc_stat_grouped(y, yrep1, stat = "iqr", group = d$fspecies) 386 | ppc_stat_grouped(y, yrep3, stat = "iqr", group = d$fspecies) 387 | ``` 388 | 389 | We might also wonder, might this relationship be changing through time? Should we have included a year covariate? We can check what our simulations say: 390 | 391 | ```{r} 392 | ppc_stat_grouped(y, yrep1, stat = "median", group = d$year) 393 | ``` 394 | 395 | ## Comparing models with ELPD 396 | 397 | We can look at ELPD-LOO: 398 | 399 | ```{r} 400 | loo1 <- loo(fit1) 401 | loo2 <- loo(fit2) 402 | loo3 <- loo(fit3) 403 | loo_compare(loo1, loo2, loo3) 404 | ``` 405 | 406 | What does this tell us? 407 | 408 | We can also look at the pointwise LOO predictive densities themselves: 409 | 410 | ```{r} 411 | elpdi1 <- loo1$pointwise[, "elpd_loo"] 412 | elpdi2 <- loo2$pointwise[, "elpd_loo"] 413 | elpdi3 <- loo3$pointwise[, "elpd_loo"] 414 | ``` 415 | 416 | And compare how good each model was at predicting each left-out point: 417 | 418 | ```{r} 419 | d$diff32 <- elpdi3 - elpdi2 420 | d$diff21 <- elpdi2 - elpdi1 421 | 422 | ggplot(d, aes(logdepth, diff21, colour = fspecies, fill = fspecies)) + 423 | geom_point() + 424 | facet_wrap(~fspecies) + 425 | geom_hline(yintercept = 0, lty = 2) 426 | 427 | ggplot(d, aes(logdepth, diff32, colour = fspecies, fill = fspecies)) + 428 | geom_point() + 429 | facet_wrap(~fspecies) + 430 | geom_hline(yintercept = 0, lty = 2) 431 | ``` 432 | 433 | What can these tell us? 434 | 435 | ## Visualizing the model predictions 436 | 437 | We can plot the expected values from the prediction across a sequence of depths. 438 | 439 | First, we could take a sequence of draws from the posterior: 440 | 441 | ```{r} 442 | nd <- expand.grid( 443 | logdepth = seq(min(d$logdepth), max(d$logdepth), length.out = 100), 444 | fspecies = unique(d$fspecies) 445 | ) 446 | 447 | out <- tidybayes::add_epred_draws(newdata = nd, object = fit3, ndraws = 50) 448 | ggplot(out, aes(logdepth, .epred, group = .draw)) + 449 | geom_line(alpha = 0.2) + 450 | facet_wrap(~fspecies, scales = "free_y") 451 | ``` 452 | 453 | Or we could summarize the distribution of those values: 454 | 455 | ```{r} 456 | lpred <- brms::posterior_epred(fit3, newdata = nd) 457 | nd$med <- apply(lpred, 2, median) 458 | nd$lwr <- apply(lpred, 2, quantile, probs = 0.1) 459 | nd$upr <- apply(lpred, 2, quantile, probs = 0.9) 460 | ggplot(nd, aes(logdepth, exp(med), colour = fspecies, fill = fspecies)) + 461 | geom_ribbon(aes(ymin = exp(lwr), ymax = exp(upr)), alpha = 0.2, colour = NA) + 462 | geom_line() 463 | ``` 464 | 465 | What about the distribution of new observations?
465 | What about the distribution of new observations? Are these more spread out? Why? 466 | 467 | ```{r} 468 | ppred <- brms::posterior_predict(fit3, newdata = nd) 469 | nd$pp_med <- apply(ppred, 2, median) 470 | nd$pp_lwr <- apply(ppred, 2, quantile, probs = 0.1) 471 | nd$pp_upr <- apply(ppred, 2, quantile, probs = 0.9) 472 | ggplot(nd, aes(logdepth, exp(pp_med), colour = fspecies, fill = fspecies)) + 473 | geom_ribbon(aes(ymin = exp(pp_lwr), ymax = exp(pp_upr)), alpha = 0.2, colour = NA) + 474 | geom_line() 475 | ``` 476 | 477 | ## Questions: 478 | 479 | - What can we conclude from these data given our models? 480 | - Which of the above might we report in a paper? 481 | - What else might you consider adding to this model? 482 | - What are some possible uses for the posterior predictive simulations? (One concrete example follows below.)
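One concrete use: a posterior predictive p-value for a test statistic of interest. A minimal sketch using `y` and the full `yrep3` matrix computed earlier (each row of `yrep3` is one simulated data set):

```{r}
sim_sd <- apply(yrep3, 1, sd) # SD of each simulated data set
mean(sim_sd > sd(y)) # proportion of simulations more spread out than the data
```

Values near 0 or 1 flag a statistic the model fails to reproduce; values near 0.5 are consistent with the observations. This is the same comparison that `ppc_stat_grouped()` displayed graphically above.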
483 | -------------------------------------------------------------------------------- /13-resources.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Useful resources" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: true 7 | --- 8 | 9 | # Stan resources 10 | 11 | The main documentation: 12 | 13 | 14 | Stan Best Practices: 15 | 16 | 17 | Prior Choice Recommendations: 18 | 19 | 20 | Case studies: 21 | 22 | 23 | Tutorials, books, online video courses, and presentations: 24 | 25 | 26 | Forums: 27 | 28 | 29 | Examples from many textbooks converted into Stan: 30 | 31 | 32 | tidybayes: 33 | 34 | 35 | Visualize MCMC algorithms including NUTS: 36 | 37 | 38 | # Online lecture recordings 39 | 40 | Statistical Rethinking Fall 2023 lectures: 41 | 42 | 43 | Recordings from Aki Vehtari's course: 44 | 45 | (scroll down) 46 | 47 | # Papers 48 | 49 | Monnahan, C.C., Thorson, J.T., and Branch, T.A. 2016. Faster estimation of Bayesian models in ecology using Hamiltonian Monte Carlo. Methods Ecol Evol. 50 | 51 | Banner, K.M., Irvine, K.M., and Rodhouse, T. 2020. The Use of Bayesian Priors in Ecology: The Good, The Bad, and The Not Great. Methods Ecol Evol: 2041–210X.13407. 52 | 53 | Gabry, J., Simpson, D., Vehtari, A., Betancourt, M., and Gelman, A. 2019. Visualization in Bayesian workflow. Journal of the Royal Statistical Society Series A: Statistics in Society 182(2): 389–402. 54 | 55 | Gelman, A., Vehtari, A., Simpson, D., Margossian, C.C., Carpenter, B., Yao, Y., Kennedy, L., Gabry, J., Bürkner, P.-C., and Modrák, M. 2020. Bayesian Workflow. arXiv:2011.01808. 56 | 57 | Vehtari, A., Gelman, A., Simpson, D., Carpenter, B., and Bürkner, P.-C. 2021. Rank-normalization, folding, and localization: an improved $\hat{R}$ for assessing convergence of MCMC (with discussion). Bayesian Analysis 16(2): 667–718. International Society for Bayesian Analysis. 58 | 59 | Vehtari, A., Gelman, A., and Gabry, J. 2017. Practical Bayesian model evaluation using leave-one-out cross-validation and WAIC. Statistics and Computing 27(5): 1413–1432. doi:10.1007/s11222-016-9696-4. 60 | 61 | Vehtari, A., Simpson, D., Gelman, A., Yao, Y., and Gabry, J. 2024. Pareto Smoothed Importance Sampling. arXiv. doi:10.48550/arXiv.1507.02646. 62 | 63 | Betancourt, M. 2017. A Conceptual Introduction to Hamiltonian Monte Carlo. arXiv:1701.02434 [stat]. 64 | 65 | Monnahan, C.C. 2024. Toward good practices for Bayesian data-rich fisheries stock assessments using a modern statistical workflow. Fisheries Research 275: 107024. 66 | 67 | # Textbooks 68 | 69 | Hobbs, N.T., and Hooten, M.B. 2015. Bayesian models: a statistical primer for ecologists. Princeton University Press, Princeton, New Jersey. 70 | 71 | - The best textbook I've found on the fundamentals of Bayesian models from an ecologist's perspective. I've never found another book that filled in so many gaps in understanding. An excellent resource on how to read and write Bayesian models. Note that this textbook does not focus on code. 72 | 73 | McElreath, R. 2020. Statistical rethinking: a Bayesian course with examples in R and Stan. 2nd edition. CRC Press/Taylor & Francis Group. 74 | 75 | 76 | - A fantastic textbook that will help you think about Bayesian modeling and modeling in general. Perhaps the one downside is that it uses the author's 'rethinking' R package throughout, which is great from a pedagogical perspective but not great if you want to learn Stan code itself. Still very much worth it, though: Stan code is relatively easy to learn once you understand the concepts, and in many cases there's no need to write the code yourself anyway. 77 | - See this repository that includes most of the examples reworked with the brms package and ggplot2: 78 | 79 | Gelman, A., Hill, J., and Vehtari, A. 2021. Regression and other stories. Cambridge University Press, Cambridge. doi:10.1017/9781139161879. 80 | 81 | - An excellent textbook on regression and GLMs in a Bayesian context. Examples are mostly from the social, political, and health sciences, but applicable to anything. Also deals with causal inference. 82 | - Examples and other material here: 83 | 84 | Gelman, A., J.B. Carlin, H.S. Stern, D.B. Dunson, A. Vehtari, and D.B. Rubin. 2014. Bayesian Data Analysis. Chapman & Hall, Boca Raton, FL. 85 | 86 | - The Bayesian data analysis bible, but definitely not easy reading. 87 | 88 | **Official up-to-date PDF version**: 89 | 90 | # Not specifically about Bayesian modeling, but still very useful 91 | 92 | Schielzeth, H. 2010. Simple means to improve the interpretability of regression coefficients. Methods in Ecology and Evolution 1: 103–113. 93 | 94 | Gelman, A. 2008. Scaling regression inputs by dividing by two standard deviations. Statistics in Medicine 27: 2865–2873. 95 | 96 | Morey, R.D., Hoekstra, R., Rouder, J.N., Lee, M.D., and Wagenmakers, E.-J. 2016. The fallacy of placing confidence in confidence intervals. Psychonomic Bulletin & Review 23(1): 103–123. 97 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Attribution 4.0 International 2 | ======================================================================= 3 | 4 | Creative Commons Corporation ("Creative Commons") is not a law firm and 5 | does not provide legal services or legal advice. Distribution of 6 | Creative Commons public licenses does not create a lawyer-client or 7 | other relationship. Creative Commons makes its licenses and related 8 | information available on an "as-is" basis. Creative Commons gives no 9 | warranties regarding its licenses, any material licensed under their 10 | terms and conditions, or any related information. Creative Commons 11 | disclaims all liability for damages resulting from their use to the 12 | fullest extent possible. 13 | 14 | Using Creative Commons Public Licenses 15 | 16 | Creative Commons public licenses provide a standard set of terms and 17 | conditions that creators and other rights holders may use to share 18 | original works of authorship and other material subject to copyright 19 | and certain other rights specified in the public license below.
The 20 | following considerations are for informational purposes only, are not 21 | exhaustive, and do not form part of our licenses. 22 | 23 | Considerations for licensors: Our public licenses are 24 | intended for use by those authorized to give the public 25 | permission to use material in ways otherwise restricted by 26 | copyright and certain other rights. Our licenses are 27 | irrevocable. Licensors should read and understand the terms 28 | and conditions of the license they choose before applying it. 29 | Licensors should also secure all rights necessary before 30 | applying our licenses so that the public can reuse the 31 | material as expected. Licensors should clearly mark any 32 | material not subject to the license. This includes other CC- 33 | licensed material, or material used under an exception or 34 | limitation to copyright. More considerations for licensors: 35 | wiki.creativecommons.org/Considerations_for_licensors 36 | 37 | Considerations for the public: By using one of our public 38 | licenses, a licensor grants the public permission to use the 39 | licensed material under specified terms and conditions. If 40 | the licensor's permission is not necessary for any reason--for 41 | example, because of any applicable exception or limitation to 42 | copyright--then that use is not regulated by the license. Our 43 | licenses grant only permissions under copyright and certain 44 | other rights that a licensor has authority to grant. Use of 45 | the licensed material may still be restricted for other 46 | reasons, including because others have copyright or other 47 | rights in the material. A licensor may make special requests, 48 | such as asking that all changes be marked or described. 49 | Although not required by our licenses, you are encouraged to 50 | respect those requests where reasonable. More_considerations 51 | for the public: 52 | wiki.creativecommons.org/Considerations_for_licensees 53 | 54 | ======================================================================= 55 | 56 | Creative Commons Attribution 4.0 International Public License 57 | 58 | By exercising the Licensed Rights (defined below), You accept and agree 59 | to be bound by the terms and conditions of this Creative Commons 60 | Attribution 4.0 International Public License ("Public License"). To the 61 | extent this Public License may be interpreted as a contract, You are 62 | granted the Licensed Rights in consideration of Your acceptance of 63 | these terms and conditions, and the Licensor grants You such rights in 64 | consideration of benefits the Licensor receives from making the 65 | Licensed Material available under these terms and conditions. 66 | 67 | 68 | Section 1 -- Definitions. 69 | 70 | a. Adapted Material means material subject to Copyright and Similar 71 | Rights that is derived from or based upon the Licensed Material 72 | and in which the Licensed Material is translated, altered, 73 | arranged, transformed, or otherwise modified in a manner requiring 74 | permission under the Copyright and Similar Rights held by the 75 | Licensor. For purposes of this Public License, where the Licensed 76 | Material is a musical work, performance, or sound recording, 77 | Adapted Material is always produced where the Licensed Material is 78 | synched in timed relation with a moving image. 79 | 80 | b. Adapter's License means the license You apply to Your Copyright 81 | and Similar Rights in Your contributions to Adapted Material in 82 | accordance with the terms and conditions of this Public License. 
83 | 84 | c. Copyright and Similar Rights means copyright and/or similar rights 85 | closely related to copyright including, without limitation, 86 | performance, broadcast, sound recording, and Sui Generis Database 87 | Rights, without regard to how the rights are labeled or 88 | categorized. For purposes of this Public License, the rights 89 | specified in Section 2(b)(1)-(2) are not Copyright and Similar 90 | Rights. 91 | 92 | d. Effective Technological Measures means those measures that, in the 93 | absence of proper authority, may not be circumvented under laws 94 | fulfilling obligations under Article 11 of the WIPO Copyright 95 | Treaty adopted on December 20, 1996, and/or similar international 96 | agreements. 97 | 98 | e. Exceptions and Limitations means fair use, fair dealing, and/or 99 | any other exception or limitation to Copyright and Similar Rights 100 | that applies to Your use of the Licensed Material. 101 | 102 | f. Licensed Material means the artistic or literary work, database, 103 | or other material to which the Licensor applied this Public 104 | License. 105 | 106 | g. Licensed Rights means the rights granted to You subject to the 107 | terms and conditions of this Public License, which are limited to 108 | all Copyright and Similar Rights that apply to Your use of the 109 | Licensed Material and that the Licensor has authority to license. 110 | 111 | h. Licensor means the individual(s) or entity(ies) granting rights 112 | under this Public License. 113 | 114 | i. Share means to provide material to the public by any means or 115 | process that requires permission under the Licensed Rights, such 116 | as reproduction, public display, public performance, distribution, 117 | dissemination, communication, or importation, and to make material 118 | available to the public including in ways that members of the 119 | public may access the material from a place and at a time 120 | individually chosen by them. 121 | 122 | j. Sui Generis Database Rights means rights other than copyright 123 | resulting from Directive 96/9/EC of the European Parliament and of 124 | the Council of 11 March 1996 on the legal protection of databases, 125 | as amended and/or succeeded, as well as other essentially 126 | equivalent rights anywhere in the world. 127 | 128 | k. You means the individual or entity exercising the Licensed Rights 129 | under this Public License. Your has a corresponding meaning. 130 | 131 | 132 | Section 2 -- Scope. 133 | 134 | a. License grant. 135 | 136 | 1. Subject to the terms and conditions of this Public License, 137 | the Licensor hereby grants You a worldwide, royalty-free, 138 | non-sublicensable, non-exclusive, irrevocable license to 139 | exercise the Licensed Rights in the Licensed Material to: 140 | 141 | a. reproduce and Share the Licensed Material, in whole or 142 | in part; and 143 | 144 | b. produce, reproduce, and Share Adapted Material. 145 | 146 | 2. Exceptions and Limitations. For the avoidance of doubt, where 147 | Exceptions and Limitations apply to Your use, this Public 148 | License does not apply, and You do not need to comply with 149 | its terms and conditions. 150 | 151 | 3. Term. The term of this Public License is specified in Section 152 | 6(a). 153 | 154 | 4. Media and formats; technical modifications allowed. The 155 | Licensor authorizes You to exercise the Licensed Rights in 156 | all media and formats whether now known or hereafter created, 157 | and to make technical modifications necessary to do so. 
The 158 | Licensor waives and/or agrees not to assert any right or 159 | authority to forbid You from making technical modifications 160 | necessary to exercise the Licensed Rights, including 161 | technical modifications necessary to circumvent Effective 162 | Technological Measures. For purposes of this Public License, 163 | simply making modifications authorized by this Section 2(a) 164 | (4) never produces Adapted Material. 165 | 166 | 5. Downstream recipients. 167 | 168 | a. Offer from the Licensor -- Licensed Material. Every 169 | recipient of the Licensed Material automatically 170 | receives an offer from the Licensor to exercise the 171 | Licensed Rights under the terms and conditions of this 172 | Public License. 173 | 174 | b. No downstream restrictions. You may not offer or impose 175 | any additional or different terms or conditions on, or 176 | apply any Effective Technological Measures to, the 177 | Licensed Material if doing so restricts exercise of the 178 | Licensed Rights by any recipient of the Licensed 179 | Material. 180 | 181 | 6. No endorsement. Nothing in this Public License constitutes or 182 | may be construed as permission to assert or imply that You 183 | are, or that Your use of the Licensed Material is, connected 184 | with, or sponsored, endorsed, or granted official status by, 185 | the Licensor or others designated to receive attribution as 186 | provided in Section 3(a)(1)(A)(i). 187 | 188 | b. Other rights. 189 | 190 | 1. Moral rights, such as the right of integrity, are not 191 | licensed under this Public License, nor are publicity, 192 | privacy, and/or other similar personality rights; however, to 193 | the extent possible, the Licensor waives and/or agrees not to 194 | assert any such rights held by the Licensor to the limited 195 | extent necessary to allow You to exercise the Licensed 196 | Rights, but not otherwise. 197 | 198 | 2. Patent and trademark rights are not licensed under this 199 | Public License. 200 | 201 | 3. To the extent possible, the Licensor waives any right to 202 | collect royalties from You for the exercise of the Licensed 203 | Rights, whether directly or through a collecting society 204 | under any voluntary or waivable statutory or compulsory 205 | licensing scheme. In all other cases the Licensor expressly 206 | reserves any right to collect such royalties. 207 | 208 | 209 | Section 3 -- License Conditions. 210 | 211 | Your exercise of the Licensed Rights is expressly made subject to the 212 | following conditions. 213 | 214 | a. Attribution. 215 | 216 | 1. If You Share the Licensed Material (including in modified 217 | form), You must: 218 | 219 | a. retain the following if it is supplied by the Licensor 220 | with the Licensed Material: 221 | 222 | i. identification of the creator(s) of the Licensed 223 | Material and any others designated to receive 224 | attribution, in any reasonable manner requested by 225 | the Licensor (including by pseudonym if 226 | designated); 227 | 228 | ii. a copyright notice; 229 | 230 | iii. a notice that refers to this Public License; 231 | 232 | iv. a notice that refers to the disclaimer of 233 | warranties; 234 | 235 | v. a URI or hyperlink to the Licensed Material to the 236 | extent reasonably practicable; 237 | 238 | b. indicate if You modified the Licensed Material and 239 | retain an indication of any previous modifications; and 240 | 241 | c. 
indicate the Licensed Material is licensed under this 242 | Public License, and include the text of, or the URI or 243 | hyperlink to, this Public License. 244 | 245 | 2. You may satisfy the conditions in Section 3(a)(1) in any 246 | reasonable manner based on the medium, means, and context in 247 | which You Share the Licensed Material. For example, it may be 248 | reasonable to satisfy the conditions by providing a URI or 249 | hyperlink to a resource that includes the required 250 | information. 251 | 252 | 3. If requested by the Licensor, You must remove any of the 253 | information required by Section 3(a)(1)(A) to the extent 254 | reasonably practicable. 255 | 256 | 4. If You Share Adapted Material You produce, the Adapter's 257 | License You apply must not prevent recipients of the Adapted 258 | Material from complying with this Public License. 259 | 260 | 261 | Section 4 -- Sui Generis Database Rights. 262 | 263 | Where the Licensed Rights include Sui Generis Database Rights that 264 | apply to Your use of the Licensed Material: 265 | 266 | a. for the avoidance of doubt, Section 2(a)(1) grants You the right 267 | to extract, reuse, reproduce, and Share all or a substantial 268 | portion of the contents of the database; 269 | 270 | b. if You include all or a substantial portion of the database 271 | contents in a database in which You have Sui Generis Database 272 | Rights, then the database in which You have Sui Generis Database 273 | Rights (but not its individual contents) is Adapted Material; and 274 | 275 | c. You must comply with the conditions in Section 3(a) if You Share 276 | all or a substantial portion of the contents of the database. 277 | 278 | For the avoidance of doubt, this Section 4 supplements and does not 279 | replace Your obligations under this Public License where the Licensed 280 | Rights include other Copyright and Similar Rights. 281 | 282 | 283 | Section 5 -- Disclaimer of Warranties and Limitation of Liability. 284 | 285 | a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE 286 | EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS 287 | AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF 288 | ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, 289 | IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, 290 | WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR 291 | PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, 292 | ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT 293 | KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT 294 | ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. 295 | 296 | b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE 297 | TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, 298 | NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, 299 | INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, 300 | COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR 301 | USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN 302 | ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR 303 | DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR 304 | IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. 305 | 306 | c. The disclaimer of warranties and limitation of liability provided 307 | above shall be interpreted in a manner that, to the extent 308 | possible, most closely approximates an absolute disclaimer and 309 | waiver of all liability. 
310 | 311 | 312 | Section 6 -- Term and Termination. 313 | 314 | a. This Public License applies for the term of the Copyright and 315 | Similar Rights licensed here. However, if You fail to comply with 316 | this Public License, then Your rights under this Public License 317 | terminate automatically. 318 | 319 | b. Where Your right to use the Licensed Material has terminated under 320 | Section 6(a), it reinstates: 321 | 322 | 1. automatically as of the date the violation is cured, provided 323 | it is cured within 30 days of Your discovery of the 324 | violation; or 325 | 326 | 2. upon express reinstatement by the Licensor. 327 | 328 | For the avoidance of doubt, this Section 6(b) does not affect any 329 | right the Licensor may have to seek remedies for Your violations 330 | of this Public License. 331 | 332 | c. For the avoidance of doubt, the Licensor may also offer the 333 | Licensed Material under separate terms or conditions or stop 334 | distributing the Licensed Material at any time; however, doing so 335 | will not terminate this Public License. 336 | 337 | d. Sections 1, 5, 6, 7, and 8 survive termination of this Public 338 | License. 339 | 340 | 341 | Section 7 -- Other Terms and Conditions. 342 | 343 | a. The Licensor shall not be bound by any additional or different 344 | terms or conditions communicated by You unless expressly agreed. 345 | 346 | b. Any arrangements, understandings, or agreements regarding the 347 | Licensed Material not stated herein are separate from and 348 | independent of the terms and conditions of this Public License. 349 | 350 | 351 | Section 8 -- Interpretation. 352 | 353 | a. For the avoidance of doubt, this Public License does not, and 354 | shall not be interpreted to, reduce, limit, restrict, or impose 355 | conditions on any use of the Licensed Material that could lawfully 356 | be made without permission under this Public License. 357 | 358 | b. To the extent possible, if any provision of this Public License is 359 | deemed unenforceable, it shall be automatically reformed to the 360 | minimum extent necessary to make it enforceable. If the provision 361 | cannot be reformed, it shall be severed from this Public License 362 | without affecting the enforceability of the remaining terms and 363 | conditions. 364 | 365 | c. No term or condition of this Public License will be waived and no 366 | failure to comply consented to unless expressly agreed to by the 367 | Licensor. 368 | 369 | d. Nothing in this Public License constitutes or may be interpreted 370 | as a limitation upon, or waiver of, any privileges and immunities 371 | that apply to the Licensor or You, including from the legal 372 | processes of any jurisdiction or authority. 373 | 374 | 375 | ======================================================================= 376 | 377 | Creative Commons is not a party to its public 378 | licenses. Notwithstanding, Creative Commons may elect to apply one of 379 | its public licenses to material it publishes and in those instances 380 | will be considered the “Licensor.” The text of the Creative Commons 381 | public licenses is dedicated to the public domain under the CC0 Public 382 | Domain Dedication. 
Except for the limited purpose of indicating that 383 | material is shared under a Creative Commons public license or as 384 | otherwise permitted by the Creative Commons policies published at 385 | creativecommons.org/policies, Creative Commons does not authorize the 386 | use of the trademark "Creative Commons" or any other trademark or logo 387 | of Creative Commons without its prior written consent including, 388 | without limitation, in connection with any unauthorized modifications 389 | to any of its public licenses or any other arrangements, 390 | understandings, or agreements concerning use of licensed material. For 391 | the avoidance of doubt, this paragraph does not form part of the 392 | public licenses. 393 | 394 | Creative Commons may be contacted at creativecommons.org. 395 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # An introduction to applied Bayesian data analysis 2 | 3 | [![Project Status: WIP - Initial development is in progress, but there has not yet been a stable, usable release suitable for the public.](http://www.repostatus.org/badges/latest/wip.svg)](http://www.repostatus.org/#wip) 4 | 5 | If you are taking this workshop, start by running the file `00-install.R` in R to install the necessary packages and make sure your computer is set up properly. 6 | 7 | The course draws on material from: 8 | 9 | McElreath, R. 2020. Statistical rethinking: a Bayesian course with examples in R and Stan. Second edition. CRC Press, Boca Raton London New York. 10 | 11 | Gelman, A., Hill, J., and Vehtari, A. 2021. Regression and other stories. Cambridge University Press, Cambridge. 12 | 13 | Hobbs, N.T., and Hooten, M.B. 2015. Bayesian models: a statistical primer for ecologists. Princeton University Press, Princeton, New Jersey. 14 | 15 | To start: 16 | 17 | ```r 18 | # install.packages("usethis") 19 | usethis::use_course("bit.ly/bayes-course") 20 | ``` 21 | -------------------------------------------------------------------------------- /bayes-course.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: a977309f-797d-457d-a655-318e1edfdb9f 3 | 4 | RestoreWorkspace: Default 5 | SaveWorkspace: Default 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: knitr 14 | LaTeX: pdfLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | -------------------------------------------------------------------------------- /data-raw/mcmc-diagnostics-make-data.R: -------------------------------------------------------------------------------- 1 | dir.create("slides/figs", showWarnings = FALSE) 2 | 3 | # examples of just MCMC chains to detect issues: 4 | # - a chain is drifting but OK otherwise 5 | # - chains are in separate places 6 | # - chains are highly autocorrelated 7 | 8 | # examples with brms: 9 | 10 | # one example where you just haven't run it long enough 11 | 12 | # one example where there's a pathology that goes away with 13 | # a tighter prior and/or with a higher adapt delta 14 | 15 | 16 | # are these samples consistent with convergence? 17 | # if not, what helps you identify that? 18 | # what might have caused this scenario? 19 | # is this situation likely solvable? 20 | # if so, what might improve the MCMC sampling? 
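# A small convenience helper (an addition, not in the original script) that
# bundles the diagnostics computed repeatedly below. It assumes `samples` is a
# list of chains, each a one-column matrix named "theta", as created by the
# s1() functions in this file:
diagnose <- function(samples) {
  m <- do.call(cbind, samples)
  c(
    rhat = rstan::Rhat(m),
    ess_bulk = rstan::ess_bulk(m),
    ess_tail = rstan::ess_tail(m)
  )
}
# usage: diagnose(samples) after any of the chunks below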
21 | 22 | 23 | # not long enough given autocorrelation: 24 | set.seed(1) 25 | s1 <- function(i) { 26 | x <- arima.sim(n = 500, list(ar = 0.97), sd = 0.3) 27 | x <- matrix(x, ncol = 1) 28 | colnames(x) <- "theta" 29 | x 30 | } 31 | 32 | samples <- purrr::map(1:4, s1) 33 | names(samples) <- paste("Chain", 1:4) 34 | samples_matrix <- do.call(cbind, samples) 35 | bayesplot::mcmc_trace(samples) 36 | rstan::Rhat(samples_matrix) 37 | rstan::ess_bulk(samples_matrix) 38 | rstan::ess_tail(samples_matrix) 39 | saveRDS(samples, "data/mcmc1.rds") 40 | 41 | # drifting: 42 | set.seed(1) 43 | s1 <- function(i) { 44 | N <- 500 45 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.3) 46 | x <- matrix(x, ncol = 1) 47 | colnames(x) <- "theta" 48 | x <- x + 1:500 * 0.0017 49 | x 50 | } 51 | 52 | samples <- purrr::map(1:4, s1) 53 | names(samples) <- paste("Chain", 1:4) 54 | samples_matrix <- do.call(cbind, samples) 55 | saveRDS(samples, "data/mcmc2.rds") 56 | 57 | bayesplot::mcmc_trace(samples) 58 | 59 | rstan::Rhat(samples_matrix) 60 | rstan::ess_bulk(samples_matrix) 61 | rstan::ess_tail(samples_matrix) 62 | 63 | # one chain getting stuck: 64 | set.seed(1) 65 | s1 <- function(i) { 66 | N <- 1000 67 | .ar <- ifelse(i == 1, 0.90, 0.4) 68 | x <- arima.sim(n = N, list(ar = .ar), sd = 0.3) 69 | x <- matrix(x, ncol = 1) 70 | colnames(x) <- "theta" 71 | x 72 | } 73 | 74 | samples <- purrr::map(1:4, s1) 75 | names(samples) <- paste("Chain", 1:4) 76 | samples_matrix <- do.call(cbind, samples) 77 | bayesplot::mcmc_trace(samples) 78 | 79 | rstan::Rhat(samples_matrix) 80 | rstan::ess_bulk(samples_matrix) 81 | rstan::ess_tail(samples_matrix) 82 | 83 | saveRDS(samples, "data/mcmc3.rds") 84 | 85 | # looks great 86 | set.seed(1) 87 | s1 <- function(i) { 88 | N <- 1000 89 | x <- arima.sim(n = N, list(ar = 0.75), sd = 0.3) + 2.2 90 | x <- matrix(x, ncol = 1) 91 | colnames(x) <- "theta" 92 | x 93 | } 94 | 95 | samples <- purrr::map(1:4, s1) 96 | names(samples) <- paste("Chain", 1:4) 97 | samples_matrix <- do.call(cbind, samples) 98 | 99 | bayesplot::mcmc_trace(samples) 100 | 101 | rstan::Rhat(samples_matrix) 102 | rstan::ess_bulk(samples_matrix) 103 | rstan::ess_tail(samples_matrix) 104 | 105 | saveRDS(samples, "data/mcmc4.rds") 106 | 107 | # stuck in slightly different places 108 | set.seed(1) 109 | s1 <- function(i) { 110 | mu <- c(0.4, 0, -0.1, 0.18)[i] 111 | N <- 1000 112 | x <- mu + arima.sim(n = N, list(ar = 0.6), sd = 0.3) 113 | x <- matrix(x, ncol = 1) 114 | colnames(x) <- "theta" 115 | x 116 | } 117 | samples <- purrr::map(1:4, s1) 118 | names(samples) <- paste("Chain", 1:4) 119 | samples_matrix <- do.call(cbind, samples) 120 | 121 | bayesplot::mcmc_trace(samples) 122 | 123 | rstan::Rhat(samples_matrix) 124 | rstan::ess_bulk(samples_matrix) 125 | rstan::ess_tail(samples_matrix) 126 | saveRDS(samples, "data/mcmc5.rds") 127 | 128 | # not enough warmup: 129 | set.seed(1) 130 | s1 <- function(i) { 131 | start <- c(-6, 8, 0, -1)[i] 132 | N <- 700 133 | x <- arima.sim(n = N, list(ar = 0.96), sd = 0.1, n.start = 1, start.innov = rep(start, 1)) 134 | x <- matrix(x, ncol = 1) 135 | colnames(x) <- "theta" 136 | x 137 | } 138 | 139 | samples <- purrr::map(1:4, s1) 140 | names(samples) <- paste("Chain", 1:4) 141 | samples_matrix <- do.call(cbind, samples) 142 | 143 | bayesplot::mcmc_trace(samples) 144 | 145 | rstan::Rhat(samples_matrix) 146 | rstan::ess_bulk(samples_matrix) 147 | rstan::ess_tail(samples_matrix) 148 | 149 | saveRDS(samples, "data/mcmc6.rds") 150 | 151 | # slides: 152 | 153 | library(bayesplot) 154 | 
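library(ggplot2) # added so the ggsave() calls below work; library(bayesplot) imports but does not attach ggplot2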
color_scheme_set("viridisD") 155 | 156 | # ideal 1 chain 157 | set.seed(1) 158 | s1 <- function(i, n = 1000, mu = 0) { 159 | x <- arima.sim(n = n, list(ar = 0.01), sd = 1) + mu 160 | x <- matrix(x, ncol = 1) 161 | colnames(x) <- "theta" 162 | x 163 | } 164 | samples <- purrr::map(1, s1) 165 | names(samples) <- paste("Chain", 1) 166 | samples_matrix <- do.call(cbind, samples) 167 | bayesplot::mcmc_trace(samples) 168 | ggsave("slides/figs/chains-ideal.png", width = 6, height = 4) 169 | 170 | # getting stuck in multiple modes 171 | samples <- purrr::map(1, s1, n = 300, mu = -2) 172 | samples2 <- purrr::map(1, s1, n = 300, mu = 1) 173 | samples3 <- purrr::map(1, s1, n = 300, mu = -2) 174 | samples <- list(rbind(samples[[1]], samples2[[1]], samples3[[1]])) 175 | names(samples) <- paste("Chain", 1) 176 | samples_matrix <- do.call(rbind, samples) 177 | bayesplot::mcmc_trace(samples) 178 | ggsave("slides/figs/chains-modes.png", width = 6, height = 4) 179 | 180 | # drifting 181 | set.seed(1) 182 | s1 <- function(i) { 183 | N <- 1000 184 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.3) 185 | x <- matrix(x, ncol = 1) 186 | colnames(x) <- "theta" 187 | x <- x + 1:1000 * 0.001 188 | x 189 | } 190 | 191 | samples <- purrr::map(1, s1) 192 | names(samples) <- paste("Chain", 1:1) 193 | samples_matrix <- do.call(cbind, samples) 194 | bayesplot::mcmc_trace(samples) 195 | ggsave("slides/figs/chains-drift.png", width = 6, height = 4) 196 | 197 | # major autocorrelation 198 | set.seed(1) 199 | s1 <- function(i) { 200 | N <- 1000 201 | x <- arima.sim(n = N, list(ar = 0.97), sd = 0.3) 202 | x <- matrix(x, ncol = 1) 203 | colnames(x) <- "theta" 204 | x 205 | } 206 | 207 | samples <- purrr::map(1, s1) 208 | names(samples) <- paste("Chain", 1:1) 209 | samples_matrix <- do.call(cbind, samples) 210 | bayesplot::mcmc_trace(samples) 211 | ggsave("slides/figs/chains-auto.png", width = 6, height = 4) 212 | 213 | 214 | # drifting different ways 215 | set.seed(1) 216 | s1 <- function(i, drift = 0.001) { 217 | N <- 1000 218 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.3) 219 | x <- matrix(x, ncol = 1) 220 | colnames(x) <- "theta" 221 | x <- x + (1:1000 - 500) * drift 222 | x 223 | } 224 | 225 | samples <- purrr::map2(1:2, c(0.0015, -0.0015), \(i, drift) s1(i, drift)) 226 | names(samples) <- paste("Chain", 1:2) 227 | samples_matrix <- do.call(cbind, samples) 228 | bayesplot::mcmc_trace(samples) 229 | ggsave("slides/figs/chains-drift2.png", width = 6, height = 4) 230 | 231 | # stuck in different places 232 | set.seed(1) 233 | s1 <- function(i, mu = 0) { 234 | N <- 1000 235 | x <- arima.sim(n = N, list(ar = 0.5), sd = 0.4) + mu 236 | x <- matrix(x, ncol = 1) 237 | colnames(x) <- "theta" 238 | x 239 | } 240 | 241 | samples <- purrr::map2(1:2, c(1, -1), \(i, mu) s1(i, mu)) 242 | names(samples) <- paste("Chain", 1:2) 243 | samples_matrix <- do.call(cbind, samples) 244 | bayesplot::mcmc_trace(samples) 245 | 246 | ggsave("slides/figs/chains-different-mean.png", width = 6, height = 4) 247 | 248 | # ideal 4 chain 249 | set.seed(1) 250 | s1 <- function(i, n = 1000, mu = 0) { 251 | x <- arima.sim(n = n, list(ar = 0.1), sd = 1) + mu 252 | x <- matrix(x, ncol = 1) 253 | colnames(x) <- "theta" 254 | x 255 | } 256 | samples <- purrr::map(1:4, s1) 257 | names(samples) <- paste("Chain", 1:4) 258 | samples_matrix <- do.call(cbind, samples) 259 | bayesplot::mcmc_trace(samples) 260 | ggsave("slides/figs/chains-ideal4.png", width = 6, height = 4) 261 | -------------------------------------------------------------------------------- 
/data-raw/pcod-growth.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | 4 | d <- readRDS("../gfsynopsis-2023/report/data-cache-2024-05/pacific-cod.rds")$survey_samples 5 | # d <- dplyr::filter(d, !is.na(length), !is.na(age)) |> select(length, age) 6 | d <- dplyr::filter(d, !is.na(length), !is.na(age)) |> 7 | filter(survey_abbrev %in% c("SYN HS", "SYN WCVI")) |> 8 | select(length, age, survey = survey_abbrev) 9 | 10 | set.seed(1) 11 | d <- group_by(d, survey) |> 12 | sample_n(500) 13 | 14 | saveRDS(d, "data/pcod-growth.rds") 15 | -------------------------------------------------------------------------------- /data-raw/postpred-data.R: -------------------------------------------------------------------------------- 1 | # should be quadratic: 2 | x <- rnorm(100) 3 | n <- length(x) 4 | a <- 0.2 5 | b <- 0.2 6 | b2 <- -0.3 7 | sigma <- 0.3 8 | set.seed(2141) 9 | y <- a + b * x + b2 * x^2 + sigma * rnorm(n) 10 | df <- data.frame(x, y) 11 | plot(df) 12 | 13 | library(rstanarm) 14 | 15 | fit <- stan_glm( 16 | y ~ x, 17 | data = df, 18 | iter = 500, 19 | chains = 1, seed = 129 20 | ) 21 | 22 | 23 | p <- rstanarm::posterior_predict(fit) 24 | 25 | y <- df$y 26 | yrep <- p[1:20, ] 27 | bayesplot::ppc_dens_overlay(y, yrep) 28 | bayesplot::ppc_error_scatter_avg_vs_x(y, yrep, df$x) 29 | bayesplot::ppc_intervals(y, yrep, x = df$x) 30 | 31 | plot(df$x, y) 32 | plot(df$x, yrep[1,]) 33 | plot(df$x, yrep[2,]) 34 | 35 | saveRDS(df, "data/ppcheck-df1.rds") 36 | saveRDS(yrep, "data/ppcheck-yrep1.rds") 37 | 38 | # not enough dispersion: 39 | x <- rnorm(100) 40 | n <- length(x) 41 | a <- 1 42 | b <- 0.4 43 | set.seed(2141) 44 | y <- MASS::rnegbin(n, exp(a + b * x), theta = 0.2) 45 | df <- data.frame(x, y) 46 | plot(df) 47 | 48 | fit <- stan_glm( 49 | y ~ x, 50 | family = poisson(), 51 | data = df, 52 | iter = 500, 53 | chains = 1, seed = 291 54 | ) 55 | 56 | plot(df) 57 | y <- df$y 58 | p <- rstanarm::posterior_predict(fit) 59 | yrep <- p[1:20, ] 60 | plot(df$x, df$y) 61 | plot(df$x, yrep[1,]) 62 | plot(df$x, yrep[2,]) 63 | bayesplot::ppc_dens_overlay(y, yrep) 64 | 65 | saveRDS(df, "data/ppcheck-df2.rds") 66 | saveRDS(yrep, "data/ppcheck-yrep2.rds") 67 | 68 | # # missing some major variable 69 | # set.seed(1) 70 | # library(MASS) 71 | # Sigma <- matrix(c(10,3,3,2),2,2) 72 | # Sigma 73 | # x <- mvrnorm(n = 100, rep(0, 2), Sigma) 74 | # plot(x) 75 | # n <- nrow(x) 76 | # a <- 1 77 | # b1 <- 0.2 78 | # b2 <- -2 79 | # set.seed(2141) 80 | # x1 <- x[,1] 81 | # x2 <- x[,2] 82 | # y <- rnorm(n, a + b * x1 + b2 * x2, 0.2) 83 | # df <- data.frame(x1, x2, y) 84 | # plot(df$x1, df$y) 85 | # plot(df$x2, df$y) 86 | # 87 | # fit <- stan_glm( 88 | # y ~ x1, 89 | # data = df, 90 | # iter = 500, 91 | # chains = 1 92 | # ) 93 | # 94 | # p <- rstanarm::posterior_predict(fit) 95 | # yrep <- p[1:20, ] 96 | # plot(df$x1, df$y) 97 | # plot(df$x1, yrep[1,]) 98 | # plot(df$x1, yrep[2,]) 99 | # plot(df$x2, yrep[2,]) 100 | # bayesplot::ppc_dens_overlay(y, yrep) 101 | # bayesplot::ppc_error_scatter_avg_vs_x(y, yrep, df$x2) 102 | 103 | # missing group 104 | 105 | set.seed(1) 106 | x <- rnorm(200) 107 | a <- rnorm(5, 1, 2) 108 | b <- 0.9 109 | g <- rep(1:5, each = 40) 110 | y <- rnorm(length(x), a[g] + b * x, 0.2) 111 | df <- data.frame(x, y, g = factor(g)) 112 | ggplot(df, aes(x, y, colour = g)) + geom_point() 113 | 114 | fit <- stan_glm( 115 | y ~ x, 116 | data = df, 117 | iter = 500, 118 | chains = 1, seed = 292 119 | ) 120 | 121 | p <- rstanarm::posterior_predict(fit)
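# note: p is a draws-by-observations matrix; each row of p is one full
# simulated data set from the posterior predictive distribution.
# also note: the ggplot() calls in this file assume ggplot2 is attached
# (e.g., via library(ggplot2)); rstanarm does not attach it.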
122 | ggplot(df, aes(x, y)) + geom_point() 123 | 124 | y <- df$y 125 | yrep <- p[1:20, ] 126 | 127 | plot(df$x, yrep[1,]) 128 | plot(df$x, yrep[2,]) 129 | plot(df$x, yrep[3,]) 130 | 131 | bayesplot::ppc_dens_overlay(y, yrep) 132 | bayesplot::ppc_dens_overlay_grouped(y, yrep, df$g) 133 | 134 | df$yrep2 <- yrep[2,] 135 | ggplot(df, aes(x, y, colour = g)) + geom_point() 136 | ggplot(df, aes(x, yrep2, colour = g)) + geom_point() 137 | 138 | saveRDS(df, "data/ppcheck-df3.rds") 139 | saveRDS(yrep, "data/ppcheck-yrep3.rds") 140 | -------------------------------------------------------------------------------- /data-raw/rockfish-depth.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot2) 3 | 4 | spp <- gfsynopsis::get_spp_names() 5 | rock <- spp |> 6 | filter(grepl("rock", species_common_name)) 7 | 8 | out <- list() 9 | for (i in seq_len(nrow(rock))) { 10 | print(rock[i, ]) 11 | d0 <- readRDS(paste0("../gfsynopsis-2023/report/data-cache-2024-05/", rock[i, "spp_w_hyphens"], ".rds"))$survey_sets 12 | d <- filter(d0, survey_abbrev %in% c("SYN WCVI"), depth_m < 700, year != 2020, year != 2007) 13 | d <- filter(d, density_kgpm2 > 0) 14 | out[[i]] <- select(d, species_common_name, year, density_kgpm2, depth_m) 15 | } 16 | dat <- bind_rows(out) 17 | 18 | set.seed(1) 19 | dat <- group_by(dat, species_common_name) |> 20 | sample_frac(0.33) 21 | 22 | 23 | ggplot(dat, aes(log(depth_m), log(density_kgpm2))) + 24 | geom_point() + 25 | facet_wrap(~species_common_name) + 26 | geom_smooth(se = FALSE) 27 | 28 | dat <- group_by(dat, species_common_name) |> 29 | mutate(n = n()) |> 30 | filter(n > 100) 31 | 32 | d <- dat 33 | saveRDS(dat, file = "data/rockfish-depth.rds") 34 | -------------------------------------------------------------------------------- /data/grey-heron.csv: -------------------------------------------------------------------------------- 1 | main_id,sample_year,population_untransformed 2 | 20579,1928,6233 3 | 20579,1929,5104 4 | 20579,1930,5159 5 | 20579,1931,5736 6 | 20579,1932,5428 7 | 20579,1933,6653 8 | 20579,1934,6622 9 | 20579,1935,6148 10 | 20579,1936,6677 11 | 20579,1937,6046 12 | 20579,1938,6346 13 | 20579,1939,6552 14 | 20579,1940,5636 15 | 20579,1941,5612 16 | 20579,1942,4981 17 | 20579,1943,5605 18 | 20579,1944,6190 19 | 20579,1945,5921 20 | 20579,1946,5558 21 | 20579,1947,4192 22 | 20579,1948,4421 23 | 20579,1949,5551 24 | 20579,1950,6191 25 | 20579,1951,6879 26 | 20579,1952,7258 27 | 20579,1953,7171 28 | 20579,1954,7353 29 | 20579,1955,6927 30 | 20579,1956,6730 31 | 20579,1957,7244 32 | 20579,1958,7015 33 | 20579,1959,7560 34 | 20579,1960,7015 35 | 20579,1961,7166 36 | 20579,1962,5815 37 | 20579,1963,3949 38 | 20579,1964,4243 39 | 20579,1965,4678 40 | 20579,1966,5034 41 | 20579,1967,5595 42 | 20579,1968,6069 43 | 20579,1969,6449 44 | 20579,1970,6741 45 | 20579,1971,7255 46 | 20579,1972,7326 47 | 20579,1973,7872 48 | 20579,1974,7998 49 | 20579,1975,8006 50 | 20579,1976,7580 51 | 20579,1977,7675 52 | 20579,1978,7565 53 | 20579,1979,7289 54 | 20579,1980,7763 55 | 20579,1981,7850 56 | 20579,1982,7388 57 | 20579,1983,7321 58 | 20579,1984,7503 59 | 20579,1985,7401 60 | 20579,1986,6714 61 | 20579,1987,6746 62 | 20579,1988,7054 63 | 20579,1989,7876 64 | 20579,1990,8145 65 | 20579,1991,7979 66 | 20579,1992,8121 67 | 20579,1993,8501 68 | 20579,1994,8430 69 | 20579,1995,8809 70 | 20579,1996,8557 71 | 20579,1997,8731 72 | 20579,1998,8826 73 | -------------------------------------------------------------------------------- 
/data/house-wren.csv: -------------------------------------------------------------------------------- 1 | main_id,sample_year,population_untransformed 2 | 1163,1940,10 3 | 1163,1941,20 4 | 1163,1942,31 5 | 1163,1943,40 6 | 1163,1944,25 7 | 1163,1945,25 8 | 1163,1946,28 9 | 1163,1947,32 10 | 1163,1948,45 11 | 1163,1949,61 12 | 1163,1950,46 13 | 1163,1951,31 14 | 1163,1952,28 15 | 1163,1953,30 16 | 1163,1954,31 17 | 1163,1955,51 18 | 1163,1956,32 19 | 1163,1957,45 20 | 1163,1958,22 21 | 1163,1959,28 22 | 1163,1960,31 23 | 1163,1961,27 24 | 1163,1962,19 25 | 1163,1963,20 26 | 1163,1964,21 27 | 1163,1965,25 28 | 1163,1966,22 29 | 1163,1967,14 30 | 1163,1968,16 31 | 1163,1969,13 32 | 1163,1970,14 33 | 1163,1971,5 34 | 1163,1972,9 35 | 1163,1973,8 36 | 1163,1974,12 37 | 1163,1975,10 38 | 1163,1976,7 39 | -------------------------------------------------------------------------------- /data/hughes-etal-2018.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/hughes-etal-2018.rds -------------------------------------------------------------------------------- /data/kidiq.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/kidiq.rds -------------------------------------------------------------------------------- /data/mcmc1.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc1.rds -------------------------------------------------------------------------------- /data/mcmc2.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc2.rds -------------------------------------------------------------------------------- /data/mcmc3.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc3.rds -------------------------------------------------------------------------------- /data/mcmc4.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc4.rds -------------------------------------------------------------------------------- /data/mcmc5.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc5.rds -------------------------------------------------------------------------------- /data/mcmc6.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/mcmc6.rds -------------------------------------------------------------------------------- /data/pcod-age-length.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/pcod-age-length.rds 
-------------------------------------------------------------------------------- /data/pcod-growth.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/pcod-growth.rds -------------------------------------------------------------------------------- /data/ppcheck-df1.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-df1.rds -------------------------------------------------------------------------------- /data/ppcheck-df2.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-df2.rds -------------------------------------------------------------------------------- /data/ppcheck-df3.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-df3.rds -------------------------------------------------------------------------------- /data/ppcheck-yrep1.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-yrep1.rds -------------------------------------------------------------------------------- /data/ppcheck-yrep2.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-yrep2.rds -------------------------------------------------------------------------------- /data/ppcheck-yrep3.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck-yrep3.rds -------------------------------------------------------------------------------- /data/ppcheck1.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck1.rds -------------------------------------------------------------------------------- /data/ppcheck2.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/ppcheck2.rds -------------------------------------------------------------------------------- /data/rockfish-depth.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/seananderson/bayes-course/fc8603a1d9a8903f8e970856e0f749c1e7e1944a/data/rockfish-depth.rds -------------------------------------------------------------------------------- /extra/distributions.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | library(dplyr) 3 | fill <- "#00000010" 4 | col <- "grey20" 5 | 6 | x <- seq(-5, 5, length.out = 1000) 7 | ggplot(tibble(x, y = dnorm(x, 0, 1)), aes(x, ymax = y)) + 8 | geom_ribbon(ymin = 0, fill = fill, col = col) + 9 | theme_void() 10 | 
ggsave("extra/dist-normal.pdf", width = 6, height = 6 * 0.618) 11 | 12 | x <- seq(0, 5, length.out = 1000) 13 | ggplot(tibble(x, y = dnorm(x, 0, 1)), aes(x, ymax = y)) + 14 | geom_ribbon(ymin = 0, fill = fill, col = col) + 15 | ylim(0, NA) + 16 | theme_void() 17 | ggsave("extra/dist-half-normal.pdf", width = 6, height = 6 * 0.618) 18 | 19 | x <- seq(-1, 1, length.out = 1000) 20 | ggplot(tibble(x, y = dnorm(x, 0, 0.7)), aes(x, ymax = y)) + 21 | geom_ribbon(ymin = 0, fill = fill, col = col) + 22 | ylim(0, NA) + 23 | theme_void() 24 | ggsave("extra/dist-truncated-normal.pdf", width = 6, height = 6 * 0.618) 25 | 26 | x <- seq(-5, 5, length.out = 1000) 27 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1), 28 | y = dt(x = x, df = 3)), aes(x, ymax = y)) + 29 | geom_ribbon(ymin = 0, fill = fill, col = col) + 30 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA, 31 | colour = "grey10", lty = 2) + 32 | ylim(0, NA) + 33 | theme_void() 34 | ggsave("extra/dist-t.pdf", width = 6, height = 6 * 0.618) 35 | 36 | x <- seq(0, 5, length.out = 1000) 37 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1), 38 | y = dt(x = x, df = 3)), aes(x, ymax = y)) + 39 | geom_ribbon(ymin = 0, fill = fill, col = col) + 40 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA, 41 | colour = "grey10", lty = 2) + 42 | ylim(0, NA) + 43 | theme_void() 44 | ggsave("extra/dist-half-t.pdf", width = 6, height = 6 * 0.618) 45 | 46 | x <- seq(-10, 10, length.out = 1000) 47 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1), 48 | y = dcauchy(x = x)), aes(x, ymax = y)) + 49 | geom_ribbon(ymin = 0, fill = fill, col = col) + 50 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA, 51 | colour = "grey10", lty = 2) + 52 | ylim(0, NA) + 53 | theme_void() 54 | ggsave("extra/dist-cauchy.pdf", width = 6, height = 6 * 0.618) 55 | 56 | x <- seq(0, 10, length.out = 1000) 57 | ggplot(tibble(x, ynorm = dnorm(x = x, 0, 1), 58 | y = dcauchy(x = x)), aes(x, ymax = y)) + 59 | geom_ribbon(ymin = 0, fill = fill, col = col) + 60 | geom_ribbon(ymin = 0, aes(ymax = ynorm), fill = NA, 61 | colour = "grey10", lty = 2) + 62 | ylim(0, NA) + 63 | theme_void() 64 | ggsave("extra/dist-half-cauchy.pdf", width = 6, height = 6 * 0.618) 65 | 66 | x <- seq(0, 100, length.out = 1000) 67 | ggplot(tibble(x, y = dexp(x = x, rate = 0.1), 68 | y2 = dexp(x = x, rate = 0.2)), aes(x, ymax = y)) + 69 | geom_ribbon(ymin = 0, fill = fill, col = col) + 70 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) + 71 | ylim(0, NA) + 72 | theme_void() 73 | ggsave("extra/dist-exp.pdf", width = 6, height = 6 * 0.618) 74 | 75 | x <- seq(0, 15, length.out = 1000) 76 | ggplot(tibble(x, 77 | y = dgamma(x = x, shape = 5, rate = 1), 78 | y2 = dgamma(x = x, shape = 2, rate = 1), 79 | y3 = dgamma(x = x, shape = 1, rate = 1)), 80 | aes(x, ymax = y)) + 81 | geom_ribbon(ymin = 0, fill = fill, col = col) + 82 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) + 83 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y3)) + 84 | ylim(0, NA) + 85 | theme_void() 86 | ggsave("extra/dist-gamma.pdf", width = 6, height = 6 * 0.618) 87 | 88 | x <- seq(0, 1, length.out = 1000) 89 | ggplot(tibble(x, 90 | y = dbeta(x = x, shape1 = 2, shape2 = 2), 91 | y2 = dbeta(x = x, shape = 0.8, shape2 = 0.8), 92 | y3 = dbeta(x = x, shape = 3, shape2 = 1), 93 | y4 = dbeta(x = x, shape = 1, shape2 = 3)), 94 | aes(x, ymax = y)) + 95 | ylim(0, NA) + 96 | geom_ribbon(ymin = 0, fill = fill, col = col) + 97 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) + 98 | geom_ribbon(ymin = 0, fill = fill, col = 
col, aes(ymax = y3)) + 99 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y4)) + 100 | theme_void() 101 | ggsave("extra/dist-beta.pdf", width = 6, height = 6 * 0.618) 102 | 103 | x <- seq(0, 1, length.out = 1000) 104 | ggplot(tibble(x, 105 | y = dbinom(x = 3, size = 4, prob = x), 106 | y2 = dbinom(x = 1, size = 5, prob = x), 107 | y3 = dbinom(x = 2, size = 4, prob = x)), 108 | aes(x, ymax = y)) + 109 | ylim(0, NA) + 110 | geom_ribbon(ymin = 0, fill = fill, col = col) + 111 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y2)) + 112 | geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y3)) + 113 | # geom_ribbon(ymin = 0, fill = fill, col = col, aes(ymax = y4)) + 114 | theme_void() 115 | ggsave("extra/dist-binom.pdf", width = 6, height = 6 * 0.618) 116 | 117 | x <- seq(0, 20, length.out = 1000) 118 | ggplot(tibble(x, 119 | y = invgamma::dinvgamma(x, shape = 1, rate = 1)), 120 | aes(x, ymax = y)) + 121 | geom_ribbon(ymin = 0, fill = fill, col = col) + 122 | geom_vline(xintercept = 0, lty = 2) + 123 | ylim(0, NA) + 124 | theme_void() 125 | ggsave("extra/dist-inv-gamma.pdf", width = 6, height = 6 * 0.618) 126 | 127 | x <- seq(0, 20, length.out = 1000) 128 | ggplot(tibble(x, 129 | y = dunif(x, 0, 20)), 130 | aes(x, ymax = y)) + 131 | geom_ribbon(ymin = 0, fill = fill, col = col) + 132 | ylim(0, 0.1) + 133 | theme_void() 134 | ggsave("extra/dist-uniform.pdf", width = 6, height = 6 * 0.618) 135 | 136 | 137 | x <- runif(1e6, min = 0, max = 10) 138 | ggplot(tibble(x), aes(x)) + 139 | geom_histogram(fill = fill, col = col, breaks = seq(0, 10, length.out = 40)) + 140 | theme_void() 141 | ggsave("extra/dist-uniform-samples.pdf", width = 6, height = 6 * 0.618) 142 | 143 | ggplot(tibble(x), aes(exp(x))) + 144 | geom_histogram(fill = fill, col = col, breaks = seq(exp(0), exp(10), length.out = 40)) + 145 | theme_void() 146 | ggsave("extra/dist-uniform-exp-samples.pdf", width = 6, height = 6 * 0.618) 147 | 148 | library(dplyr) 149 | library(ggplot2) 150 | library(ggdist) 151 | 152 | # theme_set(theme_ggdist()) 153 | 154 | expand.grid( 155 | eta = 1:4, 156 | K = 2:5 157 | ) %>% 158 | ggplot(aes(y = ordered(eta), dist = "lkjcorr_marginal", arg1 = K, arg2 = eta)) + 159 | stat_slab() + 160 | facet_grid(~ paste0(K, "x", K)) + 161 | scale_y_discrete(limits = rev) + 162 | labs( 163 | title = paste0( 164 | "LKJ prior on different matrix sizes" 165 | ), 166 | y = "eta", 167 | x = "Marginal correlation" 168 | ) 169 | # theme(axis.title = element_text(hjust = 0)) 170 | -------------------------------------------------------------------------------- /extra/equations.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt]{article} 2 | \usepackage[usenames]{color} %used for font color 3 | \usepackage{amssymb} %maths 4 | \usepackage{amsmath} %maths 5 | \usepackage[utf8]{inputenc} %useful to type directly diacritic characters 6 | \begin{document} 7 | \begin{align*}[\beta_0, \beta_1, \sigma | \mathbf{y}] \propto \prod_{i=1}^n [y_i | \beta_0, \beta_1, \sigma] [ \beta_0 ] [ \beta_1 ] [ \sigma ]\end{align*} 8 | \end{document} -------------------------------------------------------------------------------- /extra/equations2.tex: -------------------------------------------------------------------------------- 1 | \documentclass[10pt]{article} 2 | \usepackage[usenames]{color} %used for font color 3 | \usepackage{amssymb} %maths 4 | \usepackage{amsmath} %maths 5 | \usepackage[utf8]{inputenc} %useful to type directly diacritic characters 6 | 
\begin{document} 7 | \begin{align*}y_i &\sim \mathrm{Normal}(\beta_0 + x_i \cdot \beta_1, \sigma), \ \text{for}\ i = 1, \ldots, n\\ 8 | \beta_0 &\sim \mathrm{Normal}(0, 10)\\ 9 | \beta_1 &\sim \mathrm{Normal}(0, 2)\\ 10 | \sigma &\sim \text{Half-t}(3, 0, 3)\end{align*} 11 | \end{document} -------------------------------------------------------------------------------- /extra/render.R: -------------------------------------------------------------------------------- 1 | files <- list.files(here::here(), pattern = "*.Rmd") 2 | # dir.create("exercises", showWarnings = FALSE) 3 | # remove_exercises <- function(x) { 4 | # f <- readLines(x) 5 | # i <- grep("```", f)[[1]] - 1 6 | # f <- c(f[seq(1, i)], 7 | # "```{r, include=FALSE, eval=TRUE}", 8 | # "knitr::opts_chunk$set(root.dir = '..')", 9 | # "```", 10 | # "", 11 | # f[seq(i+1, length(f))]) 12 | # f_ex <- ifelse(grepl("# exercise", f), "# exercise", f) 13 | # f_ex <- ifelse(grepl("", f_ex), "", f_ex) 14 | # writeLines(as.character(f_ex), con = file.path("exercises", x)) 15 | # } 16 | # purrr::walk(files, remove_exercises) 17 | 18 | ## knit all exercises (slow) 19 | purrr::walk(files, rmarkdown::render, envir = new.env()) 20 | 21 | library(future) 22 | plan(multisession) 23 | furrr::future_walk(files, rmarkdown::render, envir = new.env(), .options = furrr::furrr_options(seed = TRUE)) 24 | plan(sequential) 25 | -------------------------------------------------------------------------------- /extra/rstan-growth.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Fitting von Bertalanffy growth curves with error using Stan" 3 | output: html_document 4 | --- 5 | 6 | ```{r} 7 | knitr::opts_chunk$set( 8 | collapse = TRUE, 9 | comment = "#>" 10 | ) 11 | ``` 12 | 13 | 14 | 15 | ```{r} 16 | library(dplyr) 17 | dd <- readRDS("~/src/gfsynopsis/report/data-cache2/pbs-survey-samples.rds") 18 | dd2 <- readRDS("~/src/gfsynopsis/report/data-cache2/pbs-age-precision.rds") 19 | 20 | d <- filter(dd, species_common_name == "shortraker rockfish", major_stat_area_name == "5E: WEST COAST Q.C. 
ISLANDS") 21 | d2 <- filter(dd2, species_common_name == "SHORTRAKER ROCKFISH") 22 | 23 | x <- gfplot::tidy_age_precision(d2) 24 | m <- lm(log(prim_age) ~ log(prec_age) - 0, data = x) 25 | summary(m) 26 | 27 | d <- d[!duplicated(select(d, specimen_id)), , drop = FALSE] 28 | 29 | # d <- inner_join(d, select(d2, specimen_id, specimen_age) 30 | 31 | d <- filter(d, !is.na(age), !is.na(length), sex == 2) 32 | nrow(d) 33 | library(ggplot2) 34 | ggplot(d, aes(age, length)) + geom_point(aes(colour = (year))) + 35 | viridis::scale_color_viridis() 36 | ``` 37 | 38 | ```{r} 39 | newdata <- data.frame(age = seq(min(d$age), max(d$age), length.out = 100)) 40 | ``` 41 | 42 | ```{r} 43 | library(rstan) 44 | rstan_options(auto_write = TRUE) 45 | options(mc.cores = parallel::detectCores()) 46 | vb_fit <- stan("vb.stan", 47 | data = 48 | list( 49 | age = d$age, 50 | length = d$length, 51 | N = length(d$age), 52 | N_pred = nrow(newdata), 53 | age_pred = newdata$age), 54 | iter = 2000, chains = 1) 55 | 56 | pars <- c("k", "linf", "sigma", "t0") 57 | library(bayesplot) 58 | theme_set(theme_light()) 59 | vb_fit_array <- as.array(vb_fit) 60 | mcmc_trace(vb_fit_array, pars = pars) 61 | mcmc_hist(vb_fit_array, pars = pars) 62 | mcmc_dens(vb_fit_array, pars = pars) 63 | ``` 64 | 65 | ```{r} 66 | vb_fit2 <- stan("vb-fixed-error.stan", 67 | data = 68 | list( 69 | age = d$age, 70 | length = d$length, 71 | N = length(d$age), 72 | N_pred = nrow(newdata), 73 | age_pred = newdata$age, 74 | error = 0.2), # CV of ~0.5 error 75 | iter = 1000, chains = 1, 76 | pars = c("k", "linf", "sigma", "t0", "length_pred", "posterior_predictions")) 77 | vb_fit2 78 | 79 | vb_fit_array2 <- as.array(vb_fit2) 80 | mcmc_trace(vb_fit_array2, pars = c("k", "linf", "sigma", "t0")) 81 | mcmc_hist(vb_fit_array2, pars = c("k", "linf", "sigma", "t0")) 82 | ``` 83 | 84 | ```{r} 85 | vb_fit2 <- stan("vb-fixed-length-error.stan", 86 | data = 87 | list( 88 | age = d$age, 89 | length = d$length, 90 | N = length(d$age), 91 | N_pred = nrow(newdata), 92 | age_pred = newdata$age, 93 | error = 0.05), # CV of ~0.1 error 94 | iter = 800, chains = 2, seed = 29348, 95 | control = list(adapt_delta = 0.99, max_treedepth = 20), 96 | pars = c("k", "linf", "sigma", "t0", "length_pred", "posterior_predictions")) 97 | vb_fit2 98 | 99 | vb_fit_array2 <- as.array(vb_fit2) 100 | mcmc_trace(vb_fit_array2, pars = c("k", "linf", "sigma", "t0")) 101 | mcmc_hist(vb_fit_array2, pars = c("k", "linf", "sigma", "t0")) 102 | ``` 103 | 104 | ```{r} 105 | e <- extract(vb_fit) 106 | e2 <- extract(vb_fit2) 107 | median(e$k) 108 | median(e2$k) 109 | 110 | quantile(e$k) 111 | quantile(e2$k) 112 | 113 | median(e$linf) 114 | median(e2$linf) 115 | 116 | quantile(e$linf) 117 | quantile(e2$linf) 118 | 119 | median(e$sigma) 120 | median(e2$sigma) 121 | ``` 122 | 123 | ```{r} 124 | newdata$length <- apply(e$length_pred, 2, median) 125 | newdata$length2 <- apply(e2$length_pred, 2, median) 126 | 127 | ppc_dens_overlay(d$length, e$posterior_predictions[1:10, ]) 128 | ppc_dens_overlay(d$length, e2$posterior_predictions[1:10, ]) 129 | 130 | ggplot(d, aes(age, length)) + geom_point(aes(colour = as.factor(major_stat_area_name))) + 131 | geom_line(data = newdata, aes(age, length), alpha = 0.5) + 132 | geom_line(data = newdata, aes(age, length2), alpha = 0.5, lty = 2) 133 | ``` 134 | 135 | 136 | 137 | 138 | 139 | -------------------------------------------------------------------------------- /extra/rstanarm-counterfactual.Rmd: 
-------------------------------------------------------------------------------- 1 | --- 2 | title: "Fitting a counterfactual model with rstanarm" 3 | output: html_document 4 | --- 5 | 6 | ```{r} 7 | knitr::opts_chunk$set( 8 | collapse = TRUE, 9 | comment = "#>" 10 | ) 11 | ``` 12 | 13 | ```{r} 14 | library(dplyr) 15 | library(ggplot2) 16 | # library(brms) 17 | library(rstanarm) 18 | theme_set(ggsidekick::theme_sleek()) 19 | ``` 20 | 21 | ```{r} 22 | dory <- "Paracanthurus hepatus" 23 | cf <- c("Zebrasoma xanthurum", 24 | "Chrysiptera parasema", 25 | "Chrysiptera hemicyanea", 26 | "Acanthurus nigricans", 27 | "Acanthurus sohal", 28 | "Acanthurus japonicus", 29 | "Acanthurus coeruleus", 30 | "Acanthurus leucosternon") 31 | terms <- c(dory, cf) 32 | ``` 33 | 34 | ```{r} 35 | if (!file.exists("data-generated/google-dat.rds")) { 36 | dd <- lapply(terms, function(x) { 37 | gtrendsR::gtrends(x, time = "2014-01-01 2017-07-10", gprop = "web", 38 | low_search_volume = TRUE) 39 | }) 40 | d <- purrr::map_df(dd, function(x) { 41 | tibble(date = x$interest_over_time$date, 42 | keyword = x$interest_over_time$keyword, 43 | hits = x$interest_over_time$hits) 44 | }) 45 | saveRDS(d, file = "data-generated/google-dat.rds") 46 | } else { 47 | d <- readRDS("data-generated/google-dat.rds") 48 | } 49 | ``` 50 | 51 | ```{r} 52 | movie_date <- lubridate::ymd("2016-06-17", tz = "EST") 53 | ggplot(d, aes(date, hits)) + 54 | geom_line(col = "grey20", lwd = 0.5) + 55 | facet_wrap(~keyword) + xlab("") + 56 | ylab("Google searches as percent of maximum") + 57 | coord_cartesian(expand = FALSE) + 58 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) 59 | ``` 60 | 61 | ```{r} 62 | dat <- d 63 | dat <- mutate(dat, 64 | hits = ifelse(hits == 100 & keyword == "Paracanthurus hepatus", 60, hits)) 65 | dat <- dat %>% mutate(hits = hits / 100) %>% 66 | reshape2::dcast(date ~ keyword, value.var = "hits") %>% 67 | as_tibble() 68 | names(dat) <- gsub(" $", "", names(dat)) 69 | names(dat) <- gsub(" ", "_", names(dat)) 70 | ``` 71 | 72 | ```{r} 73 | dat <- mutate(dat, before_movie = date < movie_date) 74 | ``` 75 | 76 | ```{r} 77 | glimpse(dat) 78 | ``` 79 | 80 | ```{r} 81 | # options(mc.cores = parallel::detectCores()) 82 | before_dat <- filter(dat, before_movie) 83 | m1 <- stan_betareg( 84 | formula = Paracanthurus_hepatus ~ 85 | Chrysiptera_parasema + 86 | Chrysiptera_hemicyanea + 87 | Acanthurus_nigricans + 88 | Acanthurus_sohal + 89 | Acanthurus_japonicus + 90 | Acanthurus_coeruleus + 91 | Acanthurus_leucosternon, 92 | data = before_dat, 93 | prior = normal(0, 2, autoscale = FALSE), 94 | prior_intercept = normal(0, 10, autoscale = FALSE), 95 | prior_phi = student_t(3, 0, 25, autoscale = FALSE)) 96 | ``` 97 | 98 | ```{r, eval=FALSE} 99 | ?plot.stanreg 100 | ``` 101 | 102 | ```{r} 103 | summary(m1) 104 | # plot(m1, pars = "Chrysiptera_parasema") 105 | pp_check(m1, plotfun = "dens_overlay") 106 | pp_check(m1, plotfun = "error_hist") 107 | pp_check(m1, plotfun = "error_scatter") 108 | pp_check(m1, plotfun = "ribbon") 109 | pp_check(m1, plotfun = "ribbon", newdata = dat) 110 | 111 | pp_check(m1, plotfun = "intervals") 112 | 113 | coef_regex <- "^[A-Z]+[a-z_]+|\\(Intercept\\)" 114 | plot(m1, plotfun = "trace") 115 | plot(m1, plotfun = "acf") 116 | plot(m1, plotfun = "areas") 117 | plot(m1, plotfun = "areas", regex_pars = coef_regex) 118 | plot(m1, plotfun = "areas_ridges", regex_pars = coef_regex) 119 | plot(m1, plotfun = "intervals", regex_pars = coef_regex) 120 | ``` 121 | 122 | ```{r} 123 | pred <- posterior_predict(m1, 
newdata = dat)
124 | dim(pred)
125 | num_draws <- 4
126 | pred_long <- reshape2::melt(pred[seq_len(num_draws), ], # n draws from the posterior
127 | varnames = c("mcmc_sample", "time_step"), value.name = "y") %>%
128 | as_tibble() %>%
129 | mutate(date = rep(dat$date, each = num_draws))
130 | ```
131 | 
132 | ```{r}
133 | date_start <- lubridate::ymd("2015-04-01", tz = "EST")
134 | ggplot(dat, aes(date, Paracanthurus_hepatus)) +
135 | geom_line(lwd = 0.8, col = "blue") +
136 | xlab("") +
137 | ylab("Google searches as fraction of maximum") +
138 | coord_cartesian(expand = FALSE) +
139 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
140 | geom_line(data = pred_long, aes(y = y, group = mcmc_sample), alpha = 0.3) +
141 | xlim(date_start, NA)
142 | ```
143 | 
144 | ```{r}
145 | dat$posterior_median <- apply(pred, 2, median)
146 | dat$posterior_upr <- apply(pred, 2, quantile, probs = 0.9)
147 | dat$posterior_lwr <- apply(pred, 2, quantile, probs = 0.1)
148 | ```
149 | 
150 | ```{r}
151 | ggplot(dat, aes(date, Paracanthurus_hepatus)) +
152 | xlab("") +
153 | ylab("Google searches as fraction of maximum") +
154 | coord_cartesian(expand = FALSE) +
155 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
156 | geom_ribbon(aes(ymin = posterior_lwr, ymax = posterior_upr), alpha = 0.2) +
157 | geom_line() +
158 | xlim(date_start, NA)
159 | ```
160 | 
161 | ```{r}
162 | pred_diff <- pred
163 | for (i in seq(1, ncol(pred))) {
164 | pred_diff[, i] <- dat$Paracanthurus_hepatus[i] / pred_diff[, i]
165 | }
166 | dat$posterior_ratio_median <- apply(pred_diff, 2, median)
167 | dat$posterior_ratio_upr <- apply(pred_diff, 2, quantile, probs = 0.9)
168 | dat$posterior_ratio_lwr <- apply(pred_diff, 2, quantile, probs = 0.1)
169 | ```
170 | 
171 | ```{r}
172 | ggplot(dat, aes(date, posterior_ratio_median)) +
173 | xlab("") +
174 | ylab("Ratio of observed to counterfactual searches") +
175 | coord_cartesian(expand = FALSE) +
176 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
177 | geom_hline(yintercept = 1, lty = 2, col = "grey60") +
178 | geom_ribbon(aes(ymin = posterior_ratio_lwr, ymax = posterior_ratio_upr), alpha = 0.4) +
179 | geom_line() +
180 | xlim(date_start, NA) +
181 | scale_y_log10(breaks = c(0.2, 0.5, 1, 2, 5))
182 | ```
183 | 
184 | ```{r}
185 | dat$p <- apply(pred_diff, 2, function(x) mean(x > 1))
186 | ggplot(dat, aes(date, p)) + geom_line() + xlim(date_start, NA) +
187 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
188 | coord_cartesian(ylim = c(0, 1), expand = FALSE) +
189 | ylab("Probability of a positive effect")
190 | ```
191 | 
192 | ```{r}
193 | dat$p <- apply(pred_diff, 2, function(x) mean(x > 2))
194 | ggplot(dat, aes(date, p)) + geom_line() + xlim(date_start, NA) +
195 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
196 | coord_cartesian(ylim = c(0, 1), expand = FALSE) +
197 | ylab("Probability of at least a 2-fold effect")
198 | ```
199 | 
200 | ```{r}
201 | dat$p <- apply(pred_diff, 2, function(x) mean(x > 3))
202 | ggplot(dat, aes(date, p)) + geom_line() + xlim(date_start, NA) +
203 | geom_vline(xintercept = movie_date, col = "grey60", lty = 2) +
204 | coord_cartesian(ylim = c(0, 1), expand = FALSE) +
205 | ylab("Probability of at least a 3-fold effect")
206 | ```
207 | 
208 | # brms
209 | 
210 | Bonus:
211 | 
212 | ```{r, eval=FALSE}
213 | library(brms)
214 | m1 <- brm(
215 | formula = Paracanthurus_hepatus ~
216 | Chrysiptera_parasema +
217 | Chrysiptera_hemicyanea +
218 | Acanthurus_nigricans +
219 | Acanthurus_sohal +
220 | Acanthurus_japonicus +
221 | Acanthurus_coeruleus +
222 | Acanthurus_leucosternon,
223 | data = filter(dat, before_movie),
224 | family = Beta(link = "logit"),
225 | save_model = "brm-model.stan",
226 | prior = c(
227 | set_prior("normal(0, 2)", class = "b"),
228 | set_prior("normal(0, 10)", class = "Intercept"),
229 | set_prior("student_t(3, 0, 25)", class = "phi"))
230 | )
231 | ```
232 | 
--------------------------------------------------------------------------------
/extra/slides.R:
--------------------------------------------------------------------------------
1 | library(ggplot2)
2 | 
3 | ratio <- 0.68
4 | width <- 8
5 | dat <- expand.grid(x = 1:40, y = 1:25)
6 | dat$colour <- "a"
7 | 
8 | ggplot(dat, aes(x, y, colour = colour)) +
9 | geom_point(size = 4) +
10 | theme_void() +
11 | scale_color_manual(values = c("a" = "grey70", "b" = "red")) +
12 | guides(colour = FALSE)
13 | ggsave("extra/bayes-dots-0.pdf", width = width, height = ratio * width)
14 | 
15 | dat$colour[450] <- "b"
16 | ggplot(dat, aes(x, y, colour = colour)) +
17 | geom_point(size = 4) +
18 | theme_void() +
19 | scale_color_manual(values = c("a" = "grey70", "b" = "red")) +
20 | guides(colour = FALSE)
21 | ggsave("extra/bayes-dots-1.pdf", width = width, height = ratio * width)
22 | 
23 | set.seed(1)
24 | has_disease <- sample(seq_len(1000), size = 50)
25 | dat$colour[has_disease] <- "c"
26 | ggplot(dat, aes(x, y, colour = colour)) +
27 | geom_point(size = 4) +
28 | theme_void() +
29 | scale_color_manual(values = c("a" = "grey70", "b" = "red", "c" = "black")) +
30 | guides(colour = FALSE)
31 | ggsave("extra/bayes-dots-2.pdf", width = width, height = ratio * width)
32 | 
33 | dat2 <- data.frame(x = c(1:25, 1:25), y = c(rep(1, 25), rep(2, 25)), colour = "c", stringsAsFactors = FALSE)
34 | dat2$colour[8] <- "b"
35 | ggplot(dat2, aes(x, y, colour = colour)) +
36 | geom_point(size = 8) +
37 | theme_void() +
38 | ylim(-4, 7) +
39 | scale_color_manual(values = c("a" = "grey70", "b" = "red", "c" = "black")) +
40 | guides(colour = FALSE)
41 | ggsave("extra/bayes-dots-3.pdf", width = width, height = ratio * width)
42 | 
--------------------------------------------------------------------------------
/extra/vb-fixed-error.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | real<lower=0> error;
6 | 
7 | int N_pred;
8 | vector[N_pred] age_pred;
9 | }
10 | parameters {
11 | real<lower=0> k;
12 | real<lower=0> linf;
13 | real<lower=0> sigma;
14 | real t0;
15 | vector<lower=0>[N] age_true; // this is new
16 | }
17 | model {
18 | // priors:
19 | k ~ normal(0, 2);
20 | linf ~ normal(0, 200);
21 | sigma ~ student_t(3, 0, 2);
22 | t0 ~ normal(0, 20);
23 | 
24 | // aging measurement error:
25 | age ~ lognormal(log(age_true), error); // this is new
26 | 
27 | // likelihood:
28 | length ~ lognormal(log(linf * (1 - exp(-k * (age_true - t0)))), sigma); // this is modified
29 | }
30 | generated quantities {
31 | vector[N_pred] length_pred;
32 | vector[N] posterior_predictions;
33 | vector[N] age_true_posterior_predict; // this is new
34 | 
35 | for (i in 1:N_pred) {
36 | length_pred[i] = linf * (1 - exp(-k * (age_pred[i] - t0)));
37 | }
38 | 
39 | for (i in 1:N) {
40 | age_true_posterior_predict[i] = lognormal_rng(log(age[i]), error); // this is new
41 | posterior_predictions[i] =
42 | lognormal_rng(log(linf * (1 - exp(-k * (age_true_posterior_predict[i] - t0)))), sigma); // this is new
43 | }
44 | }
45 | 
--------------------------------------------------------------------------------
/extra/vb.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | 
6 | int N_pred;
7 | vector[N_pred] age_pred;
8 | }
9 | parameters {
10 | real<lower=0> k;
11 | real<lower=0> linf;
12 | real<lower=0> sigma;
13 | real t0;
14 | }
15 | model {
16 | k ~ normal(0, 2);
17 | linf ~ normal(0, 200);
18 | sigma ~ student_t(3, 0, 2);
19 | t0 ~ normal(0, 20);
20 | length ~ lognormal(log(linf * (1 - exp(-k * (age - t0)))), sigma);
21 | }
22 | generated quantities {
23 | vector[N_pred] length_pred;
24 | vector[N] posterior_predictions;
25 | 
26 | for (i in 1:N_pred) {
27 | length_pred[i] = linf * (1 - exp(-k * (age_pred[i] - t0)));
28 | }
29 | 
30 | for (i in 1:N) {
31 | posterior_predictions[i] = lognormal_rng(log(linf * (1 - exp(-k * (age[i] - t0)))), sigma);
32 | }
33 | }
34 | 
--------------------------------------------------------------------------------
/slides/loo.R:
--------------------------------------------------------------------------------
1 | library(rstanarm)
2 | library(ggplot2)
3 | theme_set(theme_light())
4 | options(mc.cores = 1) # no parallel: faster for these simple regressions
5 | library(dplyr)
6 | 
7 | x <- 1:20
8 | n <- length(x)
9 | a <- 0.2
10 | b <- 0.3
11 | sigma <- 1
12 | set.seed(2141)
13 | y <- a + b * x + sigma * rnorm(n)
14 | fake <- data.frame(x, y)
15 | fit_all <- stan_glm(y ~ x, data = fake, seed = 2141, chains = 10, refresh = 0)
16 | fit_minus_18 <- stan_glm(y ~ x, data = fake[-18, ], seed = 2141, refresh = 0)
17 | sims <- as.matrix(fit_all)
18 | sims_minus_18 <- as.matrix(fit_minus_18)
19 | condpred <- data.frame(y = seq(0, 9, length.out = 100))
20 | condpred$x <- sapply(condpred$y, \(y)
21 | mean(dnorm(y, sims[, 1] + sims[, 2] * x[18], sims[, 3]) * 6 + 18))
22 | condpredloo <- data.frame(y = seq(0, 9, length.out = 100))
23 | condpredloo$x <- sapply(condpredloo$y, \(y)
24 | mean(dnorm(y, sims_minus_18[, 1] + sims_minus_18[, 2] * x[18], sims_minus_18[, 3]) * 6 + 18))
25 | 
26 | ggplot(fake, aes(x = x, y = y)) +
27 | geom_point(color = "white", size = 3) +
28 | geom_point(color = "black", size = 2) +
29 | geom_abline(
30 | intercept = mean(sims[, 1]),
31 | slope = mean(sims[, 2]),
32 | color = "black"
33 | ) +
34 | geom_path(data = condpred, aes(x = x, y = y), color = "black") +
35 | geom_vline(xintercept = 18, linetype = 3, color = "grey") +
36 | geom_point(data = fake[18, ], color = "grey50", size = 5, shape = 1) +
37 | geom_abline(
38 | intercept = mean(sims_minus_18[, 1]),
39 | slope = mean(sims_minus_18[, 2]),
40 | color = "grey50",
41 | linetype = 2
42 | ) +
43 | geom_path(data = condpredloo, aes(x = x, y = y), color = "grey50", linetype = 2)
44 | ggsave("slides/figs/loo-init-eg.png", width = 6, height = 4.5)
45 | 
46 | fake$residual <- fake$y - fit_all$fitted.values
47 | fake$looresidual <- fake$y - loo_predict(fit_all)$value
48 | ggplot(fake, aes(x = x, y = residual)) +
49 | geom_point(color = "black", size = 2, shape = 16) +
50 | geom_point(aes(y = looresidual), color = "grey50", size = 2, shape = 1) +
51 | geom_segment(aes(xend = x, y = residual, yend = looresidual)) +
52 | geom_hline(yintercept = 0, linetype = 2)
53 | ggsave("slides/figs/loo-resid-eg.png", width = 6, height = 4.5)
54 | 
55 | 
56 | S <- 10
57 | y_hat <- matrix(nrow = S, ncol = n)
58 | for (s in 1:S) {
59 | for (i in 1:n) {
60 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i]
61 | }
62 | }
63 | 
64 | 
65 | y_i <- 10
66 | xx <- seq(0, 7, length.out = 100)
67 | mult <- 6
68 | 
69 | gg <- list()
70 | for (s in 1:6) {
71 | yy <- sapply(xx, \(x)
dnorm(x, mean = y_hat[s, y_i], sd = sims[s, 3], log = FALSE)) 72 | df <- data.frame(x = xx, y = yy) 73 | 74 | df2 <- data.frame(x = y[y_i], y = dnorm(y[y_i], mean = y_hat[s, y_i], sd = sims[s, 3], log = FALSE)*mult + y_i) 75 | df2 <- rbind(data.frame(x = y[y_i], y = y_i), df2) 76 | 77 | gg[[s]] <- data.frame(x = x, y = y_hat[s,]) |> 78 | ggplot(aes(x, y)) + 79 | geom_line() + 80 | geom_point(data = data.frame(Var2 = y_i, value = y[y_i]), mapping = aes(Var2, value), inherit.aes = FALSE) + 81 | geom_path(data = df, mapping = aes(x = y_i + y * mult, y = x), inherit.aes = FALSE) + 82 | geom_path(data = df2, mapping = aes(x = y, y = x), colour = "red") + 83 | xlab("x") + ylab("y") + 84 | geom_point(data = fake, mapping = aes(x, y), inherit.aes = FALSE, pch = 21) + 85 | ggtitle(paste0("MCMC sample ", s)) 86 | } 87 | gg[[1]] 88 | ggsave("slides/figs/elpd1.png", width = 6, height = 4.5) 89 | patchwork::wrap_plots(gg) 90 | ggsave("slides/figs/elpd6.png", width = 10, height = 7) 91 | 92 | S <- 50 93 | y_hat <- matrix(nrow = S, ncol = n) 94 | for (s in 1:S) { 95 | for (i in 1:n) { 96 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i] 97 | } 98 | } 99 | reshape2::melt(y_hat) |> 100 | ggplot(aes(Var2, value, group = Var1)) + 101 | geom_line(alpha = 0.3) + 102 | xlab("x") + ylab("y") + 103 | geom_point(data = fake, mapping = aes(x, y), inherit.aes = FALSE, pch = 21) 104 | ggsave("slides/figs/elpd-fits.png", width = 6, height = 4.5) 105 | 106 | S <- 1000 107 | y_hat <- matrix(nrow = S, ncol = n) 108 | for (s in 1:S) { 109 | for (i in 1:n) { 110 | y_hat[s, i] <- sims[s, 1] + sims[s, 2] * x[i] 111 | } 112 | } 113 | ll_2 <- matrix(nrow = S, ncol = n) 114 | for (s in 1:S) { 115 | for (i in 1:n) { 116 | ll_2[s, i] <- dnorm(y[i], mean = y_hat[s, i], sd = sims[s, 3], log = TRUE) 117 | } 118 | } 119 | 120 | df <- reshape2::melt(ll_2) |> 121 | filter(Var2 %in% 1:6) |> 122 | mutate(Var2 = paste("Data point", Var2)) 123 | 124 | df2 <- group_by(df, Var2) |> 125 | summarise(mean = mean(exp(value))) 126 | 127 | df |> 128 | ggplot(aes(exp(value))) + 129 | facet_wrap(~Var2) + 130 | geom_histogram() + xlab("Likelihood") + ylab("MCMC sample count") + 131 | geom_vline(data = df2, mapping = aes(xintercept = mean), colour = "red") + 132 | coord_cartesian(expand = FALSE) 133 | ggsave("slides/figs/elpd-dist.png", width = 7.5, height = 5) 134 | 135 | lpd_2 <- log(apply(exp(ll_2), 2, mean)) # computationally dangerous! 
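# A numerically safer sketch (added): the line above can underflow because
# exp() of very negative log-likelihoods rounds to zero. Working on the log
# scale with log-sum-exp avoids this, since
# log(mean(exp(ll))) = logSumExp(ll) - log(S):
log_sum_exp <- function(v) {
  m <- max(v)
  m + log(sum(exp(v - m)))
}
lpd_2_stable <- apply(ll_2, 2, log_sum_exp) - log(S)
# max(abs(lpd_2 - lpd_2_stable)) should be near zero for this small example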
136 | 
137 | sum(lpd_2)
138 | 
--------------------------------------------------------------------------------
/stan/8schools.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | vector<lower=0>[N] sigma;
5 | }
6 | parameters {
7 | real mu;
8 | real<lower=0> sigma_prime;
9 | vector[N] theta;
10 | }
11 | model {
12 | mu ~ normal(0, 10);
13 | sigma_prime ~ cauchy(0, 10);
14 | theta ~ normal(mu, sigma_prime);
15 | y ~ normal(theta, sigma);
16 | }
17 | 
--------------------------------------------------------------------------------
/stan/8schools_noncentered.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] y;
4 | vector<lower=0>[N] sigma;
5 | }
6 | parameters {
7 | real mu;
8 | real<lower=0> sigma_prime;
9 | vector[N] eta;
10 | }
11 | transformed parameters {
12 | vector[N] theta;
13 | theta = mu + sigma_prime * eta;
14 | }
15 | model {
16 | mu ~ normal(0, 10);
17 | sigma_prime ~ cauchy(0, 10);
18 | eta ~ normal(0, 1);
19 | y ~ normal(theta, sigma);
20 | }
21 | 
--------------------------------------------------------------------------------
/stan/gompertz.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // rows of data
3 | vector[N] y; // vector to hold observations
4 | real<lower=0> nu_rate; // rate parameter for nu exponential prior
5 | }
6 | parameters {
7 | real lambda;
8 | real b;
9 | real<lower=0> sigma;
10 | real<lower=0> nu;
11 | }
12 | model {
13 | // priors
14 | lambda ~ normal(0, 5);
15 | b ~ normal(0, 5);
16 | sigma ~ student_t(3, 0, 3);
17 | nu ~ exponential(nu_rate);
18 | 
19 | // likelihood
20 | for (i in 2:N) {
21 | y[i] ~ student_t(nu, lambda + b * y[i-1], sigma);
22 | }
23 | }
24 | generated quantities {
25 | vector[N] pred;
26 | pred[1] = y[1];
27 | for (i in 2:N) {
28 | pred[i] = student_t_rng(nu, lambda + b * y[i-1], sigma);
29 | }
30 | }
31 | 
--------------------------------------------------------------------------------
/stan/lm-matrix.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | int K; // number of predictors
4 | real y_meas[N]; // measurement of y
5 | matrix[N, K] X; // model predictor matrix
6 | }
7 | parameters {
8 | vector[K] beta; // vector of predictors
9 | real alpha; // intercept
10 | real<lower=0> sigma; // residual sd
11 | }
12 | model {
13 | sigma ~ student_t(3, 0, 2); // prior
14 | alpha ~ normal(0, 10); // prior
15 | beta ~ normal(0, 2); // prior
16 | y_meas ~ normal(alpha + X * beta, sigma); // likelihood
17 | }
18 | 
--------------------------------------------------------------------------------
/stan/lm-measure.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | vector[N] y; // response
4 | vector[N] x; // a predictor
5 | real<lower=0> tau; // measurement noise [This is new.]
6 | }
7 | parameters {
8 | real beta; // slope coefficient
9 | real alpha; // intercept coefficient
10 | real<lower=0> sigma; // residual error standard deviation
11 | vector[N] x_true; // unknown true x value [This is new.]
12 | }
13 | model {
14 | sigma ~ student_t(3, 0, 2); // prior
15 | alpha ~ normal(0, 10); // prior
16 | beta ~ normal(0, 2); // prior
17 | 
18 | // [optional prior on x_true could go here]
19 | x ~ normal(x_true, tau); // measurement model [This is new.]
20 | y ~ normal(alpha + x_true * beta, sigma); // data likelihood [This has changed.]
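// Note: no explicit prior is placed on x_true above, so it implicitly
// receives a flat prior; the measurement statement x ~ normal(x_true, tau)
// is what anchors the latent true values to the observed x.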
21 | }
22 | generated quantities {
23 | vector[N] posterior_predictions;
24 | for (i in 1:N) {
25 | posterior_predictions[i] = normal_rng(alpha + x_true[i] * beta, sigma);
26 | }
27 | }
28 | 
--------------------------------------------------------------------------------
/stan/lm-simple.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | vector[N] y; // response
4 | vector[N] x; // a predictor
5 | }
6 | parameters {
7 | real beta; // slope coefficient
8 | real alpha; // intercept coefficient
9 | real<lower=0> sigma; // residual error standard deviation
10 | }
11 | model {
12 | sigma ~ student_t(3, 0, 2); // prior
13 | alpha ~ normal(0, 10); // prior
14 | beta ~ normal(0, 2); // prior
15 | 
16 | y ~ normal(alpha + x * beta, sigma); // data likelihood
17 | }
18 | 
--------------------------------------------------------------------------------
/stan/lm.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N; // number of observations
3 | vector[N] y; // response
4 | vector[N] x; // a predictor
5 | }
6 | parameters {
7 | real beta; // slope coefficient
8 | real alpha; // intercept coefficient
9 | real<lower=0> sigma; // residual error standard deviation
10 | }
11 | model {
12 | sigma ~ student_t(3, 0, 2); // prior
13 | alpha ~ normal(0, 10); // prior
14 | beta ~ normal(0, 2); // prior
15 | 
16 | y ~ normal(alpha + x * beta, sigma); // data likelihood
17 | }
18 | generated quantities {
19 | vector[N] posterior_predictions;
20 | vector[N] log_lik;
21 | 
22 | for (i in 1:N) {
23 | posterior_predictions[i] = normal_rng(alpha + x[i] * beta, sigma);
24 | log_lik[i] = normal_lpdf(y[i] | alpha + x[i] * beta, sigma);
25 | }
26 | }
27 | 
--------------------------------------------------------------------------------
/vb/vb_basic.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | }
6 | parameters {
7 | real<lower=0> k;
8 | real<lower=0> linf;
9 | real<lower=0> sigma;
10 | real t0;
11 | }
12 | model {
13 | k ~ normal(0, 1);
14 | linf ~ normal(0, 100);
15 | t0 ~ normal(0, 1);
16 | sigma ~ student_t(3, 0, 2);
17 | length ~ normal(linf * (1 - exp(-k * (age - t0))), sigma);
18 | }
19 | 
--------------------------------------------------------------------------------
/vb/vb_norm.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | vector[4] prior_sds;
6 | int prior_only;
7 | }
8 | parameters {
9 | real<lower=0> k;
10 | real<lower=0> linf;
11 | real<lower=0> sigma;
12 | real t0;
13 | }
14 | transformed parameters {
15 | vector[N] predicted_length;
16 | for (i in 1:N) {
17 | // record expected length for visualization and to use below:
18 | predicted_length[i] = linf * (1 - exp(-k * (age[i] - t0)));
19 | }
20 | }
21 | model {
22 | // priors:
23 | k ~ normal(0, prior_sds[1]);
24 | linf ~ normal(0, prior_sds[2]);
25 | t0 ~ normal(0, prior_sds[3]);
26 | sigma ~ student_t(3, 0, prior_sds[4]);
27 | // data likelihood:
28 | if (!prior_only) { // enable prior predictive simulations
29 | length ~ normal(predicted_length, sigma);
30 | }
31 | }
32 | generated quantities {
33 | vector[N] length_sim; // for posterior predictive simulations
34 | vector[N] log_lik; // for ELPD calculations
35 | for (i in 1:N) {
36 | length_sim[i] = normal_rng(predicted_length[i], sigma);
37 | log_lik[i] = normal_lpdf(length[i] | predicted_length[i], sigma);
38 | }
39 | }
40 | 
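// Usage note: passing prior_only = 1 in the data list skips the likelihood,
// so length_sim then contains prior predictive simulations; with
// prior_only = 0 it contains posterior predictive simulations, and log_lik
// can be passed to loo::loo() for ELPD-based model comparison.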
--------------------------------------------------------------------------------
/vb/vb_norm_regions.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | vector[4] prior_sds;
6 | int prior_only;
7 | int survey_id[N];
8 | int N_surveys;
9 | }
10 | parameters {
11 | real<lower=0> k[N_surveys];
12 | real<lower=0> linf[N_surveys];
13 | real t0[N_surveys];
14 | real<lower=0> sigma;
15 | }
16 | transformed parameters {
17 | vector[N] predicted_length;
18 | for (i in 1:N) {
19 | // record expected length for visualization and to use below:
20 | predicted_length[i] = linf[survey_id[i]] *
21 | (1 - exp(-k[survey_id[i]] * (age[i] - t0[survey_id[i]])));
22 | }
23 | }
24 | model {
25 | // priors:
26 | k ~ normal(0, prior_sds[1]);
27 | linf ~ normal(0, prior_sds[2]);
28 | t0 ~ normal(0, prior_sds[3]);
29 | sigma ~ student_t(3, 0, prior_sds[4]);
30 | // data likelihood:
31 | if (!prior_only) { // enable prior predictive simulations
32 | length ~ normal(predicted_length, sigma);
33 | }
34 | }
35 | generated quantities {
36 | vector[N] length_sim; // for posterior predictive simulations
37 | vector[N] log_lik; // for ELPD calculations
38 | for (i in 1:N) {
39 | length_sim[i] = normal_rng(predicted_length[i], sigma);
40 | log_lik[i] = normal_lpdf(length[i] | predicted_length[i], sigma);
41 | }
42 | }
43 | 
--------------------------------------------------------------------------------
/vb/vb_norm_regions_sigma.stan:
--------------------------------------------------------------------------------
1 | data {
2 | int N;
3 | vector[N] length;
4 | vector[N] age;
5 | vector[4] prior_sds;
6 | int prior_only;
7 | int survey_id[N];
8 | int N_surveys;
9 | }
10 | parameters {
11 | real<lower=0> k[N_surveys];
12 | real<lower=0> linf[N_surveys];
13 | real t0[N_surveys];
14 | real<lower=0> sigma[N_surveys]; // survey-specific residual SD
15 | }
16 | transformed parameters {
17 | vector[N] predicted_length;
18 | for (i in 1:N) {
19 | // record expected length for visualization and to use below:
20 | predicted_length[i] = linf[survey_id[i]] *
21 | (1 - exp(-k[survey_id[i]] * (age[i] - t0[survey_id[i]])));
22 | }
23 | }
24 | model {
25 | // priors:
26 | k ~ normal(0, prior_sds[1]);
27 | linf ~ normal(0, prior_sds[2]);
28 | t0 ~ normal(0, prior_sds[3]);
29 | sigma ~ student_t(3, 0, prior_sds[4]);
30 | // data likelihood:
31 | if (!prior_only) { // enable prior predictive simulations
32 | length ~ normal(predicted_length, sigma[survey_id]);
33 | }
34 | }
35 | generated quantities {
36 | vector[N] length_sim; // for posterior predictive simulations
37 | vector[N] log_lik; // for ELPD calculations
38 | for (i in 1:N) {
39 | length_sim[i] = normal_rng(predicted_length[i], sigma[survey_id[i]]);
40 | log_lik[i] = normal_lpdf(length[i] | predicted_length[i], sigma[survey_id[i]]);
41 | }
42 | }
43 | 
--------------------------------------------------------------------------------
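A hedged sketch of how the region-structured growth model in vb/vb_norm_regions.stan might be fit with rstan. The example data frame and its column names (length, age, survey) are hypothetical stand-ins, not objects defined anywhere in this repository; the prior_sds order (k, linf, t0, sigma) follows the model block above.

library(rstan)

# hypothetical example data; in practice, swap in a real data set such as
# data/pcod-growth.rds (its column names are an assumption here):
set.seed(1)
dat <- data.frame(
  age = runif(200, 1, 20),
  survey = sample(c("A", "B", "C"), 200, replace = TRUE)
)
dat$length <- 50 * (1 - exp(-0.2 * dat$age)) * exp(rnorm(200, 0, 0.1))

stan_dat <- list(
  N = nrow(dat),
  length = dat$length,
  age = dat$age,
  prior_sds = c(1, 100, 10, 2), # SDs of the k, linf, t0, and sigma priors
  prior_only = 0,               # set to 1 for prior predictive simulation
  survey_id = as.integer(factor(dat$survey)),
  N_surveys = length(unique(dat$survey))
)

fit <- stan("vb/vb_norm_regions.stan", data = stan_dat, chains = 4, iter = 2000)
print(fit, pars = c("k", "linf", "t0", "sigma"))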