├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── classification_summary.R ├── classification_summary_cv.R ├── data_airbnb.R ├── data_airbnb_small.R ├── data_bald_eagles.R ├── data_basketball.R ├── data_bechdel.R ├── data_big_word_club.R ├── data_bike_users.R ├── data_bikes.R ├── data_bird_counts.R ├── data_book_banning.R ├── data_cherry_blossom_sample.R ├── data_climbers_sub.R ├── data_coffee_ratings.R ├── data_coffee_ratings_small.R ├── data_equality_index.R ├── data_fake_news.R ├── data_football.R ├── data_hotel_bookings.R ├── data_loons.R ├── data_moma.R ├── data_moma_sample.R ├── data_penguins_bayes.R ├── data_pop_vs_soda.R ├── data_pulse_of_the_nation.R ├── data_spotify.R ├── data_voices.R ├── data_weather_WU.R ├── data_weather_australia.R ├── data_weather_perth.R ├── globals.R ├── naive_classification_summary.R ├── naive_classification_summary_cv.R ├── plot_beta.R ├── plot_beta_binomial.R ├── plot_beta_ci.R ├── plot_binomial_likelihood.R ├── plot_gamma.R ├── plot_gamma_poisson.R ├── plot_normal.R ├── plot_normal_likelihood.R ├── plot_normal_normal.R ├── plot_poisson_likelihood.R ├── prediction_summary.R ├── prediction_summary_cv.R ├── sample_mode.R ├── summarize_beta.R ├── summarize_beta_binomial.R ├── summarize_gamma.R ├── summarize_gamma_poisson.R └── summarize_normal_normal.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── data-raw ├── DATASET.R ├── airbnb.R ├── airbnb_small.R ├── bald_eagles.R ├── basketball.R ├── bechdel │ └── bechdel_dataprep.R ├── big_word_club │ ├── big_word_club.csv │ ├── big_word_club_small.csv │ ├── big_word_clubprep.R │ └── bwc_data.dta ├── bike_users.R ├── bikes.R ├── bird_counts.R ├── book_banning.R ├── cherry_blossom_sample.R ├── climbers_sub │ ├── climbers.csv │ └── climbers_sub.R ├── coffee_ratings.R ├── coffee_ratings_small.R ├── equality_index │ ├── equality_index.csv │ └── 
equality_index_dataprep.R ├── fake_news │ ├── BuzzFeed_fake_news_content.csv │ ├── BuzzFeed_real_news_content.csv │ └── fake_news_dataprep.R ├── football.R ├── hotel_bookings.R ├── loons.R ├── moma.R ├── penguins_bayes.R ├── pop_vs_soda │ ├── pop_vs_soda_dataprep.R │ └── pop_vs_soda_raw.csv ├── pulse_of_the_nation │ ├── pulse_of_the_nation.csv │ └── pulse_of_the_nation_dataprep.R ├── spotify.R ├── voices.R ├── weather_WU.Rmd ├── weather_australia.R └── weather_perth.R ├── data ├── airbnb.rda ├── airbnb_small.rda ├── bald_eagles.rda ├── basketball.rda ├── bechdel.rda ├── big_word_club.rda ├── bike_users.rda ├── bikes.rda ├── bird_counts.rda ├── book_banning.rda ├── cherry_blossom_sample.rda ├── climbers_sub.rda ├── coffee_ratings.rda ├── coffee_ratings_small.rda ├── equality_index.rda ├── fake_news.rda ├── football.rda ├── hotel_bookings.rda ├── loons.rda ├── moma.rda ├── moma_sample.rda ├── penguins_bayes.rda ├── pop_vs_soda.rda ├── pulse_of_the_nation.rda ├── spotify.rda ├── voices.rda ├── weather_WU.rda ├── weather_australia.rda └── weather_perth.rda ├── docs ├── 404.html ├── LICENSE.html ├── articles │ ├── conjugate-families.html │ ├── conjugate-families_files │ │ ├── figure-html │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ └── unnamed-chunk-7-1.png │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.8 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── index.html │ ├── model-evaluation.html │ └── model-evaluation_files │ │ ├── header-attrs-2.10 │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ └── header-attrs.js │ │ ├── header-attrs-2.8 │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ └── header-attrs.js ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── 
pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── Rplot002.png │ ├── airbnb.html │ ├── airbnb_small.html │ ├── bald_eagles.html │ ├── basketball.html │ ├── bechdel.html │ ├── big_word_club.html │ ├── bike_users.html │ ├── bikes.html │ ├── bird_counts.html │ ├── book_banning.html │ ├── cherry_blossom_sample.html │ ├── classification_summary.html │ ├── classification_summary_cv.html │ ├── climbers_sub.html │ ├── coffee_ratings.html │ ├── coffee_ratings_small.html │ ├── equality_index.html │ ├── fake_news.html │ ├── figures │ │ ├── README-pressure-1.png │ │ └── bayes-rules-hex.png │ ├── football.html │ ├── hotel_bookings.html │ ├── index.html │ ├── loons.html │ ├── moma.html │ ├── moma_sample.html │ ├── naive_classification_summary.html │ ├── naive_classification_summary_cv.html │ ├── penguins_bayes.html │ ├── plot_beta-1.png │ ├── plot_beta.html │ ├── plot_beta_binomial-1.png │ ├── plot_beta_binomial-2.png │ ├── plot_beta_binomial.html │ ├── plot_beta_ci-1.png │ ├── plot_beta_ci.html │ ├── plot_binomial_likelihood-1.png │ ├── plot_binomial_likelihood.html │ ├── plot_gamma-1.png │ ├── plot_gamma.html │ ├── plot_gamma_poisson-1.png │ ├── plot_gamma_poisson-2.png │ ├── plot_gamma_poisson.html │ ├── plot_normal-1.png │ ├── plot_normal.html │ ├── plot_normal_likelihood-1.png │ ├── plot_normal_likelihood.html │ ├── plot_normal_normal-1.png │ ├── plot_normal_normal-2.png │ ├── plot_normal_normal.html │ ├── plot_poisson_likelihood-1.png │ ├── plot_poisson_likelihood.html │ ├── pop_vs_soda.html │ ├── prediction_summary.html │ ├── prediction_summary_cv.html │ ├── pulse_of_the_nation.html │ ├── sample_mode.html │ ├── spotify.html │ ├── summarize_beta.html │ ├── summarize_beta_binomial.html │ ├── summarize_gamma.html │ ├── summarize_gamma_poisson.html │ ├── summarize_normal_normal.html │ ├── voices.html │ ├── weather_WU.html │ ├── weather_australia.html │ └── weather_perth.html └── sitemap.xml ├── inst └── CITATION ├── man ├── airbnb.Rd ├── airbnb_small.Rd ├── bald_eagles.Rd 
├── basketball.Rd ├── bechdel.Rd ├── big_word_club.Rd ├── bike_users.Rd ├── bikes.Rd ├── bird_counts.Rd ├── book_banning.Rd ├── cherry_blossom_sample.Rd ├── classification_summary.Rd ├── classification_summary_cv.Rd ├── climbers_sub.Rd ├── coffee_ratings.Rd ├── coffee_ratings_small.Rd ├── equality_index.Rd ├── fake_news.Rd ├── figures │ └── bayes-rules-hex.png ├── football.Rd ├── hotel_bookings.Rd ├── loons.Rd ├── moma.Rd ├── moma_sample.Rd ├── naive_classification_summary.Rd ├── naive_classification_summary_cv.Rd ├── penguins_bayes.Rd ├── plot_beta.Rd ├── plot_beta_binomial.Rd ├── plot_beta_ci.Rd ├── plot_binomial_likelihood.Rd ├── plot_gamma.Rd ├── plot_gamma_poisson.Rd ├── plot_normal.Rd ├── plot_normal_likelihood.Rd ├── plot_normal_normal.Rd ├── plot_poisson_likelihood.Rd ├── pop_vs_soda.Rd ├── prediction_summary.Rd ├── prediction_summary_cv.Rd ├── pulse_of_the_nation.Rd ├── sample_mode.Rd ├── spotify.Rd ├── summarize_beta.Rd ├── summarize_beta_binomial.Rd ├── summarize_gamma.Rd ├── summarize_gamma_poisson.Rd ├── summarize_normal_normal.Rd ├── voices.Rd ├── weather_WU.Rd ├── weather_australia.Rd └── weather_perth.Rd └── vignettes ├── .gitignore ├── conjugate-families.Rmd └── model-evaluation.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^data-raw$ 5 | ^LICENSE.md 6 | ^cran-comments.md 7 | ^docs$ 8 | ^\.github$ 9 | ^CRAN-RELEASE$ 10 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: windows-latest, r: 'release'} 26 | - {os: macOS-latest, r: 'release'} 27 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 28 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } 29 | 30 | env: 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | RSPM: ${{ matrix.config.rspm }} 33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | steps: 36 | - uses: actions/checkout@v2 37 | 38 | - uses: r-lib/actions/setup-r@v1 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | 42 | - uses: r-lib/actions/setup-pandoc@v1 43 | 44 | - name: Query dependencies 45 | run: | 46 | install.packages('remotes') 47 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 48 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 49 | shell: Rscript {0} 50 | 51 | - name: Restore R package cache 52 | uses: actions/cache@v2 53 | with: 54 | path: ${{ env.R_LIBS_USER }} 55 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 56 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 57 | 58 | - name: Install system dependencies 59 | if: runner.os == 'Linux' 60 | run: | 61 | sudo apt-get install -y libcurl4-openssl-dev 62 | while read -r cmd 63 | do 64 | eval sudo $cmd 65 | done < 
<(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 66 | 67 | - name: Install dependencies 68 | run: | 69 | remotes::install_deps(dependencies = TRUE) 70 | remotes::install_cran("rcmdcheck") 71 | shell: Rscript {0} 72 | 73 | - name: Check 74 | env: 75 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 76 | run: | 77 | options(crayon.enabled = TRUE) 78 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 79 | shell: Rscript {0} 80 | 81 | - name: Upload check results 82 | if: failure() 83 | uses: actions/upload-artifact@main 84 | with: 85 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 86 | path: check 87 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | 7 | name: pkgdown 8 | 9 | jobs: 10 | pkgdown: 11 | runs-on: ubuntu-18.04 12 | env: 13 | RSPM: https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 14 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: r-lib/actions/setup-r@v1 20 | id: install-r 21 | 22 | - uses: r-lib/actions/setup-pandoc@v1 23 | 24 | - name: Install pak and query dependencies 25 | run: | 26 | install.packages("pak", repos = "https://r-lib.github.io/p/pak/dev/") 27 | saveRDS(pak::pkg_deps("local::.", dependencies = TRUE), ".github/r-depends.rds") 28 | shell: Rscript {0} 29 | 30 | - name: Cache R packages 31 | uses: actions/cache@v2 32 | with: 33 | path: ${{ env.R_LIBS_USER }} 34 | key: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1-${{ hashFiles('.github/r-depends.rds') }} 35 | restore-keys: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1- 36 | 37 | - name: Install system dependencies 38 | if: runner.os == 'Linux' 39 | run: | 40 | pak::local_system_requirements(execute = 
TRUE) 41 | pak::pkg_system_requirements("pkgdown", execute = TRUE) 42 | shell: Rscript {0} 43 | 44 | - name: Install dependencies 45 | run: | 46 | pak::local_install_dev_deps(upgrade = TRUE) 47 | pak::pkg_install("r-lib/pkgdown") 48 | shell: Rscript {0} 49 | 50 | - name: Install package 51 | run: R CMD INSTALL . 52 | 53 | - name: Build and deploy pkgdown site 54 | run: | 55 | git config --local user.name "$GITHUB_ACTOR" 56 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 57 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | bayesrules.Rproj 6 | inst/doc 7 | .DS_Store -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bayesrules 2 | Type: Package 3 | Title: Datasets and Supplemental Functions from Bayes Rules! Book 4 | Version: 0.0.3.9000 5 | Authors@R: c( 6 | person("Mine", "Dogucu", , "mdogucu@gmail.com", c("aut", "cre"), comment = c(ORCID = "0000-0002-8007-934X")), 7 | person("Alicia", "Johnson", , role = "aut"), 8 | person("Miles", "Ott", , role = "aut", comment = c(ORCID = "0000-0003-4457-6565")) 9 | ) 10 | Description: Provides datasets and functions used for analysis 11 | and visualizations in the Bayes Rules! book (). 12 | The package contains a set of functions that summarize and plot Bayesian models from some conjugate families 13 | and another set of functions for evaluation of some Bayesian models. 
14 | License: GPL (>= 3) 15 | Encoding: UTF-8 16 | LazyData: true 17 | RoxygenNote: 7.1.2 18 | Suggests: 19 | knitr, 20 | rmarkdown 21 | Imports: 22 | ggplot2, 23 | janitor, 24 | magrittr, 25 | dplyr, 26 | stats, 27 | purrr, 28 | rstanarm, 29 | e1071, 30 | groupdata2 31 | Depends: 32 | R (>= 2.10) 33 | URL: https://bayes-rules.github.io/bayesrules/docs/, https://github.com/bayes-rules/bayesrules/ 34 | BugReports: https://github.com/bayes-rules/bayesrules/issues 35 | VignetteBuilder: knitr 36 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(classification_summary) 4 | export(classification_summary_cv) 5 | export(naive_classification_summary) 6 | export(naive_classification_summary_cv) 7 | export(plot_beta) 8 | export(plot_beta_binomial) 9 | export(plot_beta_ci) 10 | export(plot_binomial_likelihood) 11 | export(plot_gamma) 12 | export(plot_gamma_poisson) 13 | export(plot_normal) 14 | export(plot_normal_likelihood) 15 | export(plot_normal_normal) 16 | export(plot_poisson_likelihood) 17 | export(prediction_summary) 18 | export(prediction_summary_cv) 19 | export(sample_mode) 20 | export(summarize_beta) 21 | export(summarize_beta_binomial) 22 | export(summarize_gamma) 23 | export(summarize_gamma_poisson) 24 | export(summarize_normal_normal) 25 | import(dplyr) 26 | import(ggplot2) 27 | import(groupdata2) 28 | import(janitor) 29 | importFrom(dplyr,filter) 30 | importFrom(e1071,naiveBayes) 31 | importFrom(groupdata2,fold) 32 | importFrom(magrittr,"%>%") 33 | importFrom(purrr,map_df) 34 | importFrom(rstanarm,posterior_predict) 35 | importFrom(stats,as.formula) 36 | importFrom(stats,dbeta) 37 | importFrom(stats,dbinom) 38 | importFrom(stats,density) 39 | importFrom(stats,dgamma) 40 | importFrom(stats,dnorm) 41 | importFrom(stats,integrate) 42 | importFrom(stats,mad) 43 | 
importFrom(stats,median) 44 | importFrom(stats,na.omit) 45 | importFrom(stats,predict) 46 | importFrom(stats,qbeta) 47 | importFrom(stats,qgamma) 48 | importFrom(stats,quantile) 49 | importFrom(stats,rnorm) 50 | importFrom(stats,sd) 51 | importFrom(stats,update) 52 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # bayesrules 2 | 3 | ## bug fixes 4 | - Beta mode calculation has been fixed in `summarize_beta()` and `summarize_beta_binomial()` function for situations when alpha < 1 and/or beta < 1. 5 | -------------------------------------------------------------------------------- /R/classification_summary.R: -------------------------------------------------------------------------------- 1 | #' Posterior Classification Summaries 2 | #' 3 | #' Given a set of observed data including a binary response variable y 4 | #' and an rstanreg model of y, 5 | #' this function returns summaries of the model's posterior classification quality. 6 | #' These summaries include a confusion matrix as well as estimates of the model's 7 | #' sensitivity, specificity, and overall accuracy. 
8 | #' 9 | #' @param model an rstanreg model object with binary y 10 | #' @param data data frame including the variables in the model, both response y and predictors x 11 | #' @param cutoff probability cutoff to classify a new case as positive (0.5 is the default) 12 | #' 13 | #' @return a list 14 | #' @export 15 | #' @import janitor dplyr 16 | #' @importFrom rstanarm posterior_predict 17 | #' 18 | #' @examples 19 | #' x <- rnorm(20) 20 | #' z <- 3*x 21 | #' prob <- 1/(1+exp(-z)) 22 | #' y <- rbinom(20, 1, prob) 23 | #' example_data <- data.frame(x = x, y = y) 24 | #' example_model <- rstanarm::stan_glm(y ~ x, data = example_data, family = binomial) 25 | #' classification_summary(model = example_model, data = example_data, cutoff = 0.5) 26 | classification_summary <- function(model, data, cutoff = 0.5){ 27 | # This function summarizes the classifications across all cases 28 | if(!("stanreg" %in% class(model))){ stop("the model must be a stanreg object.")} 29 | 30 | # Calculate probability posterior predictions 31 | predictions <- posterior_predict(model, 32 | newdata = data) 33 | 34 | # Turn the predictions into classifications 35 | if("lmerMod" %in% class(model)){ 36 | y <- as.data.frame(data %>% dplyr::select(as.character(model$formula)[2]))[,1] 37 | } 38 | else{ 39 | y <- as.data.frame(data %>% dplyr::select(model$terms[[2]]))[,1] 40 | } 41 | 42 | classifications <- data.frame(proportion = colMeans(predictions)) %>% 43 | mutate(classification = as.numeric(proportion >= cutoff)) %>% 44 | mutate(y = y) 45 | 46 | # Confusion matrix 47 | confusion_matrix <- classifications %>% 48 | tabyl(y, classification) 49 | if(ncol(confusion_matrix) == 2){ 50 | if("1" %in% names(confusion_matrix)){ 51 | 52 | confusion_matrix <- confusion_matrix %>% 53 | mutate("0" = rep(0,nrow(.))) 54 | } 55 | if("0" %in% names(confusion_matrix)){ 56 | confusion_matrix <- confusion_matrix %>% 57 | mutate("1" = rep(0,nrow(.))) 58 | } 59 | } 60 | # Accuracy rates 61 | mat <- 
as.matrix(confusion_matrix[,-1]) 62 | sensitivity <- mat[2,2] / sum(mat[2,]) 63 | specificity <- mat[1,1] / sum(mat[1,]) 64 | overall_accuracy <- sum(diag(mat)) / sum(mat) 65 | accuracy_rates <- data.frame(c(sensitivity, specificity, overall_accuracy)) 66 | row.names(accuracy_rates) <- c("sensitivity", "specificity", "overall_accuracy") 67 | names(accuracy_rates) <- "" 68 | 69 | return(list(confusion_matrix = confusion_matrix, accuracy_rates = accuracy_rates)) 70 | } 71 | -------------------------------------------------------------------------------- /R/data_airbnb.R: -------------------------------------------------------------------------------- 1 | #' Chicago AirBnB Data 2 | #' 3 | #' The AirBnB data was collated by Trinh and Ameri as part of a course project 4 | #' at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 5 | #' This data set includes the prices and features for 1561 AirBnB listings in Chicago, collected in 2016. 6 | #' 7 | #' @format A data frame with 1561 rows and 12 variables. Each row represents a single AirBnB listing. 
8 | #' \describe{ 9 | #' \item{price}{the nightly price of the listing (in USD)} 10 | #' \item{rating}{the listing's average rating, on a scale from 1 to 5} 11 | #' \item{reviews}{number of user reviews the listing has} 12 | #' \item{room_type}{the type of listing (eg: Shared room)} 13 | #' \item{accommodates}{number of guests the listing accommodates} 14 | #' \item{bedrooms}{the number of bedrooms the listing has} 15 | #' \item{minimum_stay}{the minimum number of nights to stay in the listing} 16 | #' \item{neighborhood}{the neighborhood in which the listing is located} 17 | #' \item{district}{the broader district in which the listing is located} 18 | #' \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 19 | #' \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 20 | #' \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 21 | #' } 22 | #' @source Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 23 | #' Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 24 | #' \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 25 | 26 | "airbnb" 27 | -------------------------------------------------------------------------------- /R/data_airbnb_small.R: -------------------------------------------------------------------------------- 1 | #' Chicago AirBnB Data 2 | #' 3 | #' The AirBnB data was collated by Trinh and Ameri as part of a course project 4 | #' at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 5 | #' This data set, a subset of the airbnb data in the bayesrules package, includes the prices and features for 869 AirBnB listings in Chicago, collected in 2016. 
6 | #' 7 | #' @format A data frame with 869 rows and 12 variables. Each row represents a single AirBnB listing. 8 | #' \describe{ 9 | #' \item{price}{the nightly price of the listing (in USD)} 10 | #' \item{rating}{the listing's average rating, on a scale from 1 to 5} 11 | #' \item{reviews}{number of user reviews the listing has} 12 | #' \item{room_type}{the type of listing (eg: Shared room)} 13 | #' \item{accommodates}{number of guests the listing accommodates} 14 | #' \item{bedrooms}{the number of bedrooms the listing has} 15 | #' \item{minimum_stay}{the minimum number of nights to stay in the listing} 16 | #' \item{neighborhood}{the neighborhood in which the listing is located} 17 | #' \item{district}{the broader district in which the listing is located} 18 | #' \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 19 | #' \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 20 | #' \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 21 | #' } 22 | #' @source Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 23 | #' Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 24 | #' \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 25 | 26 | "airbnb_small" 27 | -------------------------------------------------------------------------------- /R/data_bald_eagles.R: -------------------------------------------------------------------------------- 1 | #' Bald Eagle Count Data 2 | #' 3 | #' Bald Eagle count data collected from the year 1981 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 4 | #' The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 
5 | #' A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 6 | #' 7 | #' @format A data frame with 37 rows and 5 variables. Each row represents Bald Eagle observations in the given year. 8 | #' \describe{ 9 | #' \item{year}{year of data collection} 10 | #' \item{count}{number of birds observed} 11 | #' \item{hours}{total person-hours of observation period} 12 | #' \item{count_per_hour}{count divided by hours} 13 | #' \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 14 | #' } 15 | #' @source \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv}. 16 | "bald_eagles" 17 | -------------------------------------------------------------------------------- /R/data_basketball.R: -------------------------------------------------------------------------------- 1 | #' WNBA Basketball Data 2 | #' 3 | #' The WNBA Basketball Data was scraped from \url{https://www.basketball-reference.com/wnba/players/} and contains information on basketball players from the 2019 season. 4 | #' 5 | #' @format A data frame with 146 rows and 30 variables. Each row represents a single WNBA basketball player. The variables on each player are as follows. 
6 | #' \describe{ 7 | #' \item{player_name}{first and last name} 8 | #' \item{height}{height in inches} 9 | #' \item{weight}{weight in pounds} 10 | #' \item{year}{year of the WNBA season} 11 | #' \item{team}{team that the WNBA player is a member of} 12 | #' \item{age}{age in years} 13 | #' \item{games_played}{number of games played by the player in that season} 14 | #' \item{games_started}{number of games the player started in that season} 15 | #' \item{avg_minutes_played}{average number of minutes played per game} 16 | #' \item{avg_field_goals}{average number of field goals per game played} 17 | #' \item{avg_field_goal_attempts}{average number of field goals attempted per game played} 18 | #' \item{field_goal_pct}{percent of field goals made throughout the season} 19 | #' \item{avg_three_pointers}{average number of three pointers per game played} 20 | #' \item{avg_three_pointer_attempts}{average number of three pointers attempted per game played} 21 | #' \item{three_pointer_pct}{percent of three pointers made throughout the season} 22 | #' \item{avg_two_pointers}{average number of two pointers made per game played} 23 | #' \item{avg_two_pointer_attempts}{average number of two pointers attempted per game played} 24 | #' \item{two_pointer_pct}{percent of two pointers made throughout the season} 25 | #' \item{avg_free_throws}{average number of free throws made per game played} 26 | #' \item{avg_free_throw_attempts}{average number of free throws attempted per game played} 27 | #' \item{free_throw_pct}{percent of free throws made throughout the season} 28 | #' \item{avg_offensive_rb}{average number of offensive rebounds per game played} 29 | #' \item{avg_defensive_rb}{average number of defensive rebounds per game played} 30 | #' \item{avg_rb}{average number of rebounds (both offensive and defensive) per game played} 31 | #' \item{avg_assists}{average number of assists per game played} 32 | #' \item{avg_steals}{average number of steals per game played} 33 | #' 
\item{avg_blocks}{average number of blocks per game played} 34 | #' \item{avg_turnovers}{average number of turnovers per game played} 35 | #' \item{avg_personal_fouls}{average number of personal fouls per game played. Note: after 5 fouls the player is not allowed to play in that game anymore} 36 | #' \item{avg_points}{average number of points made per game played} 37 | #' \item{total_minutes}{total number of minutes played throughout the season} 38 | #' \item{starter}{whether or not the player started in more than half of the games they played} 39 | #' } 40 | #' @source \url{https://www.basketball-reference.com/} 41 | 42 | "basketball" 43 | -------------------------------------------------------------------------------- /R/data_bechdel.R: -------------------------------------------------------------------------------- 1 | #' Bechdel Test for over 1500 movies 2 | #' 3 | #' A dataset containing data behind the story 4 | #' "The Dollar-And-Cents Case Against Hollywood's Exclusion of Women" 5 | #' \url{https://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/}. 6 | #' 7 | #' @format A data frame with 1794 rows and 3 variables: 8 | #' \describe{ 9 | #' \item{year}{The release year of the movie} 10 | #' \item{title}{The title of the movie} 11 | #' \item{binary}{Bechdel test result (PASS, FAIL)} 12 | #' } 13 | #' @source 14 | "bechdel" 15 | -------------------------------------------------------------------------------- /R/data_big_word_club.R: -------------------------------------------------------------------------------- 1 | #' Big Word Club (BWC) 2 | #' 3 | #' Data on the effectiveness of a digital learning program designed by the Abdul Latif Jameel Poverty Action Lab (J-PAL) to address disparities in vocabulary levels among children from households with different income levels. 
4 | #' 5 | #' @format A data frame with 818 student-level observations and 31 variables: 6 | #' \describe{ 7 | #' \item{participant_id}{unique student id} 8 | #' \item{treat}{control group (0) or treatment group (1)} 9 | #' \item{age_months}{age in months} 10 | #' \item{female}{whether student identifies as female} 11 | #' \item{kindergarten}{grade level, pre-school (0) or kindergarten (1)} 12 | #' \item{teacher_id}{unique teacher id} 13 | #' \item{school_id}{unique school id} 14 | #' \item{private_school}{whether school is private} 15 | #' \item{title1}{whether school has Title 1 status} 16 | #' \item{free_reduced_lunch}{percent of school that receive free / reduced lunch} 17 | #' \item{state}{school location} 18 | #' \item{esl_observed}{whether student has ESL status} 19 | #' \item{special_ed_observed}{whether student has special education status} 20 | #' \item{new_student}{whether student enrolled after program began} 21 | #' \item{distracted_a1}{student's distraction level during assessment 1 (0 = not distracted; 1 = mildly distracted; 2 = moderately distracted; 3 = extremely distracted)} 22 | #' \item{distracted_a2}{same as distracted_a1 but during assessment 2} 23 | #' \item{distracted_ppvt}{same as distracted_a1 but during standardized assessment} 24 | #' \item{score_a1}{student score on BWC assessment 1} 25 | #' \item{invalid_a1}{whether student's score on assessment 1 was invalid} 26 | #' \item{score_a2}{student score on BWC assessment 2} 27 | #' \item{invalid_a2}{whether student's score on assessment 2 was invalid} 28 | #' \item{score_ppvt}{student score on standardized assessment} 29 | #' \item{score_ppvt_age}{score_ppvt adjusted for age} 30 | #' \item{invalid_ppvt}{whether student's score on standardized assessment was invalid} 31 | #' \item{t_logins_april}{number of teacher logins onto BWC system in April} 32 | #' \item{t_logins_total}{number of teacher logins onto BWC system during entire study} 33 | #' \item{t_weeks_used}{number of weeks of the BWC 
program that the classroom has completed} 34 | #' \item{t_words_learned}{teacher response to the number of words students had learned through BWC (0 = almost none; 1 = 1 to 5; 2 = 6 to 10)} 35 | #' \item{t_financial_struggle}{teacher response to the number of their students that have families that experience financial struggle} 36 | #' \item{t_misbehavior}{teacher response to frequency that student misbehavior interferes with teaching (0 = never; 1 = rarely; 2 = occasionally; 3 = frequently)} 37 | #' \item{t_years_experience}{teacher's number of years of teaching experience} 38 | #' \item{score_pct_change}{percent change in scores before and after the program} 39 | #' } 40 | #' @source These data correspond to the following study: Ariel Kalil, Susan Mayer, Philip Oreopoulos (2020). Closing the word gap with Big Word Club: Evaluating the Impact of a Tech-Based Early Childhood Vocabulary Program. Data was obtained through the Inter-university Consortium for Political and Social Research (ICPSR) \url{https://www.openicpsr.org/openicpsr/project/117330/version/V1/view/}. 41 | "big_word_club" 42 | -------------------------------------------------------------------------------- /R/data_bike_users.R: -------------------------------------------------------------------------------- 1 | #' Capital Bikeshare Bike Ridership (Registered and Casual Riders) 2 | #' 3 | #' Data on ridership among registered members and casual users of the Capital Bikeshare service in Washington, D.C..
4 | #' 5 | #' @format A data frame with 534 daily observations, 267 each for registered riders and casual riders, and 13 variables: 6 | #' \describe{ 7 | #' \item{date}{date of observation} 8 | #' \item{season}{fall, spring, summer, or winter} 9 | #' \item{year}{the year of the date} 10 | #' \item{month}{the month of the date} 11 | #' \item{day_of_week}{the day of the week} 12 | #' \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 13 | #' \item{holiday}{whether or not the date falls on a holiday (yes or no)} 14 | #' \item{temp_actual}{raw temperature (degrees Fahrenheit)} 15 | #' \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 16 | #' \item{humidity}{humidity level (percentage)} 17 | #' \item{windspeed}{wind speed (miles per hour)} 18 | #' \item{weather_cat}{weather category (categ1 = pleasant, categ2 = moderate, categ3 = severe)} 19 | #' \item{user}{rider type (casual or registered)} 20 | #' \item{rides}{number of bikeshare rides} 21 | #' } 22 | #' @source Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset/} 23 | "bike_users" 24 | -------------------------------------------------------------------------------- /R/data_bikes.R: -------------------------------------------------------------------------------- 1 | #' Capital Bikeshare Bike Ridership 2 | #' 3 | #' Data on ridership among registered members of the Capital Bikeshare service in Washington, D.C.. 
4 | #' 5 | #' @format A data frame with 500 daily observations and 13 variables: 6 | #' \describe{ 7 | #' \item{date}{date of observation} 8 | #' \item{season}{fall, spring, summer, or winter} 9 | #' \item{year}{the year of the date} 10 | #' \item{month}{the month of the date} 11 | #' \item{day_of_week}{the day of the week} 12 | #' \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 13 | #' \item{holiday}{whether or not the date falls on a holiday (yes or no)} 14 | #' \item{temp_actual}{raw temperature (degrees Fahrenheit)} 15 | #' \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 16 | #' \item{humidity}{humidity level (percentage)} 17 | #' \item{windspeed}{wind speed (miles per hour)} 18 | #' \item{weather_cat}{weather category (categ1 = pleasant, categ2 = moderate, categ3 = severe)} 19 | #' \item{rides}{number of bikeshare rides} 20 | #' } 21 | #' @source Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset} 22 | "bikes" 23 | -------------------------------------------------------------------------------- /R/data_bird_counts.R: -------------------------------------------------------------------------------- 1 | #' Bird Counts Data 2 | #' 3 | #' Bird count data collected between the years 1921 and 2017, in late December, by birdwatchers in the Ontario, Canada area. 4 | #' The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 5 | #' 6 | #' @format A data frame with 18706 rows and 7 variables. Each row represents observations for the given bird species in the given year. 
7 | #' \describe{ 8 | #' \item{year}{year of data collection} 9 | #' \item{species}{scientific name of observed bird species} 10 | #' \item{species_latin}{latin name of observed bird species} 11 | #' \item{count}{number of birds observed} 12 | #' \item{hours}{total person-hours of observation period} 13 | #' \item{count_per_hour}{count divided by hours} 14 | #' \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 15 | #' } 16 | #' @source \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv/}. 17 | "bird_counts" 18 | -------------------------------------------------------------------------------- /R/data_book_banning.R: -------------------------------------------------------------------------------- 1 | #' Book Banning Data 2 | #' 3 | #' The book banning data was collected by Fast and Hegland as part of a course project 4 | #' at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 5 | #' This data set includes the features and outcomes for 931 book challenges 6 | #' (ie. requests to ban a book) made in the US between 2000 and 2010. 7 | #' Information on the books being challenged and the characteristics of these books 8 | #' were collected from the American Library Society. State-level demographic information and 9 | #' political leanings were obtained from the US Census Bureau and Cook Political Report, respectively. 10 | #' Due to an outlying large number of challenges, book challenges made in the state of Texas 11 | #' were omitted. 12 | #' 13 | #' @format A data frame with 931 rows and 17 variables. Each row represents a single book challenge within the given state and date. 
14 | #' \describe{ 15 | #' \item{title}{title of book being challenged} 16 | #' \item{book_id}{identifier for the book} 17 | #' \item{author}{author of the book} 18 | #' \item{date}{date of the challenge} 19 | #' \item{year}{year of the challenge} 20 | #' \item{removed}{whether or not the challenge was successful (the book was removed)} 21 | #' \item{explicit}{whether the book was challenged for sexually explicit material} 22 | #' \item{antifamily}{whether the book was challenged for anti-family material} 23 | #' \item{occult}{whether the book was challenged for occult material} 24 | #' \item{language}{whether the book was challenged for inappropriate language} 25 | #' \item{lgbtq}{whether the book was challenged for LGBTQ material} 26 | #' \item{violent}{whether the book was challenged for violent material} 27 | #' \item{state}{US state in which the challenge was made} 28 | #' \item{political_value_index}{Political Value Index of the state (negative = leans Republican, 0 = neutral, positive = leans Democrat)} 29 | #' \item{median_income}{median income in the state, relative to the average state median income} 30 | #' \item{hs_grad_rate}{high school graduation rate, in percent, relative to the average state high school graduation rate} 31 | #' \item{college_grad_rate}{college graduation rate, in percent, relative to the average state college graduation rate} 32 | #' } 33 | #' @source Shannon Fast and Thomas Hegland (2011). Book Challenges: A Statistical Examination. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 34 | #' Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 
35 | #' \url{https://github.com/proback/BeyondMLR/blob/master/data/bookbanningNoTex.csv/} 36 | 37 | "book_banning" 38 | -------------------------------------------------------------------------------- /R/data_cherry_blossom_sample.R: -------------------------------------------------------------------------------- 1 | #' Cherry Blossom Running Race 2 | #' 3 | #' A sub-sample of outcomes for the annual Cherry Blossom Ten Mile race in Washington, D.C.. This sub-sample was taken from the complete Cherry data in the mdsr package. 4 | #' 5 | #' @format A data frame with 252 Cherry Blossom outcomes and 7 variables: 6 | #' \describe{ 7 | #' \item{runner}{a unique identifier for the runner} 8 | #' \item{age}{age of the runner} 9 | #' \item{net}{time to complete the race, from starting line to finish line (minutes)} 10 | #' \item{gun}{time between the official start of the race and the finish line (minutes)} 11 | #' \item{year}{year of the race} 12 | #' \item{previous}{the number of previous years in which the subject ran in the race} 13 | #' } 14 | #' @source Data in the original Cherry data set were obtained from \url{https://www.cherryblossom.org/post-race/race-results/}. 15 | "cherry_blossom_sample" 16 | -------------------------------------------------------------------------------- /R/data_climbers_sub.R: -------------------------------------------------------------------------------- 1 | #' Himalayan Climber Data 2 | #' 3 | #' A sub-sample of the Himalayan Database distributed through the R for Data Science TidyTuesday project. This dataset includes information on the results and conditions for various Himalayan climbing expeditions. Each row corresponds to a single member of a climbing expedition team. 
4 | #' 5 | #' @format A data frame with 2076 observations (1 per climber) and 22 variables: 6 | #' \describe{ 7 | #' \item{expedition_id}{unique expedition identifier} 8 | #' \item{member_id}{unique climber identifier} 9 | #' \item{peak_id}{unique identifier of the expedition's destination peak} 10 | #' \item{peak_name}{name of the expedition's destination peak} 11 | #' \item{year}{year of expedition} 12 | #' \item{season}{season of expedition (Autumn, Spring, Summer, Winter)} 13 | #' \item{sex}{climber gender identity which the database oversimplifies to a binary category} 14 | #' \item{age}{climber age} 15 | #' \item{citizenship}{climber citizenship} 16 | #' \item{expedition_role}{climber's role in the expedition (eg: Co-Leader)} 17 | #' \item{hired}{whether the climber was a hired member of the expedition} 18 | #' \item{highpoint_metres}{the destination peak's highpoint (metres)} 19 | #' \item{success}{whether the climber successfully reached the destination} 20 | #' \item{solo}{whether the climber was on a solo expedition} 21 | #' \item{oxygen_used}{whether the climber utilized supplemental oxygen} 22 | #' \item{died}{whether the climber died during the expedition} 23 | #' \item{death_cause}{} 24 | #' \item{death_height_metres}{} 25 | #' \item{injured}{whether the climber was injured on the expedition} 26 | #' \item{injury_type}{} 27 | #' \item{injury_height_metres}{} 28 | #' \item{count}{number of climbers in the expedition} 29 | #' \item{height_metres}{height of the peak in meters} 30 | #' \item{first_ascent_year}{the year of the first recorded summit of the peak (though not necessarily the actual first summit!)} 31 | #' } 32 | #' @source Original source: \url{https://www.himalayandatabase.com/}. Complete dataset distributed by: \url{https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-09-22/}. 
33 | "climbers_sub" 34 | -------------------------------------------------------------------------------- /R/data_coffee_ratings.R: -------------------------------------------------------------------------------- 1 | #' Coffee Ratings Data 2 | #' 3 | #' A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 4 | #' 5 | #' @format A data frame with 1339 batches of coffee beans and 27 variables on each batch. 6 | #' \describe{ 7 | #' \item{owner}{farm owner} 8 | #' \item{farm_name}{farm where beans were grown} 9 | #' \item{country_of_origin}{country where farm is} 10 | #' \item{mill}{where beans were processed} 11 | #' \item{in_country_partner}{country of coffee partner} 12 | #' \item{altitude_low_meters}{lowest altitude of the farm} 13 | #' \item{altitude_high_meters}{highest altitude of the farm} 14 | #' \item{altitude_mean_meters}{average altitude of the farm} 15 | #' \item{number_of_bags}{number of bags tested} 16 | #' \item{bag_weight}{weight of each tested bag} 17 | #' \item{species}{bean species} 18 | #' \item{variety}{bean variety} 19 | #' \item{processing_method}{how beans were processed} 20 | #' \item{aroma}{bean aroma grade} 21 | #' \item{flavor}{bean flavor grade} 22 | #' \item{aftertaste}{bean aftertaste grade} 23 | #' \item{acidity}{bean acidity grade} 24 | #' \item{body}{bean body grade} 25 | #' \item{balance}{bean balance grade} 26 | #' \item{uniformity}{bean uniformity grade} 27 | #' \item{clean_cup}{bean clean cup grade} 28 | #' \item{sweetness}{bean sweetness grade} 29 | #' \item{moisture}{bean moisture grade} 30 | #' \item{category_one_defects}{count of category one defects} 31 | #' \item{category_two_defects}{count of category two defects} 32 | #' \item{color}{bean color} 33 | #' \item{total_cup_points}{total bean rating (0 -- 100)} 34 | #' } 35 | #' @source 
\url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 36 | "coffee_ratings" 37 | -------------------------------------------------------------------------------- /R/data_coffee_ratings_small.R: -------------------------------------------------------------------------------- 1 | #' Coffee Ratings Data 2 | #' 3 | #' A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 4 | #' This is a simplified version of the coffee_ratings data. 5 | #' 6 | #' @format A data frame with 636 batches of coffee beans and 11 variables on each batch. 7 | #' \describe{ 8 | #' \item{farm_name}{farm where beans were grown} 9 | #' \item{total_cup_points}{total bean rating (0 -- 100)} 10 | #' \item{aroma}{bean aroma grade} 11 | #' \item{flavor}{bean flavor grade} 12 | #' \item{aftertaste}{bean aftertaste grade} 13 | #' \item{acidity}{bean acidity grade} 14 | #' \item{body}{bean body grade} 15 | #' \item{balance}{bean balance grade} 16 | #' \item{uniformity}{bean uniformity grade} 17 | #' \item{sweetness}{bean sweetness grade} 18 | #' \item{moisture}{bean moisture grade} 19 | #' } 20 | #' @source \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 21 | "coffee_ratings_small" 22 | -------------------------------------------------------------------------------- /R/data_equality_index.R: -------------------------------------------------------------------------------- 1 | #' LGBTQ+ Rights Laws by State 2 | #' 3 | #' Data on the number of LGBTQ+ equality laws (as of 2019) and demographics in each U.S. state. 
4 | #' 5 | #' @format A data frame with 50 observations, one per state, and 6 variables: 6 | #' \describe{ 7 | #' \item{state}{state name} 8 | #' \item{region}{region in which the state falls} 9 | #' \item{gop_2016}{percent of the 2016 presidential election vote earned by the Republican ("GOP") candidate} 10 | #' \item{laws}{number of LGBTQ+ rights laws (as of 2019)} 11 | #' \item{historical}{political leaning of the state over time (gop = Republican, dem = Democrat, swing = swing state)} 12 | #' \item{percent_urban}{percent of state's residents that live in urban areas (by the 2010 census)} 13 | #' } 14 | #' @source Data on LGBTQ+ laws were obtained from Warbelow, Sarah, Courtnay Avant, and Colin Kutney (2020). 2019 State Equality Index. Washington, DC. Human Rights Campaign Foundation. \url{https://assets2.hrc.org/files/assets/resources/HRC-SEI-2019-Report.pdf?_ga=2.148925686.1325740687.1594310864-1928808113.1594310864&_gac=1.213124768.1594312278.EAIaIQobChMI9dP2hMzA6gIVkcDACh21GgLEEAAYASAAEgJiJvD_BwE/}. Data on urban residency obtained from \url{https://www.icip.iastate.edu/tables/population/urban-pct-states/}. 15 | "equality_index" 16 | -------------------------------------------------------------------------------- /R/data_fake_news.R: -------------------------------------------------------------------------------- 1 | #' A collection of 150 news articles 2 | #' 3 | #' A dataset containing data behind the study 4 | #' "FakeNewsNet: A Data Repository with News Content, Social Context and Spatialtemporal Information for Studying Fake News on Social Media" 5 | #' \url{https://arxiv.org/abs/1809.01286}. 6 | #' The news articles in this dataset were posted to Facebook in September 2016, in the run-up to the U.S. presidential election. 
7 | #' 8 | #' @format A data frame with 150 rows and 30 variables: 9 | #' \describe{ 10 | #' \item{title}{The title of the news article} 11 | #' \item{text}{Text of the article} 12 | #' \item{url}{Hyperlink for the article} 13 | #' \item{authors}{Authors of the article} 14 | #' \item{type}{Binary variable indicating whether the article presents fake or real news (fake, real)} 15 | #' \item{title_words}{Number of words in the title} 16 | #' \item{text_words}{Number of words in the text} 17 | #' \item{title_char}{Number of characters in the title} 18 | #' \item{text_char}{Number of characters in the text} 19 | #' \item{title_caps}{Number of words that are all capital letters in the title} 20 | #' \item{text_caps}{Number of words that are all capital letters in the text} 21 | #' \item{title_caps_percent}{Percent of words that are all capital letters in the title} 22 | #' \item{text_caps_percent}{Percent of words that are all capital letters in the text} 23 | #' \item{title_excl}{Number of characters that are exclamation marks in the title} 24 | #' \item{text_excl}{Number of characters that are exclamation marks in the text} 25 | #' \item{title_excl_percent}{Percent of characters that are exclamation marks in the title} 26 | #' \item{text_excl_percent}{Percent of characters that are exclamation marks in the text} 27 | #' \item{title_has_excl}{Binary variable indicating whether the title of the article includes an exclamation point or not (TRUE, FALSE)} 28 | #' \item{anger}{Percent of words that are associated with anger} 29 | #' \item{anticipation}{Percent of words that are associated with anticipation} 30 | #' \item{disgust}{Percent of words that are associated with disgust} 31 | #' \item{fear}{Percent of words that are associated with fear} 32 | #' \item{joy}{Percent of words that are associated with joy} 33 | #' \item{sadness}{Percent of words that are associated with sadness} 34 | #' \item{surprise}{Percent of words that are associated with surprise} 35 | #' 
\item{trust}{Percent of words that are associated with trust} 36 | #' \item{negative}{Percent of words that have negative sentiment} 37 | #' \item{positive}{Percent of words that have positive sentiment} 38 | #' \item{text_syllables}{Number of syllables in text} 39 | #' \item{text_syllables_per_word}{Number of syllables per word in text} 40 | #' } 41 | #' @source Shu, K., Mahudeswaran, D., Wang, S., Lee, D. and Liu, H. (2018) FakeNewsNet: A Data Repository with News Content, Social Context and Dynamic Information for Studying Fake News on Social Media 42 | "fake_news" 43 | -------------------------------------------------------------------------------- /R/data_football.R: -------------------------------------------------------------------------------- 1 | #' Football Brain Measurements 2 | #' 3 | #' Brain measurements for football and non-football players as provided in the Lock5 package 4 | #' 5 | #' @format A data frame with 75 observations and 5 variables: 6 | #' \describe{ 7 | #' \item{group}{control = no football, 8 | #' fb_no_concuss = football player but no concussions, 9 | #' fb_concuss = football player with concussion history} 10 | #' \item{years}{Number of years a person played football} 11 | #' \item{volume}{Total hippocampus volume, in cubic centimeters} 12 | #' } 13 | #' @source Singh R, Meier T, Kuplicki R, Savitz J, et al., 14 | #' "Relationship of Collegiate Football Experience and Concussion 15 | #' With Hippocampal Volume and Cognitive Outcome," JAMA, 311(18), 2014 16 | #' 17 | "football" 18 | -------------------------------------------------------------------------------- /R/data_hotel_bookings.R: -------------------------------------------------------------------------------- 1 | #' Hotel Bookings Data 2 | #' 3 | #' A random subset of the data on hotel bookings originally collected by Antonio, Almeida and Nunes (2019) and distributed through the R for Data Science TidyTuesday project. 
4 | #' 5 | #' @format A data frame with 1000 hotel bookings and 32 variables on each booking. 6 | #' \describe{ 7 | #' \item{hotel}{"Resort Hotel" or "City Hotel"} 8 | #' \item{is_canceled}{whether the booking was cancelled} 9 | #' \item{lead_time}{number of days between booking and arrival} 10 | #' \item{arrival_date_year}{year of scheduled arrival} 11 | #' \item{arrival_date_month}{month of scheduled arrival} 12 | #' \item{arrival_date_week_number}{week of scheduled arrival} 13 | #' \item{arrival_date_day_of_month}{day of month of scheduled arrival} 14 | #' \item{stays_in_weekend_nights}{number of reserved weekend nights} 15 | #' \item{stays_in_week_nights}{number of reserved week nights} 16 | #' \item{adults}{number of adults in booking} 17 | #' \item{children}{number of children} 18 | #' \item{babies}{number of babies} 19 | #' \item{meal}{whether the booking includes breakfast (BB = bed & breakfast), breakfast and dinner (HB = half board), or breakfast, lunch, and dinner (FB = full board)} 20 | #' \item{country}{guest's country of origin} 21 | #' \item{market_segment}{market segment designation (eg: TA = travel agent, TO = tour operator)} 22 | #' \item{distribution_channel}{booking distribution channel (eg: TA = travel agent, TO = tour operator)} 23 | #' \item{is_repeated_guest}{whether or not booking was made by a repeated guest} 24 | #' \item{previous_cancellations}{guest's number of previous booking cancellations} 25 | #' \item{previous_bookings_not_canceled}{guest's number of previous bookings that weren't cancelled} 26 | #' \item{reserved_room_type}{code for type of room reserved by guest} 27 | #' \item{assigned_room_type}{code for type of room assigned by hotel} 28 | #' \item{booking_changes}{number of changes made to the booking} 29 | #' \item{deposit_type}{No Deposit, Non Refund, Refundable} 30 | #' \item{agent}{booking travel agency} 31 | #' \item{company}{booking company} 32 | #' \item{days_in_waiting_list}{number of days the guest waited for booking 
confirmation} 33 | #' \item{customer_type}{Contract, Group, Transient, Transient-party (a transient booking tied to another transient booking)} 34 | #' \item{average_daily_rate}{average hotel cost per day} 35 | #' \item{required_car_parking_spaces}{number of parking spaces the guest needed} 36 | #' \item{total_of_special_requests}{number of guest special requests} 37 | #' \item{reservation_status}{Canceled, Check-Out, No-Show} 38 | #' \item{reservation_status_date}{when the guest cancelled or checked out} 39 | #' } 40 | #' @source 41 | #' Nuno Antonio, Ana de Almeida, and Luis Nunes (2019). "Hotel booking demand datasets." Data in Brief (22): 41-49. 42 | #' \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-02-11/hotels.csv/}. 43 | "hotel_bookings" 44 | -------------------------------------------------------------------------------- /R/data_loons.R: -------------------------------------------------------------------------------- 1 | #' Loon Count Data 2 | #' 3 | #' Loon count data collected from the year 2000 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 4 | #' The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 5 | #' A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 6 | #' 7 | #' @format A data frame with 18 rows and 5 variables. Each row represents loon observations in the given year. 8 | #' \describe{ 9 | #' \item{year}{year of data collection} 10 | #' \item{count}{number of loons observed} 11 | #' \item{hours}{total person-hours of observation period} 12 | #' \item{count_per_hour}{count divided by hours} 13 | #' \item{count_per_100}{count_per_hour multiplied by 100 hours} 14 | #' } 15 | #' @source \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv}. 
16 | "loons" 17 | -------------------------------------------------------------------------------- /R/data_moma.R: -------------------------------------------------------------------------------- 1 | #' Museum of Modern Art (MoMA) data 2 | #' 3 | #' The Museum of Modern Art data includes information about the individual artists included in the collection of the Museum of Modern Art in New York City. 4 | #' It does not include information about works for artist collectives or companies. 5 | #' The data was made available by MoMA itself and downloaded in December 2020. 6 | #' 7 | #' @format A data frame with 10964 rows and 11 variables. Each row represents an individual artist in the MoMA collection. 8 | #' \describe{ 9 | #' \item{artist}{name} 10 | #' \item{country}{country of origin} 11 | #' \item{birth}{year of birth} 12 | #' \item{death}{year of death} 13 | #' \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 14 | #' \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 15 | #' \item{gender}{gender identity (as perceived by MoMA employees)} 16 | #' \item{department}{MoMA department in which the artist's works most frequently appear} 17 | #' \item{count}{number of the artist's works in the MoMA collection} 18 | #' \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 19 | #' \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 20 | #' } 21 | #' @source \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 22 | "moma" -------------------------------------------------------------------------------- /R/data_moma_sample.R: -------------------------------------------------------------------------------- 1 | #' Museum of Modern Art (MoMA) data sample 2 | #' 3 | #' A random sample of 100 artists represented in the Museum of Modern Art in New York City. 
4 | #' The data was made available by MoMA itself and downloaded in December 2020. 5 | #' It does not include information about artist collectives or companies. 6 | #' 7 | #' @format A data frame with 100 rows and 10 variables. Each row represents an individual artist in the MoMA collection. 8 | #' \describe{ 9 | #' \item{artist}{name} 10 | #' \item{country}{country of origin} 11 | #' \item{birth}{year of birth} 12 | #' \item{death}{year of death} 13 | #' \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 14 | #' \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 15 | #' \item{gender}{gender identity (as perceived by MoMA employees)} 16 | #' \item{count}{number of the artist's works in the MoMA collection} 17 | #' \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 18 | #' \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 19 | #' } 20 | #' @source \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 21 | "moma_sample" -------------------------------------------------------------------------------- /R/data_penguins_bayes.R: -------------------------------------------------------------------------------- 1 | #' Penguins Data 2 | #' 3 | #' Data on penguins in the Palmer Archipelago, originally collected by Gorman et al. and distributed through the penguins data in the palmerpenguins package. 4 | #' In addition to the original penguins data is a variable above_average_weight. 5 | #' 6 | #' @format A data frame with 344 penguins and 9 variables on each. 
7 | #' \describe{ 8 | #' \item{species}{species (Adelie, Chinstrap, Gentoo)} 9 | #' \item{island}{home island (Biscoe, Dream, Torgersen)} 10 | #' \item{year}{year of observation} 11 | #' \item{bill_length_mm}{length of bill (mm)} 12 | #' \item{bill_depth_mm}{depth of bill (mm)} 13 | #' \item{flipper_length_mm}{length of flipper (mm)} 14 | #' \item{body_mass_g}{body mass (g)} 15 | #' \item{above_average_weight}{whether or not the body mass exceeds 4200g (TRUE or FALSE)} 16 | #' \item{sex}{male or female} 17 | #' } 18 | #' @source Gorman KB, Williams TD, and Fraser WR (2014). Ecological sexual dimorphism and environmental variability within a community of antarctic penguins (Genus Pygoscelis). PLoS ONE, 9(3). 19 | "penguins_bayes" 20 | -------------------------------------------------------------------------------- /R/data_pop_vs_soda.R: -------------------------------------------------------------------------------- 1 | #' Pop vs Soda vs Coke 2 | #' 3 | #' Results of a volunteer survey on how people around the U.S. refer to fizzy cola drinks. The options are "pop", "soda", "coke", or "other". 4 | #' 5 | #' @format A data frame with 374250 observations, one per survey respondent, and 4 variables: 6 | #' \describe{ 7 | #' \item{state}{the U.S. state in which the respondent resides} 8 | #' \item{region}{region in which the state falls (as defined by the U.S. Census)} 9 | #' \item{word_for_cola}{how the respondent refers to fizzy cola drinks} 10 | #' \item{pop}{whether or not the respondent refers to fizzy cola drinks as "pop"} 11 | #' } 12 | #' @source The survey responses were obtained at \url{https://popvssoda.com/} which is maintained by Alan McConchie. 
13 | "pop_vs_soda" 14 | -------------------------------------------------------------------------------- /R/data_pulse_of_the_nation.R: -------------------------------------------------------------------------------- 1 | #' Cards Against Humanity's Pulse of the Nation Survey 2 | #' 3 | #' Cards Against Humanity's "Pulse of the Nation" project (\url{https://thepulseofthenation.com/}) conducted monthly polls into people's social and political views, as well as some silly things. This data includes responses to a subset of questions included in the poll conducted in September 2017. 4 | #' 5 | #' @format A data frame with observations on 1000 survey respondents with 15 variables: 6 | #' \describe{ 7 | #' \item{income}{income in \$1000s} 8 | #' \item{age}{age in years} 9 | #' \item{party}{political party affiliation} 10 | #' \item{trump_approval}{approval level of Donald Trump's job performance} 11 | #' \item{education}{maximum education level completed} 12 | #' \item{robots}{opinion of how likely their job is to be replaced by robots within 10 years} 13 | #' \item{climate_change}{belief in climate change} 14 | #' \item{transformers}{the number of Transformers films the respondent has seen} 15 | #' \item{science_is_honest}{opinion of whether scientists are generally honest and serve the public good} 16 | #' \item{vaccines_are_safe}{opinion of whether vaccines are safe and protect children from disease} 17 | #' \item{books}{number of books read in the past year} 18 | #' \item{ghosts}{whether or not they believe in ghosts} 19 | #' \item{fed_sci_budget}{respondent's estimate of the percentage of the federal budget that is spent on scientific research} 20 | #' \item{earth_sun}{belief about whether the earth is always farther away from the sun in winter than in summer (TRUE or FALSE)} 21 | #' \item{wise_unwise}{whether the respondent would rather be wise but unhappy, or unwise but happy} 22 | #' } 23 | #' @source 
\url{https://thepulseofthenation.com/downloads/201709-CAH_PulseOfTheNation_Raw.csv} 24 | "pulse_of_the_nation" 25 | -------------------------------------------------------------------------------- /R/data_spotify.R: -------------------------------------------------------------------------------- 1 | #' Spotify Song Data 2 | #' 3 | #' A sub-sample of the Spotify song data originally collected by Kaylin Pavlik (kaylinquest) and distributed through the R for Data Science TidyTuesday project. 4 | #' 5 | #' @format A data frame with 350 songs (or tracks) and 23 variables: 6 | #' \describe{ 7 | #' \item{track_id}{unique song identifier} 8 | #' \item{title}{song name} 9 | #' \item{artist}{song artist} 10 | #' \item{popularity}{song popularity from 0 (low) to 100 (high)} 11 | #' \item{album_id}{id of the album on which the song appears} 12 | #' \item{album_name}{name of the album on which the song appears} 13 | #' \item{album_release_date}{when the album was released} 14 | #' \item{playlist_name}{Spotify playlist on which the song appears} 15 | #' \item{playlist_id}{unique playlist identifier} 16 | #' \item{genre}{genre of the playlist} 17 | #' \item{subgenre}{subgenre of the playlist} 18 | #' \item{danceability}{a score from 0 (not danceable) to 100 (danceable) based on features such as tempo, rhythm, etc.} 19 | #' \item{energy}{a score from 0 (low energy) to 100 (high energy) based on features such as loudness, timbre, entropy, etc.} 20 | #' \item{key}{song key} 21 | #' \item{loudness}{song loudness (dB)} 22 | #' \item{mode}{0 (minor key) or 1 (major key)} 23 | #' \item{speechiness}{a score from 0 (non-speechy tracks) to 100 (speechy tracks)} 24 | #' \item{acousticness}{a score from 0 (not acoustic) to 100 (very acoustic)} 25 | #' \item{instrumentalness}{a score from 0 (not instrumental) to 100 (very instrumental)} 26 | #' \item{liveness}{a score from 0 (no live audience presence on the song) to 100 (strong live audience presence on the song)} 27 | #' \item{valence}{a 
score from 0 (the song is more negative, sad, angry) to 100 (the song is more positive, happy, euphoric)} 28 | #' \item{tempo}{song tempo (beats per minute)} 29 | #' \item{duration_ms}{song duration (ms)} 30 | #' } 31 | #' @source \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-01-21/spotify_songs.csv/}. 32 | "spotify" 33 | -------------------------------------------------------------------------------- /R/data_voices.R: -------------------------------------------------------------------------------- 1 | #' Voice Pitch Data 2 | #' 3 | #' Voice pitch data collected by Winter and Grawunder (2012). 4 | #' In an experiment, subjects participated in role-playing dialog under various conditions, 5 | #' while researchers monitored voice pitch (Hz). 6 | #' The conditions spanned different scenarios (eg: making an appointment, asking for a favor) 7 | #' and different attitudes to use in the scenario (polite or informal). 8 | #' 9 | #' @format A data frame with 84 rows and 4 variables. Each row represents a single observation for the given subject. 10 | #' \describe{ 11 | #' \item{subject}{subject identifier} 12 | #' \item{scenario}{context of the dialog (encoded as A, B, ..., G)} 13 | #' \item{attitude}{whether the attitude to use in dialog was polite or informal} 14 | #' \item{pitch}{average voice pitch (Hz)} 15 | #' } 16 | #' @source Winter, B., & Grawunder, S. (2012). The Phonetic Profile of Korean Formal and Informal Speech Registers. Journal of Phonetics, 40, 808-815. 17 | #' \url{https://bodo-winter.net/data_and_scripts/POP.csv}. 18 | #' \url{https://bodo-winter.net/tutorial/bw_LME_tutorial2.pdf}. 
19 | "voices" 20 | -------------------------------------------------------------------------------- /R/data_weather_WU.R: -------------------------------------------------------------------------------- 1 | #' Weather Data for 2 Australian Cities 2 | #' 3 | #' A sub-sample of daily weather information from the weatherAUS data in the rattle package for two Australian cities, Wollongong and Uluru. 4 | #' The weather_australia data in the bayesrules package combines this data with a third city 5 | #' 6 | #' @format A data frame with 200 daily observations and 22 variables from 2 Australian weather stations: 7 | #' \describe{ 8 | #' \item{location}{one of two weather stations} 9 | #' \item{mintemp}{minimum temperature (degrees Celsius)} 10 | #' \item{maxtemp}{maximum temperature (degrees Celsius)} 11 | #' \item{rainfall}{rainfall (mm)} 12 | #' \item{windgustdir}{direction of strongest wind gust} 13 | #' \item{windgustspeed}{speed of strongest wind gust (km/h)} 14 | #' \item{winddir9am}{direction of wind gust at 9am} 15 | #' \item{winddir3pm}{direction of wind gust at 3pm} 16 | #' \item{windspeed9am}{wind speed at 9am (km/h)} 17 | #' \item{windspeed3pm}{wind speed at 3pm (km/h)} 18 | #' \item{humidity9am}{humidity level at 9am (percent)} 19 | #' \item{humidity3pm}{humidity level at 3pm (percent)} 20 | #' \item{pressure9am}{atmospheric pressure at 9am (hpa)} 21 | #' \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 22 | #' \item{temp9am}{temperature at 9am (degrees Celsius)} 23 | #' \item{temp3pm}{temperature at 3pm (degrees Celsius)} 24 | #' \item{raintoday}{whether or not it rained today (Yes or No)} 25 | #' \item{risk_mm}{the amount of rain today (mm)} 26 | #' \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 27 | #' \item{year}{the year of the date} 28 | #' \item{month}{the month of the date} 29 | #' \item{day_of_year}{the day of the year} 30 | #' } 31 | #' @source Data in the original weatherAUS data set were obtained from 
\url{https://www.bom.gov.au/climate/data}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 32 | "weather_WU" 33 | -------------------------------------------------------------------------------- /R/data_weather_australia.R: -------------------------------------------------------------------------------- 1 | #' Weather Data for 3 Australian Cities 2 | #' 3 | #' A sub-sample of daily weather information from the weatherAUS data in the rattle package for three Australian cities: Wollongong, Hobart, and Uluru. 4 | #' 5 | #' @format A data frame with 300 daily observations and 22 variables from 3 Australian weather stations: 6 | #' \describe{ 7 | #' \item{location}{one of three weather stations} 8 | #' \item{mintemp}{minimum temperature (degrees Celsius)} 9 | #' \item{maxtemp}{maximum temperature (degrees Celsius)} 10 | #' \item{rainfall}{rainfall (mm)} 11 | #' \item{windgustdir}{direction of strongest wind gust} 12 | #' \item{windgustspeed}{speed of strongest wind gust (km/h)} 13 | #' \item{winddir9am}{direction of wind gust at 9am} 14 | #' \item{winddir3pm}{direction of wind gust at 3pm} 15 | #' \item{windspeed9am}{wind speed at 9am (km/h)} 16 | #' \item{windspeed3pm}{wind speed at 3pm (km/h)} 17 | #' \item{humidity9am}{humidity level at 9am (percent)} 18 | #' \item{humidity3pm}{humidity level at 3pm (percent)} 19 | #' \item{pressure9am}{atmospheric pressure at 9am (hpa)} 20 | #' \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 21 | #' \item{temp9am}{temperature at 9am (degrees Celsius)} 22 | #' \item{temp3pm}{temperature at 3pm (degrees Celsius)} 23 | #' \item{raintoday}{whether or not it rained today (Yes or No)} 24 | #' \item{risk_mm}{the amount of rain today (mm)} 25 | #' \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 26 | #' \item{year}{the year of the date} 27 | #' \item{month}{the month of the date} 28 | #' \item{day_of_year}{the day of the year} 29 | #' } 30 | #' @source Data in the original weatherAUS data set 
were obtained from \url{https://www.bom.gov.au/climate/data/}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 31 | "weather_australia" 32 | -------------------------------------------------------------------------------- /R/data_weather_perth.R: -------------------------------------------------------------------------------- 1 | #' Weather Data for Perth, Australia 2 | #' 3 | #' A sub-sample of daily weather information on Perth, Australia from the weatherAUS data in the rattle package. 4 | #' 5 | #' @format A data frame with 1000 daily observations and 21 variables: 6 | #' \describe{ 7 | #' \item{mintemp}{minimum temperature (degrees Celsius)} 8 | #' \item{maxtemp}{maximum temperature (degrees Celsius)} 9 | #' \item{rainfall}{rainfall (mm)} 10 | #' \item{windgustdir}{direction of strongest wind gust} 11 | #' \item{windgustspeed}{speed of strongest wind gust (km/h)} 12 | #' \item{winddir9am}{direction of wind gust at 9am} 13 | #' \item{winddir3pm}{direction of wind gust at 3pm} 14 | #' \item{windspeed9am}{wind speed at 9am (km/h)} 15 | #' \item{windspeed3pm}{wind speed at 3pm (km/h)} 16 | #' \item{humidity9am}{humidity level at 9am (percent)} 17 | #' \item{humidity3pm}{humidity level at 3pm (percent)} 18 | #' \item{pressure9am}{atmospheric pressure at 9am (hpa)} 19 | #' \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 20 | #' \item{temp9am}{temperature at 9am (degrees Celsius)} 21 | #' \item{temp3pm}{temperature at 3pm (degrees Celsius)} 22 | #' \item{raintoday}{whether or not it rained today (Yes or No)} 23 | #' \item{risk_mm}{the amount of rain today (mm)} 24 | #' \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 25 | #' \item{year}{the year of the date} 26 | #' \item{month}{the month of the date} 27 | #' \item{day_of_year}{the day of the year} 28 | #' } 29 | #' @source Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data/}. 
Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 30 | "weather_perth" 31 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("proportion", "classification", ".", ".folds", 2 | "x", "y1", "y2", "mu", 3 | "likelihood", "f_lambda", "post_median", 4 | "post_mean", "post_mad", "post_sd", 5 | "center", "error", "l_inner", "u_inner", 6 | "l_outer", "u_outer", 7 | "error_scaled", "within_inner", "within_outer")) 8 | -------------------------------------------------------------------------------- /R/naive_classification_summary.R: -------------------------------------------------------------------------------- 1 | #' Posterior Classification Summaries for a Naive Bayes model 2 | #' 3 | #' Given a set of observed data including a categorical response variable y 4 | #' and a naiveBayes model of y, 5 | #' this function returns summaries of the model's posterior classification quality. 6 | #' These summaries include a confusion matrix as well as an estimate of the model's 7 | #' overall accuracy. 
#'
#' @param model a naiveBayes model object with categorical y
#' @param data data frame including the variables in the model
#' @param y a character string indicating the y variable in data
#'
#' @return a list with two elements: \code{confusion_matrix}, a table of observed
#'   versus predicted classes with row-wise percentages and raw counts, and
#'   \code{overall_accuracy}, the proportion of cases that were classified correctly
#' @export
#' @importFrom stats predict
#' @examples
#' data(penguins_bayes, package = "bayesrules")
#' example_model <- e1071::naiveBayes(species ~ bill_length_mm, data = penguins_bayes)
#' naive_classification_summary(model = example_model, data = penguins_bayes, y = "species")
naive_classification_summary <- function(model, data, y){
  # This function summarizes the classifications across all cases
  if(!("naiveBayes" %in% class(model))){ stop("the model must be a naiveBayes object.")}

  # Calculate posterior classifications:
  # turn the naiveBayes predictions into a classification column alongside y
  classifications <- data %>%
    mutate(classification = predict(model, newdata = .)) %>%
    dplyr::select(y, classification)
  # Standardize the response column name so tabyl() below can refer to it directly
  names(classifications)[1] <- "y"

  # Confusion matrix: row-wise percentages (per observed class), formatted,
  # with the raw counts appended in parentheses
  confusion_matrix <- classifications %>%
    tabyl(y, classification) %>%
    adorn_percentages("row") %>%
    adorn_pct_formatting(digits = 2) %>%
    adorn_ns()
  # Restore the user-facing variable name on the first column
  names(confusion_matrix)[1] <- y
  # Overall accuracy = share of cases on the diagonal of the raw counts table
  mat <- table(classifications$y, classifications$classification)
  overall_accuracy <- sum(diag(mat)) / sum(mat)

  return(list(confusion_matrix = confusion_matrix, overall_accuracy = overall_accuracy))
}


--------------------------------------------------------------------------------
/R/plot_beta.R:
--------------------------------------------------------------------------------
#' @title Plot a Beta Model for \eqn{\pi}
#'
#' @description Plots the probability density function (pdf) for
#' a Beta(alpha, beta) model of variable \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the Beta model
#' @param mean,mode a logical value indicating whether to display the model mean and mode
#'
#' @return A density plot for the Beta model.
#' @export
#' @import ggplot2
#' @importFrom stats dbeta
#'
#' @examples
#' plot_beta(alpha = 1, beta = 12, mean = TRUE, mode = TRUE)
plot_beta <- function(alpha, beta, mean = FALSE, mode = FALSE){

  # Base density curve for Beta(alpha, beta) over [0, 1]
  p <- ggplot(data = data.frame(x = c(0, 1)),
              aes(x)) +
    stat_function(fun = stats::dbeta,
                  n = 101,
                  args = list(shape1 = alpha,
                              shape2 = beta)) +
    labs(x = expression(pi),
         y = expression(paste("f(", pi, ")")))

  # Vertical segment(s) marking the requested summary statistic(s).
  # The mean and mode markers are independent of one another, so each is
  # handled once here rather than enumerating every TRUE/FALSE combination
  # (the original triplicated the plotting code across three branches).
  line_values <- c()

  if (mean) {
    mean_val <- alpha / (alpha + beta)
    p <- p +
      geom_segment(aes(x = mean_val, y = 0,
                       xend = mean_val,
                       yend = stats::dbeta(mean_val, alpha, beta),
                       linetype = "mean"))
    line_values <- c(line_values, mean = "solid")
  }

  if (mode) {
    # NOTE(review): (alpha - 1)/(alpha + beta - 2) is undefined (0/0) when
    # alpha = beta = 1 and falls outside [0, 1] when a shape parameter is
    # below 1 -- this matches the original behavior.
    mode_val <- (alpha - 1)/(alpha + beta - 2)
    p <- p +
      geom_segment(aes(x = mode_val, y = 0,
                       xend = mode_val,
                       yend = stats::dbeta(mode_val, alpha, beta),
                       linetype = "mode"))
    line_values <- c(line_values, mode = "dashed")
  }

  if (length(line_values) > 0) {
    p <- p +
      scale_linetype_manual(values = line_values) +
      theme(legend.title = element_blank())
  }

  p
}

--------------------------------------------------------------------------------
/R/plot_beta_ci.R:
--------------------------------------------------------------------------------
#' @title Plot a Beta Model with Credible Interval
#'
#' @description Plots the probability density function (pdf) for a
#' Beta(alpha, beta) model of variable \eqn{\pi} with markings indicating
#' a credible interval for \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the Beta model
#' @param ci_level credible interval level
#'
#' @return A density plot for the Beta model
#' @export
#' @import ggplot2
#' @importFrom stats dbeta qbeta
#'
#' @examples
#' plot_beta_ci(alpha = 7, beta = 12, ci_level = 0.80)
plot_beta_ci <- function(alpha, beta, ci_level = 0.95){
  # BUG FIX: the original built an initial ggplot object `p` here that was
  # never used -- the function returned the second, separately-built plot
  # below. The dead construction has been removed; output is unchanged.

  # Quantiles bounding the middle ci_level of the Beta model
  q1 <- (1 - ci_level)/2
  q2 <- 1 - q1

  ci <- qbeta(c(q1,q2), alpha, beta)
  # NOTE(review): the mode formula is undefined when alpha = beta = 1 and
  # falls outside [0, 1] when a shape parameter is below 1 -- matches the
  # original behavior.
  mode <- (alpha - 1) / (alpha + beta - 2)
  marks <- c(ci, mode)

  # Shade the credible interval region, overlay the full density curve,
  # and drop vertical segments at the interval endpoints and the mode
  ggplot(data.frame(x = c(0,1)), aes(x=x)) +
    stat_function(fun = dbeta,
                  args = list(alpha, beta),
                  xlim = ci,
                  geom = "area",
                  fill = "lightblue") +
    stat_function(fun = dbeta,
                  args = list(alpha, beta)) +
    geom_segment(data =
                   data.frame(x = marks,
                              y1 = c(0,0,0),
                              y2 = dbeta(marks, alpha, beta)),
                 aes(x = x,
                     xend = x,
                     y = y1,
                     yend = y2)) +
    labs(x = expression(pi), y = "density")
}

--------------------------------------------------------------------------------
/R/plot_binomial_likelihood.R:
--------------------------------------------------------------------------------
#' @title Plot a Binomial Likelihood Function
#'
#'
@description Plots the Binomial likelihood function for variable \eqn{\pi}
#' given y observed successes in a series of n Binomial trials.
#'
#' @param y number of successes
#' @param n number of trials
#' @param mle a logical value indicating whether maximum likelihood estimate of \eqn{\pi}, y/n, should be plotted
#' @return a ggplot
#' @export
#'
#' @importFrom magrittr "%>%"
#' @import ggplot2
#' @importFrom dplyr filter
#' @importFrom stats dbinom
#' @examples
#' plot_binomial_likelihood(y = 3, n = 10, mle = TRUE)

plot_binomial_likelihood <- function(y,
                                     n,
                                     mle = FALSE){
  # Likelihood of pi given Y = y: dbinom(y, n, pi) evaluated along the x-axis
  g <- ggplot(data = data.frame(x = c(0, 1)), aes(x)) +
    stat_function(fun = dbinom, args = list(x = y, size = n)) +
    labs(x = expression(pi),
         y = expression(paste("L(",pi,"|(Y=", y, "))")))

  if (mle == TRUE){
    # MLE of pi for Binomial data is the observed success proportion y/n
    max <- y/n

    # copy y under another name: the line segment does not work since y is
    # an argument (aesthetic) in ggplot
    success <- y

    # Mark the MLE with a vertical segment from 0 up to the likelihood there
    g <- g +
      geom_segment(aes(x = max,
                       xend = max,
                       y = 0,
                       yend = dbinom(success, n, max)),
                   color = "cyan4") +
      theme(legend.position = "none")
  }

  g

}# end of function

--------------------------------------------------------------------------------
/R/plot_normal.R:
--------------------------------------------------------------------------------
#' @title Plot a Normal Model for \eqn{\mu}
#'
#' @description Plots the probability density function (pdf) for a
#' Normal(mean, sd^2) model of variable \eqn{\mu}.
#'
#' @param mean mean parameter of the Normal model
#' @param sd standard deviation parameter of the Normal model
#'
#' @return a ggplot
#' @export
#' @import ggplot2
#' @importFrom stats dnorm
#'
#' @examples
#' plot_normal(mean = 3.5, sd = 0.5)
plot_normal <- function(mean, sd){
  # Display the density across mean +/- 4 standard deviations, a range that
  # captures essentially all of the Normal model's mass
  plot_limits <- mean + c(-4, 4) * sd

  ggplot(data = data.frame(x = plot_limits), aes(x)) +
    stat_function(fun = dnorm,
                  n = 101,
                  args = list(mean = mean, sd = sd)) +
    labs(x = expression(mu),
         y = expression(paste("f(", mu, ")")))
}

--------------------------------------------------------------------------------
/R/plot_normal_likelihood.R:
--------------------------------------------------------------------------------
#' @title Plot a Normal Likelihood Function
#'
#' @description Plots the Normal likelihood function for variable \eqn{\mu}
#' given a vector of Normal data y.
#'
#' @param y vector of observed data
#' @param sigma optional value for assumed standard deviation of y. by default, this is calculated by the sample standard deviation of y.
#'
#' @return a ggplot of Normal likelihood
#' @export
#'
#' @import ggplot2
#' @importFrom stats dnorm
#'
#' @examples
#' plot_normal_likelihood(y = rnorm(50, mean = 10, sd = 2), sigma = 1.5)

plot_normal_likelihood <- function(y, sigma = NULL){
  # Sample statistics of the observed data
  y_bar <- mean(y)
  y_sd <- sd(y)
  n <- length(y)

  # If the user supplies sigma, use it as the assumed likelihood standard
  # deviation instead of the sample estimate
  if(!is.null(sigma)){y_sd <- sigma}

  # Joint Normal likelihood of mu = x given data y (product over observations)
  like_fun <- function(x){prod(dnorm(y, mean = x, sd = y_sd))}

  # Evaluate the likelihood on a grid spanning y_bar +/- 4 standard errors,
  # where essentially all of the likelihood's mass lies
  plot_data <- data.frame(mu = seq(y_bar - 4*y_sd/sqrt(n),
                                   y_bar + 4*y_sd/sqrt(n),
                                   length = 100)) %>%
    mutate(likelihood = Vectorize(like_fun)(mu))

  ggplot(plot_data, aes(x = mu, y = likelihood)) +
    geom_line() +
    labs(x = expression(mu),
         y = expression(paste("L(",mu,"|(Y=y))", sep = "")))
}

--------------------------------------------------------------------------------
/R/plot_poisson_likelihood.R:
--------------------------------------------------------------------------------
#' @title Plot a Poisson Likelihood Function
#'
#' @description Plots the Poisson likelihood function for variable \eqn{\lambda}
#' given a vector of Poisson counts y.
#'
#' @param y vector of observed Poisson counts
#' @param lambda_upper_bound upper bound for lambda values to display on x-axis
#'
#' @return a ggplot of Poisson likelihood
#' @export
#'
#' @import ggplot2
#'
#' @examples
#' plot_poisson_likelihood(y = c(4, 2, 7), lambda_upper_bound = 10)
plot_poisson_likelihood <-
  function(y, lambda_upper_bound = 10){
    # Grid of lambda values at which to evaluate the likelihood
    lambda = seq(0, lambda_upper_bound, by = 0.1)

    # Joint Poisson likelihood L(lambda | y) = prod_i dpois(y_i, lambda),
    # written in closed form; ht is already vectorized over lambda
    ht = exp(-1*length(y)*lambda)*lambda^(sum(y))/prod(factorial(y))

    # BUG FIX: the original used f_lambda = rep(ht, length(lambda)), which
    # replicated the (already lambda-length) likelihood vector length(lambda)
    # times and silently relied on data.frame() recycling lambda -- yielding
    # a 101x duplicated data frame. ht alone pairs one likelihood value with
    # each lambda value; the rendered curve is identical.
    data <- data.frame(lambda = lambda,
                       f_lambda = ht)
    ggplot(data, aes(x = lambda,
                     y = f_lambda)) +
      geom_line() +
      labs(x = expression(lambda),
           y = expression(paste("L(",lambda,"|(Y=", y, "))")))

  }

--------------------------------------------------------------------------------
/R/sample_mode.R:
--------------------------------------------------------------------------------
#' Sample Mode
#'
#' Calculate the sample mode of vector x.
#'
#' @param x vector of sample data
#'
#' @return sample mode
#' @export
#' @importFrom stats density
#'
#' @examples sample_mode(rbeta(100, 2, 7))
sample_mode <- function(x){
  # Kernel density estimate of x; the mode is the grid point at which the
  # estimated density is highest
  d <- density(x)
  d$x[which.max(d$y)]
}

--------------------------------------------------------------------------------
/R/summarize_beta.R:
--------------------------------------------------------------------------------
#' @title Summarize a Beta Model for \eqn{\pi}
#'
#' @description Summarizes the expected value, variance, and mode of
#' a Beta(alpha, beta) model for variable \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the Beta model
#'
#' @return a summary table
#' @export
#'
#' @examples
#' summarize_beta(alpha = 1, beta = 15)


summarize_beta <- function (alpha, beta){
  # Mean and spread of the Beta(alpha, beta) model
  beta_mean <- alpha / (alpha + beta)
  beta_var <- alpha * beta / ((alpha + beta)^2 * (alpha + beta + 1))
  beta_sd <- sqrt(beta_var)

  # The mode depends on where the shape parameters sit relative to 1:
  # both below 1 gives a bimodal model peaking at 0 and 1; otherwise the
  # density peaks at an endpoint or at the usual interior formula
  if (alpha < 1 & beta < 1) {
    beta_mode <- "0 and 1"
  } else if (alpha <= 1 & beta > 1) {
    beta_mode <- 0
  } else if (alpha > 1 & beta < 1) {
    beta_mode <- 1
  } else {
    beta_mode <- (alpha - 1)/(alpha + beta - 2)
  }

  return(data.frame(mean = beta_mean,
                    mode = beta_mode,
                    var = beta_var,
                    sd = beta_sd))

}# end of function

--------------------------------------------------------------------------------
/R/summarize_beta_binomial.R:
--------------------------------------------------------------------------------
#' @title Summarize a Beta-Binomial Bayesian model
#'
#' @description Consider a Beta-Binomial Bayesian model for parameter \eqn{\pi} with
#' a Beta(alpha, beta) prior on \eqn{\pi} and Binomial likelihood with n trials
#' and y successes. Given information on the prior (alpha and beta) and data (y and n),
#' this function summarizes the mean, mode, and variance of the
#' prior and posterior Beta models of \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the prior Beta model
#' @param y number of successes
#' @param n number of trials
#'
#' @return a summary table
#' @export
#'
#' @examples
#' summarize_beta_binomial(alpha = 1, beta = 15, y = 25, n = 50)
summarize_beta_binomial <- function (alpha,
                                     beta,
                                     y = NULL,
                                     n = NULL)
{
  # Without data (y and n), only the prior model can be summarized
  if (is.null(y) | is.null(n))
    warning("To summarize the posterior,
          specify data y and n")
  # Mean of a Beta(a, b) model
  beta_mean <- function(a, b) {
    a/(a + b)
  }
  # Mode of a Beta(a, b) model; the density is bimodal (peaks at 0 and 1)
  # when both shape parameters fall below 1
  beta_mode <- function(a, b) {
    if(a < 1 & b <1){
      mode <- "0 and 1"
    }else if (a <= 1 & b > 1){
      mode <- 0
    }else if (a > 1 & b < 1){
      mode <- 1
    }
    else{
      mode <- (a - 1)/(a + b - 2)
    }
  }
  # Variance of a Beta(a, b) model
  beta_var <- function(a, b) {
    a * b/((a + b)^2 * (a + b + 1))
  }
  prior_mean <- beta_mean(alpha, beta)
  prior_mode <- beta_mode(alpha, beta)
  prior_var <- beta_var(alpha, beta)
  prior_sd <- sqrt(prior_var)
  if (is.null(y) & is.null(n)) {
    # Prior-only summary (one row)
    return(data.frame(model = c("prior"),
                      alpha = alpha,
                      beta = beta,
                      mean = prior_mean,
                      mode = prior_mode,
                      var = prior_var,
                      sd = prior_sd))
  }
  else {
    # Beta-Binomial conjugacy: posterior is Beta(alpha + y, beta + n - y)
    post_alpha <- y + alpha
    post_beta <- n - y + beta
    post_mean <- beta_mean(post_alpha, post_beta)
    post_mode <- beta_mode(post_alpha, post_beta)
    post_var <- beta_var(post_alpha, post_beta)
    post_sd <- sqrt(post_var)
    return(data.frame(model = c("prior", "posterior"),
                      alpha = c(alpha, post_alpha),
                      beta = c(beta, post_beta),
                      mean = c(prior_mean, post_mean),
                      mode = c(prior_mode, post_mode),
                      var = c(prior_var, post_var),
                      sd = c(prior_sd, post_sd)))
  }
}

--------------------------------------------------------------------------------
/R/summarize_gamma.R:
--------------------------------------------------------------------------------
#' @title Summarize
a Gamma Model for \eqn{\lambda}
#'
#' @description Summarizes the expected value, variance, and mode of
#' a Gamma(shape, rate) model for variable \eqn{\lambda}.
#'
#' @param shape positive shape parameter of the Gamma model
#' @param rate positive rate parameter of the Gamma model
#'
#' @return a summary table
#' @export
#'
#' @examples
#' summarize_gamma(shape = 1, rate = 15)
#'

summarize_gamma <- function (shape, rate){
  # Mean and spread of the Gamma(shape, rate) model
  gamma_mean <- shape/rate
  gamma_var <- shape/rate^2
  gamma_sd <- sqrt(gamma_var)

  # The interior mode (shape - 1)/rate only exists when shape >= 1;
  # otherwise no mode is reported
  gamma_mode <- if (shape >= 1) (shape - 1)/rate else NA

  data.frame(mean = gamma_mean,
             mode = gamma_mode,
             var = gamma_var,
             sd = gamma_sd)

}# end of function

--------------------------------------------------------------------------------
/R/summarize_gamma_poisson.R:
--------------------------------------------------------------------------------
#' Summarize the Gamma-Poisson Model
#'
#' Consider a Gamma-Poisson Bayesian model for rate parameter \eqn{\lambda} with
#' a Gamma(shape, rate) prior on \eqn{\lambda} and a Poisson likelihood for the data.
#' Given information on the prior (shape and rate)
#' and data (the sample size n and sum_y),
#' this function summarizes the mean, mode, and variance of the
#' prior and posterior Gamma models of \eqn{\lambda}.
#'
#' @param shape positive shape parameter of the Gamma prior
#' @param rate positive rate parameter of the Gamma prior
#' @param sum_y sum of observed data values for the Poisson likelihood
#' @param n number of observations for the Poisson likelihood
#'
#' @return data frame
#' @export
#'
#' @examples
#' summarize_gamma_poisson(shape = 3, rate = 4, sum_y = 7, n = 12)
#'
summarize_gamma_poisson <- function (shape,
                                     rate,
                                     sum_y = NULL,
                                     n = NULL){
  # Without data (sum_y and n), only the prior model can be summarized
  if (is.null(sum_y) | is.null(n))
    warning("To summarize the posterior,
          specify data sum_y and n")
  if (is.null(sum_y) & is.null(n)) {
    # Prior-only summary (one row)
    prior_mean <- shape/rate
    if (shape >= 1) {
      prior_mode <- (shape - 1)/rate
    }
    else {
      # No interior mode when shape < 1
      prior_mode <- NA
    }
    prior_var <- shape/rate^2
    prior_sd <- sqrt(prior_var)
    return(data.frame(model = c("prior"),
                      shape = shape,
                      rate = rate,
                      mean = prior_mean,
                      mode = prior_mode,
                      var = prior_var,
                      sd = prior_sd))
  }
  else {
    prior_mean <- shape/rate
    if (shape >= 1) {
      prior_mode <- (shape - 1)/rate
    }
    else {
      # BUG FIX: this branch previously set prior_mode <- NULL, which c()
      # silently drops; the length-1 mode column then recycled post_mode
      # into the prior row of the returned data frame. NA (as used in the
      # prior-only branch above) keeps both rows correct.
      prior_mode <- NA
    }
    prior_var <- shape/rate^2
    prior_sd <- sqrt(prior_var)
    # Gamma-Poisson conjugacy: posterior is Gamma(shape + sum_y, rate + n)
    post_mean <- (shape + sum_y)/(rate + n)
    if ((shape + sum_y) >= 1) {
      post_mode <- (shape + sum_y - 1)/(rate + n)
    }
    else {
      post_mode <- NA
    }
    post_var <- (shape + sum_y)/((rate + n)^2)
    post_sd <- sqrt(post_var)
    post_s <- shape + sum_y
    post_r <- rate + n
    return(data.frame(model = c("prior",
                                "posterior"),
                      shape = c(shape, post_s),
                      rate = c(rate, post_r),
                      mean = c(prior_mean, post_mean),
                      mode = c(prior_mode, post_mode),
                      var = c(prior_var, post_var),
                      sd = c(prior_sd, post_sd)))
  }
}

--------------------------------------------------------------------------------
/R/summarize_normal_normal.R:
--------------------------------------------------------------------------------
#' Summarize a Normal-Normal Bayesian model
#'
#' Consider a Normal-Normal Bayesian model for mean parameter \eqn{\mu} with
#' a N(mean, sd^2) prior on \eqn{\mu} and a Normal likelihood for the data.
#' Given information on the prior (mean and sd)
#' and data (the sample size n, mean y_bar, and standard deviation sigma),
#' this function summarizes the mean, mode, and variance of the
#' prior and posterior Normal models of \eqn{\mu}.
#'
#' @param mean mean of the Normal prior
#' @param sd standard deviation of the Normal prior
#' @param sigma standard deviation of the data, or likelihood standard deviation
#' @param y_bar sample mean of the data
#' @param n sample size of the data
#'
#' @return data frame
#' @export
#'
#' @examples
#' summarize_normal_normal(mean = 2.3, sd = 0.3, sigma = 5.1, y_bar = 128.5, n = 20)
summarize_normal_normal <- function (mean,
                                     sd,
                                     sigma = NULL,
                                     y_bar = NULL,
                                     n = NULL)
{
  # Without the likelihood inputs, only the prior model can be summarized
  if (is.null(y_bar) | is.null(n)|is.null(sigma))
    warning("To summarize the posterior,
         specify sigma for the likelihood, data ybar and n")

  # A Normal model is symmetric, so its mean and mode coincide
  prior_mean <- mean
  prior_mode <- mean
  prior_var <- sd^2
  prior_sd <- sd
  if (is.null(y_bar) & is.null(n) & is.null(sigma)) {
    # BUG FIX: this branch previously returned columns alpha = alpha and
    # beta = beta (copied from the Beta-Binomial summary), but no such
    # objects exist in this function, so every prior-only call errored
    # with "object 'alpha' not found". The prior-only summary now reports
    # the same columns as the prior row of the full summary below.
    return(data.frame(model = c("prior"),
                      mean = prior_mean,
                      mode = prior_mode,
                      var = prior_var,
                      sd = prior_sd))
  }
  else {
    # Normal-Normal conjugacy: the posterior mean is a precision-weighted
    # average of the prior mean and the sample mean y_bar
    post_mean <- (((sigma^2)*mean) + ((sd^2)*n*y_bar))/(n*(sd^2)+(sigma^2))
    post_mode <- post_mean
    post_var <- ((sigma^2)*(sd^2))/(n*(sd^2)+(sigma^2))
    post_sd <- sqrt(post_var)
    return(data.frame(model = c("prior", "posterior"),
                      mean = c(prior_mean, post_mean),
                      mode = c(prior_mode, post_mode),
                      var = c(prior_var, post_var),
                      sd = c(prior_sd, post_sd)))
  }
}

--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
---
output: github_document
---



```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "80%"
)
```
# bayesrules a hex shaped logo with shiny green-pink disco ball and purple starry background. There is text that says Bayes Rules!


[![R-CMD-check](https://github.com/bayes-rules/bayesrules/workflows/R-CMD-check/badge.svg)](https://github.com/bayes-rules/bayesrules/actions)
[![CRAN status](https://www.r-pkg.org/badges/version/bayesrules)](https://cran.r-project.org/package=bayesrules)


**bayesrules** is a package to supplement the [Bayes Rules!
book](https://www.bayesrulesbook.com/). It contains datasets and
functions that are used in the book. You can find vignettes on the [package website](https://bayes-rules.github.io/bayesrules/docs/).

27 | 28 | ## Installation 29 | 30 | You can install bayesrules from CRAN. 31 | 32 | ``` r 33 | install.packages("bayesrules") 34 | ``` 35 | 36 | 37 | You can install the development version from GitHub. You would also need to install the devtools package if you do not have it installed already. 38 | 39 | ``` r 40 | #install.packages("devtools") 41 | devtools::install_github("bayes-rules/bayesrules") 42 | ``` 43 | 44 | 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bayesrules a hex shaped logo with shiny green-pink disco ball and purple starry background. There is text that says Bayes Rules! 5 | 6 | 7 | 8 | [![R-CMD-check](https://github.com/bayes-rules/bayesrules/workflows/R-CMD-check/badge.svg)](https://github.com/bayes-rules/bayesrules/actions) 9 | [![CRAN 10 | status](https://www.r-pkg.org/badges/version/bayesrules)](https://cran.r-project.org/package=bayesrules) 11 | 12 | 13 | **bayesrules** is an R package to supplement the [Bayes Rules! 14 | book](https://www.bayesrulesbook.com/). It contains datasets and 15 | functions that are used in the book. You can find vignettes on the 16 | [package website](https://bayes-rules.github.io/bayesrules/docs/). 17 | 18 |
19 | 20 | ## Installation 21 | 22 | You can install bayesrules from CRAN. 23 | 24 | ``` r 25 | install.packages("bayesrules") 26 | ``` 27 | 28 | You can install the development version from GitHub. You would also need 29 | to install the devtools package if you do not have it installed already. 30 | 31 | ``` r 32 | #install.packages("devtools") 33 | devtools::install_github("bayes-rules/bayesrules") 34 | ``` 35 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | 3 | - local OS X install, R 4.1.0 4 | - GitHub Actions (ubuntu-20.04): release, devel 5 | - GitHub Actions (windows): release 6 | - Github Actions (macOS): release 7 | - r-hub: windows-x86_64-devel, ubuntu-gcc-release, fedora-clang-devel 8 | - win-builder: release, devel, oldrelease 9 | 10 | ## R CMD check results 11 | 12 | There were no ERRORS, no WARNINGS, no MESSAGES 13 | -------------------------------------------------------------------------------- /data-raw/DATASET.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `DATASET` dataset goes here 2 | 3 | usethis::use_data(DATASET, overwrite = TRUE) 4 | -------------------------------------------------------------------------------- /data-raw/airbnb.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | 3 | x <- getURL("https://raw.githubusercontent.com/proback/BeyondMLR/master/data/airbnb.csv") 4 | airbnb <- read.csv(text = x) %>% 5 | select(-X, -PctBlack) %>% 6 | rename(minimum_stay = minstay, walk_score = WalkScore, 7 | transit_score = TransitScore, bike_score = BikeScore, 8 | rating = overall_satisfaction) %>% 9 | mutate(neighborhood = as.factor(neighborhood), district = as.factor(district), room_type = as.factor(room_type)) 10 | 11 | usethis::use_data(airbnb, overwrite = TRUE) 12 | 
-------------------------------------------------------------------------------- /data-raw/airbnb_small.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | library(dplyr) 3 | 4 | x <- getURL("https://raw.githubusercontent.com/proback/BeyondMLR/master/data/airbnb.csv") 5 | airbnb_small <- read.csv(text = x) %>% 6 | select(-X, -PctBlack) %>% 7 | rename(minimum_stay = minstay, walk_score = WalkScore, 8 | transit_score = TransitScore, bike_score = BikeScore, 9 | rating = overall_satisfaction) %>% 10 | mutate(neighborhood = as.factor(neighborhood), district = as.factor(district), room_type = as.factor(room_type)) %>% 11 | filter(district %in% c("Far North", "North", "Northwest")) %>% 12 | droplevels() 13 | 14 | usethis::use_data(airbnb_small, overwrite = TRUE) 15 | -------------------------------------------------------------------------------- /data-raw/bald_eagles.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | bald_eagles <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv") %>% 4 | rename(count = how_many_counted, hours = total_hours, count_per_hour = how_many_counted_by_hour) %>% 5 | filter(species == "Bald Eagle", year > 1980) %>% 6 | dplyr::select(-species, -species_latin) %>% 7 | mutate(count_per_week = count_per_hour*7*24) 8 | 9 | usethis::use_data(bald_eagles, overwrite = TRUE) 10 | -------------------------------------------------------------------------------- /data-raw/bechdel/bechdel_dataprep.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | library(dplyr) 3 | 4 | # Read data 5 | bechdel <- readr::read_csv("https://raw.githubusercontent.com/fivethirtyeight/data/master/bechdel/movies.csv") %>% 6 | select(year, title, binary) 7 | 8 | 9 | 10 | 11 | # Save the final data 12 | usethis::use_data(bechdel, 
overwrite = TRUE) 13 | -------------------------------------------------------------------------------- /data-raw/big_word_club/big_word_clubprep.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(here) 3 | 4 | big_word_club <- read.csv(here("data-raw", "big_word_club", "big_word_club.csv")) %>% 5 | mutate(score_pct_change = (score_a2 - score_a1) / score_a1 * 100) %>% 6 | mutate(school_id = as.factor(school_id)) 7 | 8 | # The .pdf file type is not great in packages. 9 | # The codebook can be accessed from an earlier commit https://github.com/mdogucu/bayesrules/blob/d9a01160ddf5bebb4a1ac2bb9869d539a1c63589/data-raw/big_word_club/BWC%20Codebook.pdf 10 | 11 | # Save the final data 12 | usethis::use_data(big_word_club, overwrite = TRUE) 13 | -------------------------------------------------------------------------------- /data-raw/big_word_club/bwc_data.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data-raw/big_word_club/bwc_data.dta -------------------------------------------------------------------------------- /data-raw/bike_users.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | # Original source: https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset 4 | bikes <- read.csv("https://www.macalester.edu/~ajohns24/data/bike_share.csv") 5 | 6 | # Clean up & subset 7 | set.seed(84735) 8 | bike_users <- bikes %>% 9 | gather(user, rides, -c(date,season,year,month,day_of_week,weekend,holiday,temp_actual,temp_feel,humidity,windspeed,weather_cat)) %>% 10 | mutate(user = factor(user, labels = c("casual","registered","total"))) %>% 11 | filter(user != "total", year == 2011) %>% 12 | mutate(user = droplevels(user)) %>% 13 | filter(temp_feel < 87, temp_feel > 45) %>% 14 | mutate(date = as.Date(date, format = 
"%m/%d/%y")) %>% 15 | arrange(date) %>% 16 | mutate_if(is.character, as.factor) 17 | usethis::use_data(bike_users, overwrite = TRUE) 18 | -------------------------------------------------------------------------------- /data-raw/bikes.R: -------------------------------------------------------------------------------- 1 | # Original source: https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset 2 | bikes <- read.csv("https://www.macalester.edu/~ajohns24/data/bike_share.csv") 3 | 4 | # Clean up & subset 5 | set.seed(84735) 6 | bikes <- bikes %>% 7 | mutate(rides = riders_registered) %>% 8 | mutate(date = as.Date(date, format = "%m/%d/%y")) %>% 9 | select(-riders_casual, -riders_total, -riders_registered) %>% 10 | filter(temp_feel < 87, temp_feel > 45) %>% 11 | mutate(humidity = 100*humidity) %>% 12 | sample_n(500) %>% 13 | arrange(date) %>% 14 | mutate_if(is.character, as.factor) 15 | 16 | usethis::use_data(bikes, overwrite = TRUE) 17 | -------------------------------------------------------------------------------- /data-raw/bird_counts.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | bird_counts <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv") %>% 4 | rename(count = how_many_counted, hours = total_hours, count_per_hour = how_many_counted_by_hour) %>% 5 | mutate(count_per_week = count_per_hour*7*24) 6 | 7 | usethis::use_data(bird_counts, overwrite = TRUE) 8 | -------------------------------------------------------------------------------- /data-raw/book_banning.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | 3 | x <- getURL("https://raw.githubusercontent.com/proback/BeyondMLR/master/data/bookbanningNoTex.csv") 4 | book_banning <- read.csv(text = x) %>% 5 | rename(title = booktitle, college_grad_rate = cperba, date = days2000, 6 | lgbtq = homosexuality, 
median_income = cmedin, book_id = book, 7 | explicit = sexexp, hs_grad_rate = cperhs, political_value_index = pvi2, 8 | violent = violence) %>% 9 | select(-X, -obama, -freqchal) %>% 10 | mutate(date = as.Date(date, origin = "2000-01-01")) %>% 11 | mutate(year = lubridate::year(date)) %>% 12 | select(title, book_id, author, date, year, removed, 13 | explicit, antifamily, occult, language, lgbtq, violent, 14 | state, political_value_index, median_income, hs_grad_rate, college_grad_rate) %>% 15 | mutate_if(is.integer, as.factor) 16 | 17 | usethis::use_data(book_banning, overwrite = TRUE) 18 | -------------------------------------------------------------------------------- /data-raw/cherry_blossom_sample.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(mdsr) 3 | data(Cherry) 4 | 5 | # I.D. subjects that have 7 observations 6 | subj <- Cherry %>% 7 | filter(nruns == 7) %>% 8 | group_by(name.yob) %>% 9 | summarize(min_age = min(age)) %>% 10 | filter(min_age >= 50, min_age < 55) %>% 11 | mutate(subject = as.factor(c(1:length(name.yob)))) 12 | 13 | 14 | cherry_blossom_sample <- Cherry %>% 15 | filter(name.yob %in% subj$name.yob) %>% 16 | left_join(subj) %>% 17 | rename(runner = subject) %>% 18 | select(runner, age, net, gun, year, previous) %>% 19 | mutate(runner = as.factor(runner)) 20 | 21 | usethis::use_data(cherry_blossom_sample, overwrite = TRUE) 22 | -------------------------------------------------------------------------------- /data-raw/climbers_sub/climbers_sub.R: -------------------------------------------------------------------------------- 1 | # Read tidytuesday data 2 | members <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/members.csv') 3 | peaks <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/peaks.csv') %>% 4 | select(peak_id, height_metres, first_ascent_year) 5 | 6 | 
climbers_sub <- members %>% 7 | filter(age > 15, age < 78, year >= 1978) 8 | 9 | set.seed(88) 10 | random_exp <- climbers_sub %>% 11 | group_by(expedition_id) %>% 12 | summarise(count = n()) %>% 13 | filter(count > 4) %>% 14 | sample_n(200) 15 | 16 | climbers_sub <- climbers_sub %>% 17 | group_by(expedition_id) %>% 18 | right_join(random_exp) %>% 19 | left_join(peaks) %>% 20 | mutate_if(is.character, as.factor) %>% 21 | ungroup() 22 | 23 | 24 | 25 | # Store as climbers_sub 26 | usethis::use_data(climbers_sub, overwrite = TRUE) 27 | -------------------------------------------------------------------------------- /data-raw/coffee_ratings.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | coffee_ratings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv') %>% 4 | select(owner, farm_name, mill, in_country_partner, 5 | country_of_origin, altitude_low_meters, altitude_high_meters, altitude_mean_meters, 6 | number_of_bags, bag_weight, 7 | species, variety, processing_method, aroma, flavor, aftertaste, 8 | acidity, body, balance, uniformity, clean_cup, sweetness, 9 | moisture, category_one_defects, category_two_defects, color, total_cup_points) %>% 10 | mutate_if(is.character, as.factor) 11 | 12 | 13 | 14 | usethis::use_data(coffee_ratings, overwrite = TRUE) 15 | -------------------------------------------------------------------------------- /data-raw/coffee_ratings_small.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | coffee_ratings_small <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv') %>% 4 | select(farm_name, total_cup_points, aroma, flavor, aftertaste, 5 | acidity, body, balance, uniformity, sweetness, moisture) %>% 6 | group_by(farm_name) %>% 7 | filter(n() >= 5, 
aroma > 0) %>% 8 | ungroup() %>% 9 | mutate_if(is.character, as.factor) 10 | 11 | 12 | usethis::use_data(coffee_ratings_small, overwrite = TRUE) 13 | -------------------------------------------------------------------------------- /data-raw/equality_index/equality_index.csv: -------------------------------------------------------------------------------- 1 | state,region,gop_2016,laws,historical,percent_urban 2 | alabama,south,62.08,3,gop,59 3 | alaska,west,51.28,2,gop,66 4 | arizona,west,48.67,3,gop,89.8 5 | arkansas,south,60.57,3,gop,56.2 6 | california,west,31.62,155,dem,95 7 | colorado,west,43.25,26,swing,86.2 8 | connecticut,northeast,40.93,20,dem,88 9 | delaware,south,41.79,17,dem,83.3 10 | florida,south,49.02,4,swing,91.2 11 | georgia,south,50.77,5,gop,75.1 12 | hawaii,west,30.03,24,dem,91.9 13 | idaho,west,59.26,2,gop,70.6 14 | illinois,midwest,38.76,38,dem,88.5 15 | indiana,midwest,56.82,2,swing,72.4 16 | iowa,midwest,51.15,4,swing,64 17 | kansas,midwest,56.65,1,gop,74.2 18 | kentucky,south,62.52,5,gop,58.4 19 | louisiana,south,58.09,11,gop,73.2 20 | maine,northeast,44.87,18,dem,38.7 21 | maryland,south,33.91,32,dem,87.2 22 | massachusetts,northeast,32.81,10,dem,92 23 | michigan,midwest,47.5,7,swing,74.6 24 | minnesota,midwest,44.92,4,dem,73.3 25 | mississippi,south,57.94,1,gop,49.4 26 | missouri,midwest,56.77,1,gop,70.4 27 | montana,west,56.17,2,gop,55.9 28 | nebraska,midwest,58.75,7,gop,73.1 29 | nevada,west,45.5,34,swing,94.2 30 | new hampshire,northeast,46.61,12,swing,60.3 31 | new jersey,northeast,41,20,dem,94.7 32 | new mexico,west,40.04,15,swing,77.4 33 | new york,northeast,36.15,30,dem,87.9 34 | north carolina,south,49.83,1,swing,66.1 35 | north dakota,midwest,62.96,3,gop,59.9 36 | ohio,midwest,51.69,1,swing,77.9 37 | oklahoma,south,65.32,3,gop,66.2 38 | oregon,west,39.09,27,dem,81 39 | pennsylvania,northeast,48.58,16,swing,78.7 40 | rhode island,northeast,38.9,17,dem,90.7 41 | south carolina,south,54.94,1,gop,66.3 42 | south 
dakota,midwest,61.53,1,gop,56.7 43 | tennessee,south,60.72,5,gop,66.4 44 | texas,south,52.23,3,gop,84.7 45 | utah,west,45.54,14,gop,90.6 46 | vermont,northeast,29.76,18,dem,38.9 47 | virginia,south,44.43,11,swing,75.5 48 | washington,west,38.07,23,dem,84.1 49 | west virginia,south,68.63,4,gop,48.7 50 | wisconsin,midwest,47.22,5,swing,70.2 51 | wyoming,west,67.4,2,gop,64.8 -------------------------------------------------------------------------------- /data-raw/equality_index/equality_index_dataprep.R: -------------------------------------------------------------------------------- 1 | equality_index <- read_csv("data-raw/equality_index/equality_index.csv") %>% 2 | mutate_if(is.character, as.factor) 3 | 4 | # Save the final data 5 | usethis::use_data(equality_index, overwrite = TRUE) 6 | -------------------------------------------------------------------------------- /data-raw/fake_news/fake_news_dataprep.R: -------------------------------------------------------------------------------- 1 | # Cleaning the fake_news data 2 | 3 | # SOURCE: https://www.kaggle.com/mdepak/fakenewsnet 4 | # Help from https://www.kaggle.com/kumudchauhan/fake-news-analysis-and-classification 5 | 6 | 7 | # Load libraries 8 | library(dplyr) 9 | library(readr) 10 | library(stringr) 11 | library(syuzhet) 12 | library(quanteda) 13 | 14 | 15 | # Load kaggle data 16 | set.seed(84735) 17 | buzzfeed_real <- read_csv("data-raw/fake_news/BuzzFeed_real_news_content.csv") 18 | buzzfeed_real <- buzzfeed_real %>% 19 | mutate(type = rep("real",nrow(buzzfeed_real))) %>% 20 | sample_n(90) 21 | 22 | set.seed(84735) 23 | buzzfeed_fake <- read_csv("data-raw/fake_news/BuzzFeed_fake_news_content.csv") 24 | buzzfeed_fake <- buzzfeed_fake %>% 25 | mutate(type = rep("fake",nrow(buzzfeed_fake))) %>% 26 | sample_n(size = 60) 27 | 28 | # Combine & simplify 29 | fake_news <- rbind(buzzfeed_real, buzzfeed_fake) %>% 30 | select(-c(id, meta_data, canonical_link, images, movies, publish_date, top_img, source)) 31 | 32 | 
33 | 34 | 35 | 36 | # Check it out 37 | dim(fake_news) 38 | table(fake_news$type) 39 | 40 | 41 | # Some guidance / ideas adapted from https://www.kaggle.com/burakhmmtgl/exploratory-analysis 42 | 43 | 44 | # Count the number of words & characters 45 | fake_news <- fake_news %>% 46 | mutate(title_words = sapply(strsplit(as.character(fake_news$title), " "), length)) %>% 47 | mutate(text_words = sapply(strsplit(as.character(fake_news$text), " "), length)) %>% 48 | mutate(title_char = str_count(title)) %>% 49 | mutate(text_char = str_count(text)) 50 | 51 | # Count the number & percent of words that are all capital letters 52 | fake_news <- fake_news %>% 53 | mutate(title_caps = str_count(title, "\\b[A-Z]{2,}\\b")) %>% 54 | mutate(text_caps = str_count(text, "\\b[A-Z]{2,}\\b")) %>% 55 | mutate(title_caps_percent = title_caps / title_words * 100) %>% 56 | mutate(text_caps_percent = text_caps / text_words * 100) 57 | 58 | 59 | # Number & percent of characters that are exclamation marks 60 | fake_news <- fake_news %>% 61 | mutate(title_excl = str_count(title, "!")) %>% 62 | mutate(text_excl = str_count(text, "!")) %>% 63 | mutate(title_excl_percent = title_excl / title_char * 100) %>% 64 | mutate(text_excl_percent = text_excl / text_char * 100) %>% 65 | mutate(title_has_excl = title_excl > 0) 66 | 67 | # Sentiment analysis 68 | sentiments <- round((get_nrc_sentiment(as.character(fake_news$title)) + get_nrc_sentiment(as.character(fake_news$text))) / (fake_news$title_words + fake_news$text_words)*100, 2) 69 | fake_news <- cbind(fake_news, sentiments) 70 | 71 | 72 | # Syllables per word 73 | fake_news <- fake_news %>% 74 | mutate(text_syllables = nsyllable(as.character(text))) %>% 75 | mutate(text_syllables_per_word = text_syllables / text_words) 76 | 77 | #ggplot(fake_news, aes(x = title_char, fill = type)) + 78 | # geom_density(alpha = 0.5) 79 | 80 | 81 | # Resample so that not batched by real and fake status 82 | set.seed(84735) 83 | fake_news <- sample_n(fake_news, size = 
nrow(fake_news)) %>% 84 | mutate(type = as.factor(type)) 85 | 86 | 87 | 88 | 89 | # Save the final data 90 | usethis::use_data(fake_news, overwrite = TRUE) 91 | -------------------------------------------------------------------------------- /data-raw/football.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | football <- Lock5Data::FootballBrain %>% 4 | rename(group = Group, years = Years) %>% 5 | mutate(volume = Hipp / 1000, 6 | group = case_when( 7 | group == "no football" ~ "no_football", 8 | group == "FBNoConcuss" ~ "fb_no_concuss", 9 | group == "FBConcuss" ~ "fb_concuss", 10 | group == "Control" ~ "control" 11 | ) 12 | ) %>% 13 | select(group, years, volume) %>% 14 | mutate_if(is.character, as.factor) 15 | usethis::use_data(football, overwrite = TRUE) 16 | -------------------------------------------------------------------------------- /data-raw/hotel_bookings.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | set.seed(84735) 4 | hotel_bookings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-11/hotels.csv') %>% 5 | rename(average_daily_rate = adr) %>% 6 | sample_n(size = 1000) %>% 7 | mutate(is_canceled = as.factor(is_canceled)) %>% 8 | mutate_if(is.character, as.factor) 9 | 10 | usethis::use_data(hotel_bookings, overwrite = TRUE) 11 | -------------------------------------------------------------------------------- /data-raw/loons.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | loons <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv") %>% 4 | rename(count = how_many_counted, hours = total_hours, count_per_hour = how_many_counted_by_hour) %>% 5 | filter(species == "Common Loon", year >= 2000) %>% 6 | mutate(count_per_100 = 
round(count_per_hour*100)) %>% 7 | dplyr::select(-species, -species_latin) 8 | 9 | usethis::use_data(loons, overwrite = TRUE) 10 | -------------------------------------------------------------------------------- /data-raw/moma.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | library(tidyverse) 3 | library(lubridate) 4 | 5 | 6 | url = getURL("https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv") 7 | moma_artists <- read.csv(text = url, na.strings = c("", " ", "NA", ""))[,-1] %>% 8 | mutate(year_acquired = year(DateAcquired), nartists = str_count(Artist, ",") + 1) %>% 9 | filter(nartists == 1) %>% 10 | select(Artist, Nationality, BeginDate, EndDate, Gender, Department, year_acquired) %>% 11 | mutate_all(funs(gsub("[(]", "", .))) %>% 12 | mutate_all(funs(gsub("[)]", "", .))) %>% 13 | rename(artist = Artist, country = Nationality, department = Department, 14 | gender = Gender, birth = BeginDate, death = EndDate) %>% 15 | mutate(alive = (death == 0), gender = tolower(gender), department = tolower(department), country = tolower(country)) %>% 16 | filter(birth != 0, !is.na(gender)) 17 | moma_artists$death[moma_artists$death == 0] <- NA 18 | moma_artists$gender[moma_artists$gender == ""] <- NA 19 | moma_artists <- moma_artists %>% 20 | filter(!is.na(gender)) 21 | 22 | 23 | moma_artists_2 <- moma_artists %>% 24 | group_by(artist) %>% 25 | summarize(count = n(), year_acquired_min = min(year_acquired), 26 | year_acquired_max = max(year_acquired), department = names(which.max(table(department)))) 27 | 28 | moma <- left_join(moma_artists, moma_artists_2) %>% 29 | mutate(genx = (birth >= 1965)) %>% 30 | select(artist, country, birth, death, alive, genx, gender, department, 31 | count, year_acquired_min, year_acquired_max) %>% 32 | distinct() %>% 33 | mutate_if(is.character, as.factor) 34 | 35 | 36 | set.seed(109) 37 | moma_sample <- moma %>% 38 | filter(!is.na(alive), !is.na(birth), 
!is.na(count), !is.na(year_acquired_min)) %>% 39 | sample_n(., size = 100) %>% 40 | select(-department) 41 | 42 | 43 | 44 | usethis::use_data(moma, overwrite = TRUE) 45 | usethis::use_data(moma_sample, overwrite = TRUE) -------------------------------------------------------------------------------- /data-raw/penguins_bayes.R: -------------------------------------------------------------------------------- 1 | library(palmerpenguins) 2 | data(penguins) 3 | penguins_bayes <- penguins %>% 4 | mutate(species = as.factor(species)) %>% 5 | mutate(above_average_weight = body_mass_g > 4200) %>% 6 | mutate(above_average_weight = as.factor(as.numeric(above_average_weight))) %>% 7 | select(species, island, year, bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g, above_average_weight, sex) 8 | 9 | 10 | usethis::use_data(penguins_bayes, overwrite = TRUE) 11 | 12 | 13 | -------------------------------------------------------------------------------- /data-raw/pop_vs_soda/pop_vs_soda_dataprep.R: -------------------------------------------------------------------------------- 1 | library(readr) 2 | library(dplyr) 3 | library(reprex) 4 | library(tidyr) 5 | 6 | pop_vs_soda <- read_csv("data-raw/pop_vs_soda/pop_vs_soda_raw.csv") %>% 7 | mutate_if(is.character, as.factor) %>% 8 | mutate(state = tolower(state)) %>% 9 | pivot_longer(cols = c("pop","soda","coke","other"), names_to = "word_for_cola", values_to = "count") %>% 10 | uncount(count) %>% 11 | mutate(pop = (word_for_cola == "pop")) 12 | 13 | # Save the final data 14 | usethis::use_data(pop_vs_soda, overwrite = TRUE) 15 | -------------------------------------------------------------------------------- /data-raw/pop_vs_soda/pop_vs_soda_raw.csv: -------------------------------------------------------------------------------- 1 | state,region,pop,soda,coke,other 2 | Alabama,south,153,582,2849,665 3 | Alaska,west,324,636,60,92 4 | Arizona,west,586,2799,437,174 5 | Arkansas,south,154,347,1442,80 6 | 
California,west,925,20119,2892,1941 7 | Colorado,west,2909,1943,327,183 8 | Connecticut,northeast,70,4273,102,115 9 | Delaware,south,24,699,41,29 10 | District of Columbia,south,35,442,57,40 11 | Florida,south,344,5400,3642,1921 12 | Georgia,south,140,1425,4933,410 13 | Hawaii,west,35,673,36,76 14 | Idaho,west,954,520,63,123 15 | Illinois,midwest,16400,7267,885,572 16 | Indiana,midwest,5591,1485,1989,363 17 | Iowa,midwest,6295,1227,67,173 18 | Kansas,midwest,2960,934,342,253 19 | Kentucky,south,1251,692,2248,329 20 | Louisiana,south,81,375,2739,597 21 | Maine,northeast,35,1418,21,74 22 | Maryland,south,208,5127,677,218 23 | Massachusetts,northeast,115,5874,240,1883 24 | Michigan,midwest,20493,10894,232,398 25 | Minnesota,midwest,11627,1745,119,311 26 | Mississippi,south,74,184,1435,102 27 | Missouri,midwest,1948,6733,541,230 28 | Montana,west,1061,284,37,73 29 | Nebraska,midwest,3045,661,54,119 30 | Nevada,west,119,849,136,52 31 | New Hampshire,northeast,18,1428,34,255 32 | New Jersey,northeast,135,8306,263,201 33 | New Mexico,west,95,493,754,79 34 | New York,northeast,7607,16671,489,525 35 | North Carolina,south,228,2671,1843,1008 36 | North Dakota,midwest,1136,253,15,25 37 | Ohio,midwest,19487,3082,450,490 38 | Oklahoma,south,1687,692,1486,151 39 | Oregon,west,3086,1903,188,179 40 | Pennsylvania,northeast,13089,11395,331,450 41 | Rhode Island,northeast,15,934,16,43 42 | South Carolina,south,60,880,1283,237 43 | South Dakota,midwest,1152,313,22,32 44 | Tennessee,south,226,892,3656,323 45 | Texas,south,361,4813,14494,906 46 | Utah,west,858,1096,161,137 47 | Vermont,northeast,19,679,9,56 48 | Virginia,south,562,5066,1457,620 49 | Washington,west,6772,3059,213,286 50 | West Virginia,south,1555,488,240,88 51 | Wisconsin,midwest,3410,8751,87,438 52 | Wyoming,west,463,150,74,318 -------------------------------------------------------------------------------- /data-raw/pulse_of_the_nation/pulse_of_the_nation_dataprep.R: 
-------------------------------------------------------------------------------- 1 | pulse_of_the_nation <- read_csv("data-raw/pulse_of_the_nation/pulse_of_the_nation.csv") %>% 2 | mutate_if(is.character, as.factor) 3 | 4 | # Save the final data 5 | usethis::use_data(pulse_of_the_nation, overwrite = TRUE) 6 | -------------------------------------------------------------------------------- /data-raw/voices.R: -------------------------------------------------------------------------------- 1 | # Import original data from Bodo Winter 2 | 3 | voices <- readr::read_csv("http://www.bodowinter.com/uploads/1/2/9/3/129362560/politeness_data.csv") %>% 4 | select(-gender) %>% 5 | rename(pitch = frequency) %>% 6 | mutate(subject = as.factor(rep(c("C","A","B","D","E","F"), each = 14)), 7 | attitude = forcats::fct_recode(attitude, polite = "pol", informal = "inf"), 8 | scenario = as.factor(chartr("1234567", "ABCDEFG", scenario))) %>% 9 | arrange(subject) 10 | 11 | usethis::use_data(voices, overwrite = TRUE) 12 | -------------------------------------------------------------------------------- /data-raw/weather_WU.Rmd: -------------------------------------------------------------------------------- 1 | # Load the data 2 | library(bayesrules) 3 | data(weather_australia) 4 | weather_WU <- weather_australia %>% 5 | filter(location %in% c("Wollongong", "Uluru")) %>% 6 | droplevels() 7 | 8 | usethis::use_data(weather_WU, overwrite = TRUE) 9 | -------------------------------------------------------------------------------- /data-raw/weather_australia.R: -------------------------------------------------------------------------------- 1 | # Load the data 2 | library(rattle) 3 | library(lubridate) 4 | data(weatherAUS) 5 | 6 | # Take a sub-sample of the data 7 | set.seed(22) 8 | weather <- weatherAUS %>% 9 | filter(Location %in% c("Wollongong", "Hobart", "Uluru")) %>% 10 | mutate(Location = as.factor(droplevels(as.factor(Location)))) %>% 11 | filter(!is.na(WindSpeed9am), 
!is.na(Humidity9am), !is.na(Pressure9am), !is.na(Temp9am), !is.na(Temp3pm)) %>% 12 | group_by(Location) %>% 13 | sample_n(100) %>% 14 | ungroup() %>% 15 | mutate(year = year(Date), month = month(Date), day_of_year = yday(Date)) %>% 16 | dplyr::select(-Date, -Cloud9am, -Cloud3pm, -Evaporation, -Sunshine) 17 | names(weather) <- tolower(names(weather)) 18 | weather_australia <- weather 19 | 20 | usethis::use_data(weather_australia, overwrite = TRUE) 21 | -------------------------------------------------------------------------------- /data-raw/weather_perth.R: -------------------------------------------------------------------------------- 1 | # Load the data 2 | library(rattle) 3 | library(dplyr) 4 | data(weatherAUS) 5 | 6 | # Take a sub-sample of the data 7 | set.seed(84735) 8 | weather_perth <- weatherAUS %>% 9 | filter(Location == "Perth") %>% 10 | na.omit() %>% 11 | sample_n(1000) %>% 12 | mutate(year = year(Date), month = month(Date), day_of_year = yday(Date)) %>% 13 | select(-Date, -Cloud9am, -Cloud3pm, -Evaporation, -Sunshine, -Location) 14 | 15 | names(weather_perth) <- tolower(names(weather_perth)) 16 | 17 | usethis::use_data(weather_perth, overwrite = TRUE) 18 | -------------------------------------------------------------------------------- /data/airbnb.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/airbnb.rda -------------------------------------------------------------------------------- /data/airbnb_small.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/airbnb_small.rda -------------------------------------------------------------------------------- /data/bald_eagles.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bald_eagles.rda -------------------------------------------------------------------------------- /data/basketball.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/basketball.rda -------------------------------------------------------------------------------- /data/bechdel.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bechdel.rda -------------------------------------------------------------------------------- /data/big_word_club.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/big_word_club.rda -------------------------------------------------------------------------------- /data/bike_users.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bike_users.rda -------------------------------------------------------------------------------- /data/bikes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bikes.rda -------------------------------------------------------------------------------- /data/bird_counts.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bird_counts.rda 
-------------------------------------------------------------------------------- /data/book_banning.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/book_banning.rda -------------------------------------------------------------------------------- /data/cherry_blossom_sample.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/cherry_blossom_sample.rda -------------------------------------------------------------------------------- /data/climbers_sub.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/climbers_sub.rda -------------------------------------------------------------------------------- /data/coffee_ratings.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/coffee_ratings.rda -------------------------------------------------------------------------------- /data/coffee_ratings_small.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/coffee_ratings_small.rda -------------------------------------------------------------------------------- /data/equality_index.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/equality_index.rda -------------------------------------------------------------------------------- /data/fake_news.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/fake_news.rda -------------------------------------------------------------------------------- /data/football.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/football.rda -------------------------------------------------------------------------------- /data/hotel_bookings.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/hotel_bookings.rda -------------------------------------------------------------------------------- /data/loons.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/loons.rda -------------------------------------------------------------------------------- /data/moma.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/moma.rda -------------------------------------------------------------------------------- /data/moma_sample.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/moma_sample.rda -------------------------------------------------------------------------------- /data/penguins_bayes.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/penguins_bayes.rda -------------------------------------------------------------------------------- /data/pop_vs_soda.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/pop_vs_soda.rda -------------------------------------------------------------------------------- /data/pulse_of_the_nation.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/pulse_of_the_nation.rda -------------------------------------------------------------------------------- /data/spotify.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/spotify.rda -------------------------------------------------------------------------------- /data/voices.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/voices.rda -------------------------------------------------------------------------------- /data/weather_WU.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/weather_WU.rda -------------------------------------------------------------------------------- /data/weather_australia.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/weather_australia.rda 
-------------------------------------------------------------------------------- /data/weather_perth.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/weather_perth.rda -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-7-1.png 
-------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.8/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.8/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | 
nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.14.0.3 2 | pkgdown: 2.0.2 3 | pkgdown_sha: ~ 4 | articles: 5 | conjugate-families: conjugate-families.html 6 | model-evaluation: model-evaluation.html 7 | last_built: 2022-03-14T16:27Z 8 | 9 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/Rplot002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/Rplot002.png -------------------------------------------------------------------------------- 
/docs/reference/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/figures/README-pressure-1.png -------------------------------------------------------------------------------- /docs/reference/figures/bayes-rules-hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/figures/bayes-rules-hex.png -------------------------------------------------------------------------------- /docs/reference/plot_beta-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta-1.png -------------------------------------------------------------------------------- /docs/reference/plot_beta_binomial-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta_binomial-1.png -------------------------------------------------------------------------------- /docs/reference/plot_beta_binomial-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta_binomial-2.png -------------------------------------------------------------------------------- /docs/reference/plot_beta_ci-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta_ci-1.png 
-------------------------------------------------------------------------------- /docs/reference/plot_binomial_likelihood-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_binomial_likelihood-1.png -------------------------------------------------------------------------------- /docs/reference/plot_gamma-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_gamma-1.png -------------------------------------------------------------------------------- /docs/reference/plot_gamma_poisson-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_gamma_poisson-1.png -------------------------------------------------------------------------------- /docs/reference/plot_gamma_poisson-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_gamma_poisson-2.png -------------------------------------------------------------------------------- /docs/reference/plot_normal-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal-1.png -------------------------------------------------------------------------------- /docs/reference/plot_normal_likelihood-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal_likelihood-1.png -------------------------------------------------------------------------------- /docs/reference/plot_normal_normal-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal_normal-1.png -------------------------------------------------------------------------------- /docs/reference/plot_normal_normal-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal_normal-2.png -------------------------------------------------------------------------------- /docs/reference/plot_poisson_likelihood-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_poisson_likelihood-1.png -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Manual", 2 | title = "bayesrules: Datasets and Supplemental Functions from Bayes Rules! Book", 3 | author = c(person(given = "Mine", 4 | family = "Dogucu"), 5 | person(given = "Alicia", 6 | family = "Johnson"), 7 | person(given = "Miles", 8 | family = "Ott")), 9 | year = "2021", 10 | url = "https://github.com/bayes-rules/bayesrules", 11 | note = "R package version 0.0.2.9000", 12 | header = "To cite bayesrules package in publications use:", 13 | textVersion = 14 | paste("Mine Dogucu, Alicia Johnson, Miles Ott (2021).", 15 | "bayesrules: Datasets and Supplemental Functions from Bayes Rules! 
Book", 16 | "Retrieved from https://github.com/bayes-rules/bayesrules R package version 0.0.2.900") 17 | ) 18 | -------------------------------------------------------------------------------- /man/airbnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_airbnb.R 3 | \docType{data} 4 | \name{airbnb} 5 | \alias{airbnb} 6 | \title{Chicago AirBnB Data} 7 | \format{ 8 | A data frame with 1561 rows and 12 variables. Each row represents a single AirBnB listing. 9 | \describe{ 10 | \item{price}{the nightly price of the listing (in USD)} 11 | \item{rating}{the listing's average rating, on a scale from 1 to 5} 12 | \item{reviews}{number of user reviews the listing has} 13 | \item{room_type}{the type of listing (eg: Shared room)} 14 | \item{accommodates}{number of guests the listing accommodates} 15 | \item{bedrooms}{the number of bedrooms the listing has} 16 | \item{minimum_stay}{the minimum number of nights to stay in the listing} 17 | \item{neighborhood}{the neighborhood in which the listing is located} 18 | \item{district}{the broader district in which the listing is located} 19 | \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 20 | \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 21 | \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 22 | } 23 | } 24 | \source{ 25 | Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 26 | Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 
27 | \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 28 | } 29 | \usage{ 30 | airbnb 31 | } 32 | \description{ 33 | The AirBnB data was collated by Trinh and Ameri as part of a course project 34 | at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 35 | This data set includes the prices and features for 1561 AirBnB listings in Chicago, collected in 2016. 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /man/airbnb_small.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_airbnb_small.R 3 | \docType{data} 4 | \name{airbnb_small} 5 | \alias{airbnb_small} 6 | \title{Chicago AirBnB Data} 7 | \format{ 8 | A data frame with 869 rows and 12 variables. Each row represents a single AirBnB listing. 9 | \describe{ 10 | \item{price}{the nightly price of the listing (in USD)} 11 | \item{rating}{the listing's average rating, on a scale from 1 to 5} 12 | \item{reviews}{number of user reviews the listing has} 13 | \item{room_type}{the type of listing (eg: Shared room)} 14 | \item{accommodates}{number of guests the listing accommodates} 15 | \item{bedrooms}{the number of bedrooms the listing has} 16 | \item{minimum_stay}{the minimum number of nights to stay in the listing} 17 | \item{neighborhood}{the neighborhood in which the listing is located} 18 | \item{district}{the broader district in which the listing is located} 19 | \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 20 | \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 21 | \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 22 | } 23 | } 24 | \source{ 25 | Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. 
Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 26 | Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 27 | \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 28 | } 29 | \usage{ 30 | airbnb_small 31 | } 32 | \description{ 33 | The AirBnB data was collated by Trinh and Ameri as part of a course project 34 | at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 35 | This data set, a subset of the airbnb data in the bayesrules package, includes the prices and features for 869 AirBnB listings in Chicago, collected in 2016. 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /man/bald_eagles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bald_eagles.R 3 | \docType{data} 4 | \name{bald_eagles} 5 | \alias{bald_eagles} 6 | \title{Bald Eagle Count Data} 7 | \format{ 8 | A data frame with 37 rows and 5 variables. Each row represents Bald Eagle observations in the given year. 9 | \describe{ 10 | \item{year}{year of data collection} 11 | \item{count}{number of birds observed} 12 | \item{hours}{total person-hours of observation period} 13 | \item{count_per_hour}{count divided by hours} 14 | \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 15 | } 16 | } 17 | \source{ 18 | \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv}. 19 | } 20 | \usage{ 21 | bald_eagles 22 | } 23 | \description{ 24 | Bald Eagle count data collected from the year 1981 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 
25 | The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 26 | A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/basketball.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_basketball.R 3 | \docType{data} 4 | \name{basketball} 5 | \alias{basketball} 6 | \title{WNBA Basketball Data} 7 | \format{ 8 | A data frame with 146 rows and 30 variables. Each row represents a single WNBA basketball player. The variables on each player are as follows. 9 | \describe{ 10 | \item{player_name}{first and last name} 11 | \item{height}{height in inches} 12 | \item{weight}{weight in pounds} 13 | \item{year}{year of the WNBA season} 14 | \item{team}{team that the WNBA player is a member of} 15 | \item{age}{age in years} 16 | \item{games_played}{number of games played by the player in that season} 17 | \item{games_started}{number of games the player started in that season} 18 | \item{avg_minutes_played}{average number of minutes played per game} 19 | \item{avg_field_goals}{average number of field goals per game played} 20 | \item{avg_field_goal_attempts}{average number of field goals attempted per game played} 21 | \item{field_goal_pct}{percent of field goals made throughout the season} 22 | \item{avg_three_pointers}{average number of three pointers per game played} 23 | \item{avg_three_pointer_attempts}{average number of three pointers attempted per game played} 24 | \item{three_pointer_pct}{percent of three pointers made throughout the season} 25 | \item{avg_two_pointers}{average number of two pointers made per game played} 26 | \item{avg_two_pointer_attempts}{average number of two pointers 
attempted per game played} 27 | \item{two_pointer_pct}{percent of two pointers made throughout the season} 28 | \item{avg_free_throws}{average number of free throws made per game played} 29 | \item{avg_free_throw_attempts}{average number of free throws attempted per game played} 30 | \item{free_throw_pct}{percent of free throws made throughout the season} 31 | \item{avg_offensive_rb}{average number of offensive rebounds per game played} 32 | \item{avg_defensive_rb}{average number of defensive rebounds per game played} 33 | \item{avg_rb}{average number of rebounds (both offensive and defensive) per game played} 34 | \item{avg_assists}{average number of assists per game played} 35 | \item{avg_steals}{average number of steals per game played} 36 | \item{avg_blocks}{average number of blocks per game played} 37 | \item{avg_turnovers}{average number of turnovers per game played} 38 | \item{avg_personal_fouls}{average number of personal fouls per game played. Note: after 5 fouls the player is not allowed to play in that game anymore} 39 | \item{avg_points}{average number of points made per game played} 40 | \item{total_minutes}{total number of minutes played throughout the season} 41 | \item{starter}{whether or not the player started in more than half of the games they played} 42 | } 43 | } 44 | \source{ 45 | \url{https://www.basketball-reference.com/} 46 | } 47 | \usage{ 48 | basketball 49 | } 50 | \description{ 51 | The WNBA Basketball Data was scraped from \url{https://www.basketball-reference.com/wnba/players/} and contains information on basketball players from the 2019 season. 
52 | } 53 | \keyword{datasets} 54 | -------------------------------------------------------------------------------- /man/bechdel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bechdel.R 3 | \docType{data} 4 | \name{bechdel} 5 | \alias{bechdel} 6 | \title{Bechdel Test for over 1500 movies} 7 | \format{ 8 | A data frame with 1794 rows and 3 variables: 9 | \describe{ 10 | \item{year}{The release year of the movie} 11 | \item{title}{The title of the movie} 12 | \item{binary}{Bechdel test result (PASS, FAIL)} 13 | } 14 | } 15 | \source{ 16 | 17 | } 18 | \usage{ 19 | bechdel 20 | } 21 | \description{ 22 | A dataset containing data behind the story 23 | "The Dollar-And-Cents Case Against Hollywood's Exclusion of Women" 24 | \url{https://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/}. 25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/bike_users.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bike_users.R 3 | \docType{data} 4 | \name{bike_users} 5 | \alias{bike_users} 6 | \title{Capital Bikeshare Bike Ridership (Registered and Casual Riders)} 7 | \format{ 8 | A data frame with 534 daily observations, 267 each for registered riders and casual riders, and 13 variables: 9 | \describe{ 10 | \item{date}{date of observation} 11 | \item{season}{fall, spring, summer, or winter} 12 | \item{year}{the year of the date} 13 | \item{month}{the month of the date} 14 | \item{day_of_week}{the day of the week} 15 | \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 16 | \item{holiday}{whether or not the date falls on a holiday (yes or no)} 17 | \item{temp_actual}{raw temperature (degrees 
Fahrenheit)} 18 | \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 19 | \item{humidity}{humidity level (percentage)} 20 | \item{windspeed}{wind speed (miles per hour)} 21 | \item{weather_cat}{weather category (categ1 = pleasant, categ2 = moderate, categ3 = severe)} 22 | \item{user}{rider type (casual or registered)} 23 | \item{rides}{number of bikeshare rides} 24 | } 25 | } 26 | \source{ 27 | Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset/} 28 | } 29 | \usage{ 30 | bike_users 31 | } 32 | \description{ 33 | Data on ridership among registered members and casual users of the Capital Bikeshare service in Washington, D.C.. 34 | } 35 | \keyword{datasets} 36 | -------------------------------------------------------------------------------- /man/bikes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bikes.R 3 | \docType{data} 4 | \name{bikes} 5 | \alias{bikes} 6 | \title{Capital Bikeshare Bike Ridership} 7 | \format{ 8 | A data frame with 500 daily observations and 13 variables: 9 | \describe{ 10 | \item{date}{date of observation} 11 | \item{season}{fall, spring, summer, or winter} 12 | \item{year}{the year of the date} 13 | \item{month}{the month of the date} 14 | \item{day_of_week}{the day of the week} 15 | \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 16 | \item{holiday}{whether or not the date falls on a holiday (yes or no)} 17 | \item{temp_actual}{raw temperature (degrees Fahrenheit)} 18 | \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 19 | \item{humidity}{humidity level (percentage)} 20 | \item{windspeed}{wind speed (miles per hour)} 21 | \item{weather_cat}{weather category (categ1 = pleasant, categ2 = 
moderate, categ3 = severe)} 22 | \item{rides}{number of bikeshare rides} 23 | } 24 | } 25 | \source{ 26 | Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset} 27 | } 28 | \usage{ 29 | bikes 30 | } 31 | \description{ 32 | Data on ridership among registered members of the Capital Bikeshare service in Washington, D.C.. 33 | } 34 | \keyword{datasets} 35 | -------------------------------------------------------------------------------- /man/bird_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bird_counts.R 3 | \docType{data} 4 | \name{bird_counts} 5 | \alias{bird_counts} 6 | \title{Bird Counts Data} 7 | \format{ 8 | A data frame with 18706 rows and 7 variables. Each row represents observations for the given bird species in the given year. 9 | \describe{ 10 | \item{year}{year of data collection} 11 | \item{species}{scientific name of observed bird species} 12 | \item{species_latin}{latin name of observed bird species} 13 | \item{count}{number of birds observed} 14 | \item{hours}{total person-hours of observation period} 15 | \item{count_per_hour}{count divided by hours} 16 | \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 17 | } 18 | } 19 | \source{ 20 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv/}. 21 | } 22 | \usage{ 23 | bird_counts 24 | } 25 | \description{ 26 | Bird count data collected between the years 1921 and 2017, in late December, by birdwatchers in the Ontario, Canada area. 27 | The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 
28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/book_banning.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_book_banning.R 3 | \docType{data} 4 | \name{book_banning} 5 | \alias{book_banning} 6 | \title{Book Banning Data} 7 | \format{ 8 | A data frame with 931 rows and 17 variables. Each row represents a single book challenge within the given state and date. 9 | \describe{ 10 | \item{title}{title of book being challenged} 11 | \item{book_id}{identifier for the book} 12 | \item{author}{author of the book} 13 | \item{date}{date of the challenge} 14 | \item{year}{year of the challenge} 15 | \item{removed}{whether or not the challenge was successful (the book was removed)} 16 | \item{explicit}{whether the book was challenged for sexually explicit material} 17 | \item{antifamily}{whether the book was challenged for anti-family material} 18 | \item{occult}{whether the book was challenged for occult material} 19 | \item{language}{whether the book was challenged for inappropriate language} 20 | \item{lgbtq}{whether the book was challenged for LGBTQ material} 21 | \item{violent}{whether the book was challenged for violent material} 22 | \item{state}{US state in which the challenge was made} 23 | \item{political_value_index}{Political Value Index of the state (negative = leans Republican, 0 = neutral, positive = leans Democrat)} 24 | \item{median_income}{median income in the state, relative to the average state median income} 25 | \item{hs_grad_rate}{high school graduation rate, in percent, relative to the average state high school graduation rate} 26 | \item{college_grad_rate}{college graduation rate, in percent, relative to the average state college graduation rate} 27 | } 28 | } 29 | \source{ 30 | Shannon Fast and Thomas Hegland (2011). 
Book Challenges: A Statistical Examination. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 31 | Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 32 | \url{https://github.com/proback/BeyondMLR/blob/master/data/bookbanningNoTex.csv/} 33 | } 34 | \usage{ 35 | book_banning 36 | } 37 | \description{ 38 | The book banning data was collected by Fast and Hegland as part of a course project 39 | at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 40 | This data set includes the features and outcomes for 931 book challenges 41 | (ie. requests to ban a book) made in the US between 2000 and 2010. 42 | Information on the books being challenged and the characteristics of these books 43 | were collected from the American Library Society. State-level demographic information and 44 | political leanings were obtained from the US Census Bureau and Cook Political Report, respectively. 45 | Due to an outlying large number of challenges, book challenges made in the state of Texas 46 | were omitted. 
47 | } 48 | \keyword{datasets} 49 | -------------------------------------------------------------------------------- /man/cherry_blossom_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_cherry_blossom_sample.R 3 | \docType{data} 4 | \name{cherry_blossom_sample} 5 | \alias{cherry_blossom_sample} 6 | \title{Cherry Blossom Running Race} 7 | \format{ 8 | A data frame with 252 Cherry Blossom outcomes and 7 variables: 9 | \describe{ 10 | \item{runner}{a unique identifier for the runner} 11 | \item{age}{age of the runner} 12 | \item{net}{time to complete the race, from starting line to finish line (minutes)} 13 | \item{gun}{time between the official start of the race and the finish line (minutes)} 14 | \item{year}{year of the race} 15 | \item{previous}{the number of previous years in which the subject ran in the race} 16 | } 17 | } 18 | \source{ 19 | Data in the original Cherry data set were obtained from \url{https://www.cherryblossom.org/post-race/race-results/}. 20 | } 21 | \usage{ 22 | cherry_blossom_sample 23 | } 24 | \description{ 25 | A sub-sample of outcomes for the annual Cherry Blossom Ten Mile race in Washington, D.C.. This sub-sample was taken from the complete Cherry data in the mdsr package. 
26 | } 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/classification_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classification_summary.R 3 | \name{classification_summary} 4 | \alias{classification_summary} 5 | \title{Posterior Classification Summaries} 6 | \usage{ 7 | classification_summary(model, data, cutoff = 0.5) 8 | } 9 | \arguments{ 10 | \item{model}{an rstanreg model object with binary y} 11 | 12 | \item{data}{data frame including the variables in the model, both response y and predictors x} 13 | 14 | \item{cutoff}{probability cutoff to classify a new case as positive (0.5 is the default)} 15 | } 16 | \value{ 17 | a list 18 | } 19 | \description{ 20 | Given a set of observed data including a binary response variable y 21 | and an rstanreg model of y, 22 | this function returns summaries of the model's posterior classification quality. 23 | These summaries include a confusion matrix as well as estimates of the model's 24 | sensitivity, specificity, and overall accuracy. 
25 | } 26 | \examples{ 27 | x <- rnorm(20) 28 | z <- 3*x 29 | prob <- 1/(1+exp(-z)) 30 | y <- rbinom(20, 1, prob) 31 | example_data <- data.frame(x = x, y = y) 32 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data, family = binomial) 33 | classification_summary(model = example_model, data = example_data, cutoff = 0.5) 34 | } 35 | -------------------------------------------------------------------------------- /man/classification_summary_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classification_summary_cv.R 3 | \name{classification_summary_cv} 4 | \alias{classification_summary_cv} 5 | \title{Cross-Validated Posterior Classification Summaries} 6 | \usage{ 7 | classification_summary_cv(model, data, group, k, cutoff = 0.5) 8 | } 9 | \arguments{ 10 | \item{model}{an rstanreg model object with binary y} 11 | 12 | \item{data}{data frame including the variables in the model, both response y (0 or 1) and predictors x} 13 | 14 | \item{group}{a character string representing the name of the factor grouping variable, i.e. random effect (only used for hierarchical models)} 15 | 16 | \item{k}{the number of folds to use for cross validation} 17 | 18 | \item{cutoff}{probability cutoff to classify a new case as positive} 19 | } 20 | \value{ 21 | a list 22 | } 23 | \description{ 24 | Given a set of observed data including a binary response variable y 25 | and an rstanreg model of y, 26 | this function returns cross validated estimates of the model's posterior classification quality: 27 | sensitivity, specificity, and overall accuracy. 28 | For hierarchical models of class lmerMod, the folds are composed of collections of groups, not individual observations. 
29 | } 30 | \examples{ 31 | x <- rnorm(20) 32 | z <- 3*x 33 | prob <- 1/(1+exp(-z)) 34 | y <- rbinom(20, 1, prob) 35 | example_data <- data.frame(x = x, y = y) 36 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data, family = binomial) 37 | classification_summary_cv(model = example_model, data = example_data, k = 2, cutoff = 0.5) 38 | } 39 | -------------------------------------------------------------------------------- /man/climbers_sub.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_climbers_sub.R 3 | \docType{data} 4 | \name{climbers_sub} 5 | \alias{climbers_sub} 6 | \title{Himalayan Climber Data} 7 | \format{ 8 | A data frame with 2076 observations (1 per climber) and 22 variables: 9 | \describe{ 10 | \item{expedition_id}{unique expedition identifier} 11 | \item{member_id}{unique climber identifier} 12 | \item{peak_id}{unique identifier of the expedition's destination peak} 13 | \item{peak_name}{name of the expedition's destination peak} 14 | \item{year}{year of expedition} 15 | \item{season}{season of expedition (Autumn, Spring, Summer, Winter)} 16 | \item{sex}{climber gender identity which the database oversimplifies to a binary category} 17 | \item{age}{climber age} 18 | \item{citizenship}{climber citizenship} 19 | \item{expedition_role}{climber's role in the expedition (eg: Co-Leader)} 20 | \item{hired}{whether the climber was a hired member of the expedition} 21 | \item{highpoint_metres}{the destination peak's highpoint (metres)} 22 | \item{success}{whether the climber successfully reached the destination} 23 | \item{solo}{whether the climber was on a solo expedition} 24 | \item{oxygen_used}{whether the climber utilized supplemental oxygen} 25 | \item{died}{whether the climber died during the expedition} 26 | \item{death_cause}{} 27 | \item{death_height_metres}{} 28 | \item{injured}{whether the climber was injured on 
the expedition} 29 | \item{injury_type}{} 30 | \item{injury_height_metres}{} 31 | \item{count}{number of climbers in the expedition} 32 | \item{height_metres}{height of the peak in meters} 33 | \item{first_ascent_year}{the year of the first recorded summit of the peak (though not necessarily the actual first summit!)} 34 | } 35 | } 36 | \source{ 37 | Original source: \url{https://www.himalayandatabase.com/}. Complete dataset distributed by: \url{https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-09-22/}. 38 | } 39 | \usage{ 40 | climbers_sub 41 | } 42 | \description{ 43 | A sub-sample of the Himalayan Database distributed through the R for Data Science TidyTuesday project. This dataset includes information on the results and conditions for various Himalayan climbing expeditions. Each row corresponds to a single member of a climbing expedition team. 44 | } 45 | \keyword{datasets} 46 | -------------------------------------------------------------------------------- /man/coffee_ratings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_coffee_ratings.R 3 | \docType{data} 4 | \name{coffee_ratings} 5 | \alias{coffee_ratings} 6 | \title{Coffee Ratings Data} 7 | \format{ 8 | A data frame with 1339 batches of coffee beans and 27 variables on each batch. 
9 | \describe{ 10 | \item{owner}{farm owner} 11 | \item{farm_name}{farm where beans were grown} 12 | \item{country_of_origin}{country where farm is} 13 | \item{mill}{where beans were processed} 14 | \item{in_country_partner}{country of coffee partner} 15 | \item{altitude_low_meters}{lowest altitude of the farm} 16 | \item{altitude_high_meters}{highest altitude of the farm} 17 | \item{altitude_mean_meters}{average altitude of the farm} 18 | \item{number_of_bags}{number of bags tested} 19 | \item{bag_weight}{weight of each tested bag} 20 | \item{species}{bean species} 21 | \item{variety}{bean variety} 22 | \item{processing_method}{how beans were processed} 23 | \item{aroma}{bean aroma grade} 24 | \item{flavor}{bean flavor grade} 25 | \item{aftertaste}{bean aftertaste grade} 26 | \item{acidity}{bean acidity grade} 27 | \item{body}{bean body grade} 28 | \item{balance}{bean balance grade} 29 | \item{uniformity}{bean uniformity grade} 30 | \item{clean_cup}{bean clean cup grade} 31 | \item{sweetness}{bean sweetness grade} 32 | \item{moisture}{bean moisture grade} 33 | \item{category_one_defects}{count of category one defects} 34 | \item{category_two_defects}{count of category two defects} 35 | \item{color}{bean color} 36 | \item{total_cup_points}{total bean rating (0 -- 100)} 37 | } 38 | } 39 | \source{ 40 | \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 41 | } 42 | \usage{ 43 | coffee_ratings 44 | } 45 | \description{ 46 | A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 
47 | } 48 | \keyword{datasets} 49 | -------------------------------------------------------------------------------- /man/coffee_ratings_small.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_coffee_ratings_small.R 3 | \docType{data} 4 | \name{coffee_ratings_small} 5 | \alias{coffee_ratings_small} 6 | \title{Coffee Ratings Data} 7 | \format{ 8 | A data frame with 636 batches of coffee beans and 11 variables on each batch. 9 | \describe{ 10 | \item{farm_name}{farm where beans were grown} 11 | \item{total_cup_points}{total bean rating (0 -- 100)} 12 | \item{aroma}{bean aroma grade} 13 | \item{flavor}{bean flavor grade} 14 | \item{aftertaste}{bean aftertaste grade} 15 | \item{acidity}{bean acidity grade} 16 | \item{body}{bean body grade} 17 | \item{balance}{bean balance grade} 18 | \item{uniformity}{bean uniformity grade} 19 | \item{sweetness}{bean sweetness grade} 20 | \item{moisture}{bean moisture grade} 21 | } 22 | } 23 | \source{ 24 | \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 25 | } 26 | \usage{ 27 | coffee_ratings_small 28 | } 29 | \description{ 30 | A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 31 | This is a simplified version of the coffee_ratings data. 
32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/equality_index.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_equality_index.R 3 | \docType{data} 4 | \name{equality_index} 5 | \alias{equality_index} 6 | \title{LGBTQ+ Rights Laws by State} 7 | \format{ 8 | A data frame with 50 observations, one per state, and 6 variables: 9 | \describe{ 10 | \item{state}{state name} 11 | \item{region}{region in which the state falls} 12 | \item{gop_2016}{percent of the 2016 presidential election vote earned by the Republican ("GOP") candidate} 13 | \item{laws}{number of LGBTQ+ rights laws (as of 2019)} 14 | \item{historical}{political leaning of the state over time (gop = Republican, dem = Democrat, swing = swing state)} 15 | \item{percent_urban}{percent of state's residents that live in urban areas (by the 2010 census)} 16 | } 17 | } 18 | \source{ 19 | Data on LGBTQ+ laws were obtained from Warbelow, Sarah, Courtnay Avant, and Colin Kutney (2020). 2019 State Equality Index. Washington, DC. Human Rights Campaign Foundation. \url{https://assets2.hrc.org/files/assets/resources/HRC-SEI-2019-Report.pdf?_ga=2.148925686.1325740687.1594310864-1928808113.1594310864&_gac=1.213124768.1594312278.EAIaIQobChMI9dP2hMzA6gIVkcDACh21GgLEEAAYASAAEgJiJvD_BwE/}. Data on urban residency obtained from \url{https://www.icip.iastate.edu/tables/population/urban-pct-states/}. 20 | } 21 | \usage{ 22 | equality_index 23 | } 24 | \description{ 25 | Data on the number of LGBTQ+ equality laws (as of 2019) and demographics in each U.S. state. 
26 | } 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/fake_news.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_fake_news.R 3 | \docType{data} 4 | \name{fake_news} 5 | \alias{fake_news} 6 | \title{A collection of 150 news articles} 7 | \format{ 8 | A data frame with 150 rows and 30 variables: 9 | \describe{ 10 | \item{title}{The title of the news article} 11 | \item{text}{Text of the article} 12 | \item{url}{Hyperlink for the article} 13 | \item{authors}{Authors of the article} 14 | \item{type}{Binary variable indicating whether the article presents fake or real news (fake, real)} 15 | \item{title_words}{Number of words in the title} 16 | \item{text_words}{Number of words in the text} 17 | \item{title_char}{Number of characters in the title} 18 | \item{text_char}{Number of characters in the text} 19 | \item{title_caps}{Number of words that are all capital letters in the title} 20 | \item{text_caps}{Number of words that are all capital letters in the text} 21 | \item{title_caps_percent}{Percent of words that are all capital letters in the title} 22 | \item{text_caps_percent}{Percent of words that are all capital letters in the text} 23 | \item{title_excl}{Number of characters that are exclamation marks in the title} 24 | \item{text_excl}{Number of characters that are exclamation marks in the text} 25 | \item{title_excl_percent}{Percent of characters that are exclamation marks in the title} 26 | \item{text_excl_percent}{Percent of characters that are exclamation marks in the text} 27 | \item{title_has_excl}{Binary variable indicating whether the title of the article includes an exclamation point or not (TRUE, FALSE)} 28 | \item{anger}{Percent of words that are associated with anger} 29 | \item{anticipation}{Percent of words that are associated with anticipation} 30 | 
\item{disgust}{Percent of words that are associated with disgust} 31 | \item{fear}{Percent of words that are associated with fear} 32 | \item{joy}{Percent of words that are associated with joy} 33 | \item{sadness}{Percent of words that are associated with sadness} 34 | \item{surprise}{Percent of words that are associated with surprise} 35 | \item{trust}{Percent of words that are associated with trust} 36 | \item{negative}{Percent of words that have negative sentiment} 37 | \item{positive}{Percent of words that have positive sentiment} 38 | \item{text_syllables}{Number of syllables in text} 39 | \item{text_syllables_per_word}{Number of syllables per word in text} 40 | } 41 | } 42 | \source{ 43 | Shu, K., Mahudeswaran, D., Wang, S., Lee, D. and Liu, H. (2018) FakeNewsNet: A Data Repository with News Content, Social Context and Dynamic Information for Studying Fake News on Social Media 44 | } 45 | \usage{ 46 | fake_news 47 | } 48 | \description{ 49 | A dataset containing data behind the study 50 | "FakeNewsNet: A Data Repository with News Content, Social Context and Spatialtemporal Information for Studying Fake News on Social Media" 51 | \url{https://arxiv.org/abs/1809.01286}. 52 | The news articles in this dataset were posted to Facebook in September 2016, in the run-up to the U.S. presidential election. 
53 | } 54 | \keyword{datasets} 55 | -------------------------------------------------------------------------------- /man/figures/bayes-rules-hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/man/figures/bayes-rules-hex.png -------------------------------------------------------------------------------- /man/football.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_football.R 3 | \docType{data} 4 | \name{football} 5 | \alias{football} 6 | \title{Football Brain Measurements} 7 | \format{ 8 | A data frame with 75 observations and 5 variables: 9 | \describe{ 10 | \item{group}{control = no football, 11 | fb_no_concuss = football player but no concussions, 12 | fb_concuss = football player with concussion history} 13 | \item{years}{Number of years a person played football} 14 | \item{volume}{Total hippocampus volume, in cubic centimeters} 15 | } 16 | } 17 | \source{ 18 | Singh R, Meier T, Kuplicki R, Savitz J, et al., 19 | "Relationship of Collegiate Football Experience and Concussion 20 | With Hippocampal Volume and Cognitive Outcome," JAMA, 311(18), 2014 21 | } 22 | \usage{ 23 | football 24 | } 25 | \description{ 26 | Brain measurements for football and non-football players as provided in the Lock5 package 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/hotel_bookings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_hotel_bookings.R 3 | \docType{data} 4 | \name{hotel_bookings} 5 | \alias{hotel_bookings} 6 | \title{Hotel Bookings Data} 7 | \format{ 8 | A data frame with 1000 hotel bookings and 32 variables on 
each booking. 9 | \describe{ 10 | \item{hotel}{"Resort Hotel" or "City Hotel"} 11 | \item{is_canceled}{whether the booking was cancelled} 12 | \item{lead_time}{number of days between booking and arrival} 13 | \item{arrival_date_year}{year of scheduled arrival} 14 | \item{arrival_date_month}{month of scheduled arrival} 15 | \item{arrival_date_week_number}{week of scheduled arrival} 16 | \item{arrival_date_day_of_month}{day of month of scheduled arrival} 17 | \item{stays_in_weekend_nights}{number of reserved weekend nights} 18 | \item{stays_in_week_nights}{number of reserved week nights} 19 | \item{adults}{number of adults in booking} 20 | \item{children}{number of children} 21 | \item{babies}{number of babies} 22 | \item{meal}{whether the booking includes breakfast (BB = bed & breakfast), breakfast and dinner (HB = half board), or breakfast, lunch, and dinner (FB = full board)} 23 | \item{country}{guest's country of origin} 24 | \item{market_segment}{market segment designation (eg: TA = travel agent, TO = tour operator)} 25 | \item{distribution_channel}{booking distribution channel (eg: TA = travel agent, TO = tour operator)} 26 | \item{is_repeated_guest}{whether or not booking was made by a repeated guest} 27 | \item{previous_cancellations}{guest's number of previous booking cancellations} 28 | \item{previous_bookings_not_canceled}{guest's number of previous bookings that weren't cancelled} 29 | \item{reserved_room_type}{code for type of room reserved by guest} 30 | \item{assigned_room_type}{code for type of room assigned by hotel} 31 | \item{booking_changes}{number of changes made to the booking} 32 | \item{deposit_type}{No Deposit, Non Refund, Refundable} 33 | \item{agent}{booking travel agency} 34 | \item{company}{booking company} 35 | \item{days_in_waiting_list}{number of days the guest waited for booking confirmation} 36 | \item{customer_type}{Contract, Group, Transient, Transient-party (a transient booking tied to another transient booking)} 37 | 
\item{average_daily_rate}{average hotel cost per day} 38 | \item{required_car_parking_spaces}{number of parking spaces the guest needed} 39 | \item{total_of_special_requests}{number of guest special requests} 40 | \item{reservation_status}{Canceled, Check-Out, No-Show} 41 | \item{reservation_status_date}{when the guest cancelled or checked out} 42 | } 43 | } 44 | \source{ 45 | Nuno Antonio, Ana de Almeida, and Luis Nunes (2019). "Hotel booking demand datasets." Data in Brief (22): 41-49. 46 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-02-11/hotels.csv/}. 47 | } 48 | \usage{ 49 | hotel_bookings 50 | } 51 | \description{ 52 | A random subset of the data on hotel bookings originally collected by Antonio, Almeida and Nunes (2019) and distributed through the R for Data Science TidyTuesday project. 53 | } 54 | \keyword{datasets} 55 | -------------------------------------------------------------------------------- /man/loons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_loons.R 3 | \docType{data} 4 | \name{loons} 5 | \alias{loons} 6 | \title{Loon Count Data} 7 | \format{ 8 | A data frame with 18 rows and 5 variables. Each row represents loon observations in the given year. 9 | \describe{ 10 | \item{year}{year of data collection} 11 | \item{count}{number of loons observed} 12 | \item{hours}{total person-hours of observation period} 13 | \item{count_per_hour}{count divided by hours} 14 | \item{count_per_100}{count_per_hour multiplied by 100 hours} 15 | } 16 | } 17 | \source{ 18 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv}. 19 | } 20 | \usage{ 21 | loons 22 | } 23 | \description{ 24 | Loon count data collected from the year 2000 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 
25 | The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 26 | A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/moma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_moma.R 3 | \docType{data} 4 | \name{moma} 5 | \alias{moma} 6 | \title{Museum of Modern Art (MoMA) data} 7 | \format{ 8 | A data frame with 10964 rows and 11 variables. Each row represents an individual artist in the MoMA collection. 9 | \describe{ 10 | \item{artist}{name} 11 | \item{country}{country of origin} 12 | \item{birth}{year of birth} 13 | \item{death}{year of death} 14 | \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 15 | \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 16 | \item{gender}{gender identity (as perceived by MoMA employees)} 17 | \item{department}{MoMA department in which the artist's works most frequently appear} 18 | \item{count}{number of the artist's works in the MoMA collection} 19 | \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 20 | \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 21 | } 22 | } 23 | \source{ 24 | \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 25 | } 26 | \usage{ 27 | moma 28 | } 29 | \description{ 30 | The Museum of Modern Art data includes information about the individual artists included in the collection of the Museum of Modern Art in New York City. 31 | It does not include information about works for artist collectives or companies. 
32 | The data was made available by MoMA itself and downloaded in December 2020. 33 | } 34 | \keyword{datasets} 35 | -------------------------------------------------------------------------------- /man/moma_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_moma_sample.R 3 | \docType{data} 4 | \name{moma_sample} 5 | \alias{moma_sample} 6 | \title{Museum of Modern Art (MoMA) data sample} 7 | \format{ 8 | A data frame with 100 rows and 10 variables. Each row represents an individual artist in the MoMA collection. 9 | \describe{ 10 | \item{artist}{name} 11 | \item{country}{country of origin} 12 | \item{birth}{year of birth} 13 | \item{death}{year of death} 14 | \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 15 | \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 16 | \item{gender}{gender identity (as perceived by MoMA employees)} 17 | \item{count}{number of the artist's works in the MoMA collection} 18 | \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 19 | \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 20 | } 21 | } 22 | \source{ 23 | \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 24 | } 25 | \usage{ 26 | moma_sample 27 | } 28 | \description{ 29 | A random sample of 100 artists represented in the Museum of Modern Art in New York City. 30 | The data was made available by MoMA itself and downloaded in December 2020. 31 | It does not include information about artist collectives or companies. 
32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/naive_classification_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/naive_classification_summary.R 3 | \name{naive_classification_summary} 4 | \alias{naive_classification_summary} 5 | \title{Posterior Classification Summaries for a Naive Bayes model} 6 | \usage{ 7 | naive_classification_summary(model, data, y) 8 | } 9 | \arguments{ 10 | \item{model}{a naiveBayes model object with categorical y} 11 | 12 | \item{data}{data frame including the variables in the model} 13 | 14 | \item{y}{a character string indicating the y variable in data} 15 | } 16 | \value{ 17 | a list 18 | } 19 | \description{ 20 | Given a set of observed data including a categorical response variable y 21 | and a naiveBayes model of y, 22 | this function returns summaries of the model's posterior classification quality. 23 | These summaries include a confusion matrix as well as an estimate of the model's 24 | overall accuracy. 
25 | } 26 | \examples{ 27 | data(penguins_bayes, package = "bayesrules") 28 | example_model <- e1071::naiveBayes(species ~ bill_length_mm, data = penguins_bayes) 29 | naive_classification_summary(model = example_model, data = penguins_bayes, y = "species") 30 | } 31 | -------------------------------------------------------------------------------- /man/naive_classification_summary_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/naive_classification_summary_cv.R 3 | \name{naive_classification_summary_cv} 4 | \alias{naive_classification_summary_cv} 5 | \title{Cross-Validated Posterior Classification Summaries for a Naive Bayes model} 6 | \usage{ 7 | naive_classification_summary_cv(model, data, y, k = 10) 8 | } 9 | \arguments{ 10 | \item{model}{a naiveBayes model object with categorical y} 11 | 12 | \item{data}{data frame including the variables in the model} 13 | 14 | \item{y}{a character string indicating the y variable in data} 15 | 16 | \item{k}{the number of folds to use for cross validation} 17 | } 18 | \value{ 19 | a list 20 | } 21 | \description{ 22 | Given a set of observed data including a categorical response variable y 23 | and a naiveBayes model of y, 24 | this function returns a cross validated confusion matrix by which to assess 25 | the model's posterior classification quality. 
26 | } 27 | \examples{ 28 | data(penguins_bayes, package = "bayesrules") 29 | example_model <- e1071::naiveBayes(species ~ bill_length_mm, data = penguins_bayes) 30 | naive_classification_summary_cv(model = example_model, data = penguins_bayes, y = "species", k = 2) 31 | } 32 | -------------------------------------------------------------------------------- /man/penguins_bayes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_penguins_bayes.R 3 | \docType{data} 4 | \name{penguins_bayes} 5 | \alias{penguins_bayes} 6 | \title{Penguins Data} 7 | \format{ 8 | A data frame with 344 penguins and 9 variables on each. 9 | \describe{ 10 | \item{species}{species (Adelie, Chinstrap, Gentoo)} 11 | \item{island}{home island (Biscoe, Dream, Torgersen)} 12 | \item{year}{year of observation} 13 | \item{bill_length_mm}{length of bill (mm)} 14 | \item{bill_depth_mm}{depth of bill (mm)} 15 | \item{flipper_length_mm}{length of flipper (mm)} 16 | \item{body_mass_g}{body mass (g)} 17 | \item{above_average_weight}{whether or not the body mass exceeds 4200g (TRUE or FALSE)} 18 | \item{sex}{male or female} 19 | } 20 | } 21 | \source{ 22 | Gorman KB, Williams TD, and Fraser WR (2014). Ecological sexual dimorphism and environmental variability within a community of antarctic penguins (Genus Pygoscelis). PLoS ONE, 9(3). 23 | } 24 | \usage{ 25 | penguins_bayes 26 | } 27 | \description{ 28 | Data on penguins in the Palmer Archipelago, originally collected by Gorman et al. and distributed through the penguins data in the palmerpenguins package. 29 | In addition to the original penguins data is a variable above_average_weight. 
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/plot_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_beta.R 3 | \name{plot_beta} 4 | \alias{plot_beta} 5 | \title{Plot a Beta Model for \eqn{\pi}} 6 | \usage{ 7 | plot_beta(alpha, beta, mean = FALSE, mode = FALSE) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the Beta model} 11 | 12 | \item{mean, mode}{a logical value indicating whether to display the model mean and mode} 13 | } 14 | \value{ 15 | A density plot for the Beta model. 16 | } 17 | \description{ 18 | Plots the probability density function (pdf) for 19 | a Beta(alpha, beta) model of variable \eqn{\pi}. 20 | } 21 | \examples{ 22 | plot_beta(alpha = 1, beta = 12, mean = TRUE, mode = TRUE) 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_beta_binomial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_beta_binomial.R 3 | \name{plot_beta_binomial} 4 | \alias{plot_beta_binomial} 5 | \title{Plot a Beta-Binomial Bayesian Model} 6 | \usage{ 7 | plot_beta_binomial( 8 | alpha, 9 | beta, 10 | y = NULL, 11 | n = NULL, 12 | prior = TRUE, 13 | likelihood = TRUE, 14 | posterior = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{alpha, beta}{positive shape parameters of the prior Beta model} 19 | 20 | \item{y}{observed number of successes} 21 | 22 | \item{n}{observed number of trials} 23 | 24 | \item{prior}{a logical value indicating whether the prior model should be plotted} 25 | 26 | \item{likelihood}{a logical value indicating whether the scaled likelihood should be plotted} 27 | 28 | \item{posterior}{a logical value indicating whether posterior model should be 
plotted} 29 | } 30 | \value{ 31 | a ggplot 32 | } 33 | \description{ 34 | Consider a Beta-Binomial Bayesian model for parameter \eqn{\pi} with 35 | a Beta(alpha, beta) prior on \eqn{\pi} and Binomial likelihood with n trials 36 | and y successes. Given information on the prior (alpha and beta) and data (y and n), 37 | this function produces a plot of any combination of the corresponding prior pdf, 38 | scaled likelihood function, and posterior pdf. All three are included by default. 39 | } 40 | \examples{ 41 | 42 | plot_beta_binomial(alpha = 1, beta = 13, y = 25, n = 50) 43 | plot_beta_binomial(alpha = 1, beta = 13, y = 25, n = 50, posterior = FALSE) 44 | 45 | } 46 | -------------------------------------------------------------------------------- /man/plot_beta_ci.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_beta_ci.R 3 | \name{plot_beta_ci} 4 | \alias{plot_beta_ci} 5 | \title{Plot a Beta Model with Credible Interval} 6 | \usage{ 7 | plot_beta_ci(alpha, beta, ci_level = 0.95) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the Beta model} 11 | 12 | \item{ci_level}{credible interval level} 13 | } 14 | \value{ 15 | A density plot for the Beta model 16 | } 17 | \description{ 18 | Plots the probability density function (pdf) for a 19 | Beta(alpha, beta) model of variable \eqn{\pi} with markings indicating 20 | a credible interval for \eqn{\pi}. 
21 | } 22 | \examples{ 23 | plot_beta_ci(alpha = 7, beta = 12, ci_level = 0.80) 24 | } 25 | -------------------------------------------------------------------------------- /man/plot_binomial_likelihood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_binomial_likelihood.R 3 | \name{plot_binomial_likelihood} 4 | \alias{plot_binomial_likelihood} 5 | \title{Plot a Binomial Likelihood Function} 6 | \usage{ 7 | plot_binomial_likelihood(y, n, mle = FALSE) 8 | } 9 | \arguments{ 10 | \item{y}{number of successes} 11 | 12 | \item{n}{number of trials} 13 | 14 | \item{mle}{a logical value indicating whether maximum likelihood estimate of \eqn{\pi}, y/n, should be plotted} 15 | } 16 | \value{ 17 | a ggplot 18 | } 19 | \description{ 20 | Plots the Binomial likelihood function for variable \eqn{\pi} 21 | given y observed successes in a series of n Binomial trials. 22 | } 23 | \examples{ 24 | plot_binomial_likelihood(y = 3, n = 10, mle = TRUE) 25 | } 26 | -------------------------------------------------------------------------------- /man/plot_gamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_gamma.R 3 | \name{plot_gamma} 4 | \alias{plot_gamma} 5 | \title{Plot a Gamma Model for \eqn{\lambda}} 6 | \usage{ 7 | plot_gamma(shape, rate, mean = FALSE, mode = FALSE) 8 | } 9 | \arguments{ 10 | \item{shape}{non-negative shape parameter of the Gamma model} 11 | 12 | \item{rate}{non-negative rate parameter of the Gamma model} 13 | 14 | \item{mean, mode}{a logical value indicating whether to display the model mean and mode} 15 | } 16 | \value{ 17 | A density plot for the Gamma model. 18 | } 19 | \description{ 20 | Plots the probability density function (pdf) for 21 | a Gamma(shape, rate) model of variable \eqn{\lambda}. 
22 | } 23 | \examples{ 24 | plot_gamma(shape = 2, rate = 11, mean = TRUE, mode = TRUE) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/plot_gamma_poisson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_gamma_poisson.R 3 | \name{plot_gamma_poisson} 4 | \alias{plot_gamma_poisson} 5 | \title{Plot a Gamma-Poisson Bayesian Model} 6 | \usage{ 7 | plot_gamma_poisson( 8 | shape, 9 | rate, 10 | sum_y = NULL, 11 | n = NULL, 12 | prior = TRUE, 13 | likelihood = TRUE, 14 | posterior = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{shape}{non-negative shape parameter of the Gamma prior} 19 | 20 | \item{rate}{non-negative rate parameter of the Gamma prior} 21 | 22 | \item{sum_y}{sum of observed data values for the Poisson likelihood} 23 | 24 | \item{n}{number of observations for the Poisson likelihood} 25 | 26 | \item{prior}{a logical value indicating whether the prior model should be plotted.} 27 | 28 | \item{likelihood}{a logical value indicating whether the scaled likelihood should be plotted.} 29 | 30 | \item{posterior}{a logical value indicating whether posterior model should be plotted.} 31 | } 32 | \value{ 33 | a ggplot 34 | } 35 | \description{ 36 | Consider a Gamma-Poisson Bayesian model for rate parameter \eqn{\lambda} with 37 | a Gamma(shape, rate) prior on \eqn{\lambda} and a Poisson likelihood for the data. 38 | Given information on the prior (shape and rate) 39 | and data (the sample size n and sum_y), 40 | this function produces a plot of any combination of the corresponding prior pdf, 41 | scaled likelihood function, and posterior pdf. All three are included by default. 
42 | } 43 | \examples{ 44 | plot_gamma_poisson(shape = 100, rate = 20, sum_y = 39, n = 6) 45 | plot_gamma_poisson(shape = 100, rate = 20, sum_y = 39, n = 6, posterior = FALSE) 46 | } 47 | -------------------------------------------------------------------------------- /man/plot_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_normal.R 3 | \name{plot_normal} 4 | \alias{plot_normal} 5 | \title{Plot a Normal Model for \eqn{\mu}} 6 | \usage{ 7 | plot_normal(mean, sd) 8 | } 9 | \arguments{ 10 | \item{mean}{mean parameter of the Normal model} 11 | 12 | \item{sd}{standard deviation parameter of the Normal model} 13 | } 14 | \value{ 15 | a ggplot 16 | } 17 | \description{ 18 | Plots the probability density function (pdf) for a 19 | Normal(mean, sd^2) model of variable \eqn{\mu}. 20 | } 21 | \examples{ 22 | plot_normal(mean = 3.5, sd = 0.5) 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_normal_likelihood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_normal_likelihood.R 3 | \name{plot_normal_likelihood} 4 | \alias{plot_normal_likelihood} 5 | \title{Plot a Normal Likelihood Function} 6 | \usage{ 7 | plot_normal_likelihood(y, sigma = NULL) 8 | } 9 | \arguments{ 10 | \item{y}{vector of observed data} 11 | 12 | \item{sigma}{optional value for assumed standard deviation of y. by default, this is calculated by the sample standard deviation of y.} 13 | } 14 | \value{ 15 | a ggplot of Normal likelihood 16 | } 17 | \description{ 18 | Plots the Normal likelihood function for variable \eqn{\mu} 19 | given a vector of Normal data y. 
20 | } 21 | \examples{ 22 | plot_normal_likelihood(y = rnorm(50, mean = 10, sd = 2), sigma = 1.5) 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_normal_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_normal_normal.R 3 | \name{plot_normal_normal} 4 | \alias{plot_normal_normal} 5 | \title{Plot a Normal-Normal Bayesian model} 6 | \usage{ 7 | plot_normal_normal( 8 | mean, 9 | sd, 10 | sigma = NULL, 11 | y_bar = NULL, 12 | n = NULL, 13 | prior = TRUE, 14 | likelihood = TRUE, 15 | posterior = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{mean}{mean of the Normal prior} 20 | 21 | \item{sd}{standard deviation of the Normal prior} 22 | 23 | \item{sigma}{standard deviation of the data, or likelihood standard deviation} 24 | 25 | \item{y_bar}{sample mean of the data} 26 | 27 | \item{n}{sample size of the data} 28 | 29 | \item{prior}{a logical value indicating whether the prior model should be plotted} 30 | 31 | \item{likelihood}{a logical value indicating whether the scaled likelihood should be plotted} 32 | 33 | \item{posterior}{a logical value indicating whether posterior model should be plotted} 34 | } 35 | \value{ 36 | a ggplot 37 | } 38 | \description{ 39 | Consider a Normal-Normal Bayesian model for mean parameter \eqn{\mu} with 40 | a N(mean, sd^2) prior on \eqn{\mu} and a Normal likelihood for the data. 41 | Given information on the prior (mean and sd) 42 | and data (the sample size n, mean y_bar, and standard deviation sigma), 43 | this function produces a plot of any combination of the corresponding prior pdf, 44 | scaled likelihood function, and posterior pdf. All three are included by default. 
45 | } 46 | \examples{ 47 | plot_normal_normal(mean = 0, sd = 3, sigma= 4, y_bar = 5, n = 3) 48 | plot_normal_normal(mean = 0, sd = 3, sigma= 4, y_bar = 5, n = 3, posterior = FALSE) 49 | } 50 | -------------------------------------------------------------------------------- /man/plot_poisson_likelihood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_poisson_likelihood.R 3 | \name{plot_poisson_likelihood} 4 | \alias{plot_poisson_likelihood} 5 | \title{Plot a Poisson Likelihood Function} 6 | \usage{ 7 | plot_poisson_likelihood(y, lambda_upper_bound = 10) 8 | } 9 | \arguments{ 10 | \item{y}{vector of observed Poisson counts} 11 | 12 | \item{lambda_upper_bound}{upper bound for lambda values to display on x-axis} 13 | } 14 | \value{ 15 | a ggplot of Poisson likelihood 16 | } 17 | \description{ 18 | Plots the Poisson likelihood function for variable \eqn{\lambda} 19 | given a vector of Poisson counts y. 20 | } 21 | \examples{ 22 | plot_poisson_likelihood(y = c(4, 2, 7), lambda_upper_bound = 10) 23 | } 24 | -------------------------------------------------------------------------------- /man/pop_vs_soda.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_pop_vs_soda.R 3 | \docType{data} 4 | \name{pop_vs_soda} 5 | \alias{pop_vs_soda} 6 | \title{Pop vs Soda vs Coke} 7 | \format{ 8 | A data frame with 374250 observations, one per survey respondent, and 4 variables: 9 | \describe{ 10 | \item{state}{the U.S. state in which the respondent resides} 11 | \item{region}{region in which the state falls (as defined by the U.S. 
Census)} 12 | \item{word_for_cola}{how the respondent refers to fizzy cola drinks} 13 | \item{pop}{whether or not the respondent refers to fizzy cola drinks as "pop"} 14 | } 15 | } 16 | \source{ 17 | The survey responses were obtained at \url{https://popvssoda.com/} which is maintained by Alan McConchie. 18 | } 19 | \usage{ 20 | pop_vs_soda 21 | } 22 | \description{ 23 | Results of a volunteer survey on how people around the U.S. refer to fizzy cola drinks. The options are "pop", "soda", "coke", or "other". 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/prediction_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction_summary.R 3 | \name{prediction_summary} 4 | \alias{prediction_summary} 5 | \title{Posterior Predictive Summaries} 6 | \usage{ 7 | prediction_summary( 8 | model, 9 | data, 10 | prob_inner = 0.5, 11 | prob_outer = 0.95, 12 | stable = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{model}{an rstanreg model object with quantitative y} 17 | 18 | \item{data}{data frame including the variables in the model, both response y and predictors x} 19 | 20 | \item{prob_inner}{posterior predictive interval probability (a value between 0 and 1)} 21 | 22 | \item{prob_outer}{posterior predictive interval probability (a value between 0 and 1)} 23 | 24 | \item{stable}{TRUE returns the number of absolute deviations and FALSE returns the standard deviations that observed y values fall from their predictive medians} 25 | } 26 | \value{ 27 | a tibble 28 | } 29 | \description{ 30 | Given a set of observed data including a quantitative response variable y 31 | and an rstanreg model of y, 32 | this function returns 4 measures of the posterior prediction quality. 
33 | Median absolute prediction error (mae) measures the typical difference between the observed y values and their posterior predictive medians (stable = TRUE) or means (stable = FALSE). 34 | Scaled mae (mae_scaled) measures the typical number of absolute deviations (stable = TRUE) or standard deviations (stable = FALSE) that observed y values fall from their predictive medians (stable = TRUE) or means (stable = FALSE). 35 | within_50 and within_90 report the proportion of observed y values that fall within their posterior prediction intervals, the probability levels of which are set by the user. 36 | } 37 | \examples{ 38 | example_data <- data.frame(x = sample(1:100, 20)) 39 | example_data$y <- example_data$x*3 + rnorm(20, 0, 5) 40 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data) 41 | prediction_summary(example_model, example_data, prob_inner = 0.6, prob_outer = 0.80, stable = TRUE) 42 | } 43 | -------------------------------------------------------------------------------- /man/prediction_summary_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction_summary_cv.R 3 | \name{prediction_summary_cv} 4 | \alias{prediction_summary_cv} 5 | \title{Cross-Validated Posterior Predictive Summaries} 6 | \usage{ 7 | prediction_summary_cv( 8 | data, 9 | group, 10 | model, 11 | k, 12 | prob_inner = 0.5, 13 | prob_outer = 0.95 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{data frame including the variables in the model, both response y and predictors x} 18 | 19 | \item{group}{a character string representing the name of the factor grouping variable, ie. 
random effect (only used for hierarchical models)} 20 | 21 | \item{model}{an rstanreg model object with quantitative y} 22 | 23 | \item{k}{the number of folds to use for cross validation} 24 | 25 | \item{prob_inner}{posterior predictive interval probability (a value between 0 and 1)} 26 | 27 | \item{prob_outer}{posterior predictive interval probability (a value between 0 and 1)} 28 | } 29 | \value{ 30 | list 31 | } 32 | \description{ 33 | Given a set of observed data including a quantitative response variable y 34 | and an rstanreg model of y, 35 | this function returns 4 cross-validated measures of the model's posterior prediction quality: 36 | Median absolute prediction error (mae) measures the typical difference between the observed y values and their posterior predictive medians (stable = TRUE) or means (stable = FALSE). 37 | Scaled mae (mae_scaled) measures the typical number of absolute deviations (stable = TRUE) or standard deviations (stable = FALSE) that observed y values fall from their predictive medians (stable = TRUE) or means (stable = FALSE). 38 | within_50 and within_90 report the proportion of observed y values that fall within their posterior prediction intervals, the probability levels of which are set by the user. 39 | For hierarchical models of class lmerMod, the folds are comprised by collections of groups, not individual observations. 
40 | } 41 | \examples{ 42 | example_data <- data.frame(x = sample(1:100, 20)) 43 | example_data$y <- example_data$x*3 + rnorm(20, 0, 5) 44 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data) 45 | prediction_summary_cv(model = example_model, data = example_data, k = 2) 46 | } 47 | -------------------------------------------------------------------------------- /man/pulse_of_the_nation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_pulse_of_the_nation.R 3 | \docType{data} 4 | \name{pulse_of_the_nation} 5 | \alias{pulse_of_the_nation} 6 | \title{Cards Against Humanity's Pulse of the Nation Survey} 7 | \format{ 8 | A data frame with observations on 1000 survey respondents with 15 variables: 9 | \describe{ 10 | \item{income}{income in \$1000s} 11 | \item{age}{age in years} 12 | \item{party}{political party affiliation} 13 | \item{trump_approval}{approval level of Donald Trump's job performance} 14 | \item{education}{maximum education level completed} 15 | \item{robots}{opinion of how likely their job is to be replaced by robots within 10 years} 16 | \item{climate_change}{belief in climate change} 17 | \item{transformers}{the number of Transformers film the respondent has seen} 18 | \item{science_is_honest}{opinion of whether scientists are generally honest and serve the public good} 19 | \item{vaccines_are_safe}{opinion of whether vaccines are safe and protect children from disease} 20 | \item{books}{number of books read in the past year} 21 | \item{ghosts}{whether or not they believe in ghosts} 22 | \item{fed_sci_budget}{respondent's estimate of the percentage of the federal budget that is spent on scientific research} 23 | \item{earth_sun}{belief about whether the earth is always farther away from the sun in winter than in summer (TRUE or FALSE)} 24 | \item{wise_unwise}{whether the respondent would rather be wise but unhappy, 
or unwise but happy} 25 | } 26 | } 27 | \source{ 28 | \url{https://thepulseofthenation.com/downloads/201709-CAH_PulseOfTheNation_Raw.csv} 29 | } 30 | \usage{ 31 | pulse_of_the_nation 32 | } 33 | \description{ 34 | Cards Against Humanity's "Pulse of the Nation" project (\url{https://thepulseofthenation.com/}) conducted monthly polls into people's social and political views, as well as some silly things. This data includes responses to a subset of questions included in the poll conducted in September 2017. 35 | } 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/sample_mode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample_mode.R 3 | \name{sample_mode} 4 | \alias{sample_mode} 5 | \title{Sample Mode} 6 | \usage{ 7 | sample_mode(x) 8 | } 9 | \arguments{ 10 | \item{x}{vector of sample data} 11 | } 12 | \value{ 13 | sample mode 14 | } 15 | \description{ 16 | Calculate the sample mode of vector x. 
17 | } 18 | \examples{ 19 | sample_mode(rbeta(100, 2, 7)) 20 | } 21 | -------------------------------------------------------------------------------- /man/spotify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_spotify.R 3 | \docType{data} 4 | \name{spotify} 5 | \alias{spotify} 6 | \title{Spotify Song Data} 7 | \format{ 8 | A data frame with 350 songs (or tracks) and 23 variables: 9 | \describe{ 10 | \item{track_id}{unique song identifier} 11 | \item{title}{song name} 12 | \item{artist}{song artist} 13 | \item{popularity}{song popularity from 0 (low) to 100 (high)} 14 | \item{album_id}{id of the album on which the song appears} 15 | \item{album_name}{name of the album on which the song appears} 16 | \item{album_release_date}{when the album was released} 17 | \item{playlist_name}{Spotify playlist on which the song appears} 18 | \item{playlist_id}{unique playlist identifier} 19 | \item{genre}{genre of the playlist} 20 | \item{subgenre}{subgenre of the playlist} 21 | \item{danceability}{a score from 0 (not danceable) to 100 (danceable) based on features such as tempo, rhythm, etc.} 22 | \item{energy}{a score from 0 (low energy) to 100 (high energy) based on features such as loudness, timbre, entropy, etc.} 23 | \item{key}{song key} 24 | \item{loudness}{song loudness (dB)} 25 | \item{mode}{0 (minor key) or 1 (major key)} 26 | \item{speechiness}{a score from 0 (non-speechy tracks) to 100 (speechy tracks)} 27 | \item{acousticness}{a score from 0 (not acoustic) to 100 (very acoustic)} 28 | \item{instrumentalness}{a score from 0 (not instrumental) to 100 (very instrumental)} 29 | \item{liveness}{a score from 0 (no live audience presence on the song) to 100 (strong live audience presence on the song)} 30 | \item{valence}{a score from 0 (the song is more negative, sad, angry) to 100 (the song is more positive, happy, euphoric)} 31 | 
\item{tempo}{song tempo (beats per minute)} 32 | \item{duration_ms}{song duration (ms)} 33 | } 34 | } 35 | \source{ 36 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-01-21/spotify_songs.csv/}. 37 | } 38 | \usage{ 39 | spotify 40 | } 41 | \description{ 42 | A sub-sample of the Spotify song data originally collected by Kaylin Pavlik (kaylinquest) and distributed through the R for Data Science TidyTuesday project. 43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /man/summarize_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_beta.R 3 | \name{summarize_beta} 4 | \alias{summarize_beta} 5 | \title{Summarize a Beta Model for \eqn{\pi}} 6 | \usage{ 7 | summarize_beta(alpha, beta) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the Beta model} 11 | } 12 | \value{ 13 | a summary table 14 | } 15 | \description{ 16 | Summarizes the expected value, variance, and mode of 17 | a Beta(alpha, beta) model for variable \eqn{\pi}. 
18 | } 19 | \examples{ 20 | summarize_beta(alpha = 1, beta = 15) 21 | } 22 | -------------------------------------------------------------------------------- /man/summarize_beta_binomial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_beta_binomial.R 3 | \name{summarize_beta_binomial} 4 | \alias{summarize_beta_binomial} 5 | \title{Summarize a Beta-Binomial Bayesian model} 6 | \usage{ 7 | summarize_beta_binomial(alpha, beta, y = NULL, n = NULL) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the prior Beta model} 11 | 12 | \item{y}{number of successes} 13 | 14 | \item{n}{number of trials} 15 | } 16 | \value{ 17 | a summary table 18 | } 19 | \description{ 20 | Consider a Beta-Binomial Bayesian model for parameter \eqn{\pi} with 21 | a Beta(alpha, beta) prior on \eqn{\pi} and Binomial likelihood with n trials 22 | and y successes. Given information on the prior (alpha and beta) and data (y and n), 23 | this function summarizes the mean, mode, and variance of the 24 | prior and posterior Beta models of \eqn{\pi}. 
25 | } 26 | \examples{ 27 | summarize_beta_binomial(alpha = 1, beta = 15, y = 25, n = 50) 28 | } 29 | -------------------------------------------------------------------------------- /man/summarize_gamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_gamma.R 3 | \name{summarize_gamma} 4 | \alias{summarize_gamma} 5 | \title{Summarize a Gamma Model for \eqn{\lambda}} 6 | \usage{ 7 | summarize_gamma(shape, rate) 8 | } 9 | \arguments{ 10 | \item{shape}{positive shape parameter of the Gamma model} 11 | 12 | \item{rate}{positive rate parameter of the Gamma model} 13 | } 14 | \value{ 15 | a summary table 16 | } 17 | \description{ 18 | Summarizes the expected value, variance, and mode of 19 | a Gamma(shape, rate) model for variable \eqn{\lambda}. 20 | } 21 | \examples{ 22 | summarize_gamma(shape = 1, rate = 15) 23 | 24 | } 25 | -------------------------------------------------------------------------------- /man/summarize_gamma_poisson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_gamma_poisson.R 3 | \name{summarize_gamma_poisson} 4 | \alias{summarize_gamma_poisson} 5 | \title{Summarize the Gamma-Poisson Model} 6 | \usage{ 7 | summarize_gamma_poisson(shape, rate, sum_y = NULL, n = NULL) 8 | } 9 | \arguments{ 10 | \item{shape}{positive shape parameter of the Gamma prior} 11 | 12 | \item{rate}{positive rate parameter of the Gamma prior} 13 | 14 | \item{sum_y}{sum of observed data values for the Poisson likelihood} 15 | 16 | \item{n}{number of observations for the Poisson likelihood} 17 | } 18 | \value{ 19 | data frame 20 | } 21 | \description{ 22 | Consider a Gamma-Poisson Bayesian model for rate parameter \eqn{\lambda} with 23 | a Gamma(shape, rate) prior on \eqn{\lambda} and a Poisson likelihood for the data. 
24 | Given information on the prior (shape and rate) 25 | and data (the sample size n and sum_y), 26 | this function summarizes the mean, mode, and variance of the 27 | prior and posterior Gamma models of \eqn{\lambda}. 28 | } 29 | \examples{ 30 | summarize_gamma_poisson(shape = 3, rate = 4, sum_y = 7, n = 12) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/summarize_normal_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_normal_normal.R 3 | \name{summarize_normal_normal} 4 | \alias{summarize_normal_normal} 5 | \title{Summarize a Normal-Normal Bayesian model} 6 | \usage{ 7 | summarize_normal_normal(mean, sd, sigma = NULL, y_bar = NULL, n = NULL) 8 | } 9 | \arguments{ 10 | \item{mean}{mean of the Normal prior} 11 | 12 | \item{sd}{standard deviation of the Normal prior} 13 | 14 | \item{sigma}{standard deviation of the data, or likelihood standard deviation} 15 | 16 | \item{y_bar}{sample mean of the data} 17 | 18 | \item{n}{sample size of the data} 19 | } 20 | \value{ 21 | data frame 22 | } 23 | \description{ 24 | Consider a Normal-Normal Bayesian model for mean parameter \eqn{\mu} with 25 | a N(mean, sd^2) prior on \eqn{\mu} and a Normal likelihood for the data. 26 | Given information on the prior (mean and sd) 27 | and data (the sample size n, mean y_bar, and standard deviation sigma), 28 | this function summarizes the mean, mode, and variance of the 29 | prior and posterior Normal models of \eqn{\mu}. 
30 | } 31 | \examples{ 32 | summarize_normal_normal(mean = 2.3, sd = 0.3, sigma = 5.1, y_bar = 128.5, n = 20) 33 | } 34 | -------------------------------------------------------------------------------- /man/voices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_voices.R 3 | \docType{data} 4 | \name{voices} 5 | \alias{voices} 6 | \title{Voice Pitch Data} 7 | \format{ 8 | A data frame with 84 rows and 4 variables. Each row represents a single observation for the given subject. 9 | \describe{ 10 | \item{subject}{subject identifier} 11 | \item{scenario}{context of the dialog (encoded as A, B, ..., G)} 12 | \item{attitude}{whether the attitude to use in dialog was polite or informal} 13 | \item{pitch}{average voice pitch (Hz)} 14 | } 15 | } 16 | \source{ 17 | Winter, B., & Grawunder, S. (2012). The Phonetic Profile of Korean Formal and Informal Speech Registers. Journal of Phonetics, 40, 808-815. 18 | \url{https://bodo-winter.net/data_and_scripts/POP.csv}. 19 | \url{https://bodo-winter.net/tutorial/bw_LME_tutorial2.pdf}. 20 | } 21 | \usage{ 22 | voices 23 | } 24 | \description{ 25 | Voice pitch data collected by Winter and Grawunder (2012). 26 | In an experiment, subjects participated in role-playing dialog under various conditions, 27 | while researchers monitored voice pitch (Hz). 28 | The conditions spanned different scenarios (e.g., making an appointment, asking for a favor) 29 | and different attitudes to use in the scenario (polite or informal).
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/weather_WU.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_weather_WU.R 3 | \docType{data} 4 | \name{weather_WU} 5 | \alias{weather_WU} 6 | \title{Weather Data for 2 Australian Cities} 7 | \format{ 8 | A data frame with 200 daily observations and 22 variables from 2 Australian weather stations: 9 | \describe{ 10 | \item{location}{one of two weather stations} 11 | \item{mintemp}{minimum temperature (degrees Celsius)} 12 | \item{maxtemp}{maximum temperature (degrees Celsius)} 13 | \item{rainfall}{rainfall (mm)} 14 | \item{windgustdir}{direction of strongest wind gust} 15 | \item{windgustspeed}{speed of strongest wind gust (km/h)} 16 | \item{winddir9am}{direction of wind gust at 9am} 17 | \item{winddir3pm}{direction of wind gust at 3pm} 18 | \item{windspeed9am}{wind speed at 9am (km/h)} 19 | \item{windspeed3pm}{wind speed at 3pm (km/h)} 20 | \item{humidity9am}{humidity level at 9am (percent)} 21 | \item{humidity3pm}{humidity level at 3pm (percent)} 22 | \item{pressure9am}{atmospheric pressure at 9am (hpa)} 23 | \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 24 | \item{temp9am}{temperature at 9am (degrees Celsius)} 25 | \item{temp3pm}{temperature at 3pm (degrees Celsius)} 26 | \item{raintoday}{whether or not it rained today (Yes or No)} 27 | \item{risk_mm}{the amount of rain today (mm)} 28 | \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 29 | \item{year}{the year of the date} 30 | \item{month}{the month of the date} 31 | \item{day_of_year}{the day of the year} 32 | } 33 | } 34 | \source{ 35 | Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 
36 | } 37 | \usage{ 38 | weather_WU 39 | } 40 | \description{ 41 | A sub-sample of daily weather information from the weatherAUS data in the rattle package for two Australian cities, Wollongong and Uluru. 42 | The weather_australia data in the bayesrules package combines this data with a third city, Hobart. 43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /man/weather_australia.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_weather_australia.R 3 | \docType{data} 4 | \name{weather_australia} 5 | \alias{weather_australia} 6 | \title{Weather Data for 3 Australian Cities} 7 | \format{ 8 | A data frame with 300 daily observations and 22 variables from 3 Australian weather stations: 9 | \describe{ 10 | \item{location}{one of three weather stations} 11 | \item{mintemp}{minimum temperature (degrees Celsius)} 12 | \item{maxtemp}{maximum temperature (degrees Celsius)} 13 | \item{rainfall}{rainfall (mm)} 14 | \item{windgustdir}{direction of strongest wind gust} 15 | \item{windgustspeed}{speed of strongest wind gust (km/h)} 16 | \item{winddir9am}{direction of wind gust at 9am} 17 | \item{winddir3pm}{direction of wind gust at 3pm} 18 | \item{windspeed9am}{wind speed at 9am (km/h)} 19 | \item{windspeed3pm}{wind speed at 3pm (km/h)} 20 | \item{humidity9am}{humidity level at 9am (percent)} 21 | \item{humidity3pm}{humidity level at 3pm (percent)} 22 | \item{pressure9am}{atmospheric pressure at 9am (hpa)} 23 | \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 24 | \item{temp9am}{temperature at 9am (degrees Celsius)} 25 | \item{temp3pm}{temperature at 3pm (degrees Celsius)} 26 | \item{raintoday}{whether or not it rained today (Yes or No)} 27 | \item{risk_mm}{the amount of rain today (mm)} 28 | \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 29 | \item{year}{the year of the
date} 30 | \item{month}{the month of the date} 31 | \item{day_of_year}{the day of the year} 32 | } 33 | } 34 | \source{ 35 | Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data/}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 36 | } 37 | \usage{ 38 | weather_australia 39 | } 40 | \description{ 41 | A sub-sample of daily weather information from the weatherAUS data in the rattle package for three Australian cities: Wollongong, Hobart, and Uluru. 42 | } 43 | \keyword{datasets} 44 | -------------------------------------------------------------------------------- /man/weather_perth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_weather_perth.R 3 | \docType{data} 4 | \name{weather_perth} 5 | \alias{weather_perth} 6 | \title{Weather Data for Perth, Australia} 7 | \format{ 8 | A data frame with 1000 daily observations and 21 variables: 9 | \describe{ 10 | \item{mintemp}{minimum temperature (degrees Celsius)} 11 | \item{maxtemp}{maximum temperature (degrees Celsius)} 12 | \item{rainfall}{rainfall (mm)} 13 | \item{windgustdir}{direction of strongest wind gust} 14 | \item{windgustspeed}{speed of strongest wind gust (km/h)} 15 | \item{winddir9am}{direction of wind gust at 9am} 16 | \item{winddir3pm}{direction of wind gust at 3pm} 17 | \item{windspeed9am}{wind speed at 9am (km/h)} 18 | \item{windspeed3pm}{wind speed at 3pm (km/h)} 19 | \item{humidity9am}{humidity level at 9am (percent)} 20 | \item{humidity3pm}{humidity level at 3pm (percent)} 21 | \item{pressure9am}{atmospheric pressure at 9am (hpa)} 22 | \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 23 | \item{temp9am}{temperature at 9am (degrees Celsius)} 24 | \item{temp3pm}{temperature at 3pm (degrees Celsius)} 25 | \item{raintoday}{whether or not it rained today (Yes or No)} 26 | \item{risk_mm}{the amount of rain today 
(mm)} 27 | \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 28 | \item{year}{the year of the date} 29 | \item{month}{the month of the date} 30 | \item{day_of_year}{the day of the year} 31 | } 32 | } 33 | \source{ 34 | Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data/}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 35 | } 36 | \usage{ 37 | weather_perth 38 | } 39 | \description{ 40 | A sub-sample of daily weather information on Perth, Australia from the weatherAUS data in the rattle package. 41 | } 42 | \keyword{datasets} 43 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.R 2 | *.html 3 | -------------------------------------------------------------------------------- /vignettes/conjugate-families.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Conjugate Families" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Conjugate Families} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup} 18 | library(bayesrules) 19 | ``` 20 | 21 | The **bayesrules** package has a set of functions that support exploring Bayesian models from three conjugate families: **Beta-Binomial**, **Gamma-Poisson**, and **Normal-Normal**. 22 | The functions either help with plotting (prior, likelihood, and/or posterior) or summarizing the descriptives (mean, mode, variance, and sd) of the prior and/or posterior. 23 | 24 | ## The Beta-Binomial Model 25 | 26 | We use the Beta-Binomial model to show the different set of functions and the arguments. 
27 | 28 | ### Prior 29 | 30 | ```{r fig.align='center', fig.height=4, fig.width=5} 31 | plot_beta(alpha = 3, beta = 13, mean = TRUE, mode = TRUE) 32 | ``` 33 | 34 | ```{r} 35 | summarize_beta(alpha = 3, beta = 13) 36 | ``` 37 | 38 | 39 | ### Likelihood 40 | 41 | In addition, `plot_binomial_likelihood()` helps users visualize the Binomial likelihood function and shows the maximum likelihood estimate. 42 | 43 | ```{r fig.align='center', fig.height=4, fig.width=5, message = FALSE} 44 | plot_binomial_likelihood(y = 3, n = 15, mle = TRUE) 45 | ``` 46 | 47 | 48 | 49 | ### Prior-Likelihood-Posterior 50 | 51 | The two other functions `plot_beta_binomial()` and `summarize_beta_binomial()` require both the prior parameters and the data for the likelihood. 52 | 53 | ```{r fig.align='center', warning = FALSE, fig.height=4, fig.width=5} 54 | plot_beta_binomial(alpha = 3, beta = 13, y = 5, n = 10, 55 | prior = TRUE, #the default 56 | likelihood = TRUE, #the default 57 | posterior = TRUE #the default 58 | ) 59 | ``` 60 | 61 | ```{r fig.align='center', warning = FALSE, fig.height=4, fig.width=5} 62 | summarize_beta_binomial(alpha = 3, beta = 13, y = 5, n = 10) 63 | ``` 64 | 65 | ## Other Conjugate Families 66 | 67 | For Gamma-Poisson and Normal-Normal models, the set of functions is similar but the arguments are different for each model. Arguments of the Gamma-Poisson functions include the `shape` and `rate` of the Gamma prior and `sum_y` and `n` arguments related to observed data, which represent the sum of observed data values and the number of observations, respectively. 68 | 69 | ```{r fig.align='center', warning = FALSE, fig.height=4, fig.width=5} 70 | plot_gamma_poisson( 71 | shape = 3, 72 | rate = 4, 73 | sum_y = 3, 74 | n = 9, 75 | prior = TRUE, 76 | likelihood = TRUE, 77 | posterior = TRUE 78 | ) 79 | ``` 80 | 81 | For the Normal-Normal model functions, the prior Normal model has the `mean` and `sd` arguments.
The observed data has `sigma`, `y_bar`, and `n`, which indicate the standard deviation, mean, and sample size of the data, respectively. 82 | 83 | ```{r} 84 | summarize_normal_normal(mean = 3.8, sd = 1.12, sigma = 5.8, y_bar = 3.35, n = 8) 85 | ``` 86 | 87 | --------------------------------------------------------------------------------