├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── pkgdown.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── classification_summary.R ├── classification_summary_cv.R ├── data_airbnb.R ├── data_airbnb_small.R ├── data_bald_eagles.R ├── data_basketball.R ├── data_bechdel.R ├── data_big_word_club.R ├── data_bike_users.R ├── data_bikes.R ├── data_bird_counts.R ├── data_book_banning.R ├── data_cherry_blossom_sample.R ├── data_climbers_sub.R ├── data_coffee_ratings.R ├── data_coffee_ratings_small.R ├── data_equality_index.R ├── data_fake_news.R ├── data_football.R ├── data_hotel_bookings.R ├── data_loons.R ├── data_moma.R ├── data_moma_sample.R ├── data_penguins_bayes.R ├── data_pop_vs_soda.R ├── data_pulse_of_the_nation.R ├── data_spotify.R ├── data_voices.R ├── data_weather_WU.R ├── data_weather_australia.R ├── data_weather_perth.R ├── globals.R ├── naive_classification_summary.R ├── naive_classification_summary_cv.R ├── plot_beta.R ├── plot_beta_binomial.R ├── plot_beta_ci.R ├── plot_binomial_likelihood.R ├── plot_gamma.R ├── plot_gamma_poisson.R ├── plot_normal.R ├── plot_normal_likelihood.R ├── plot_normal_normal.R ├── plot_poisson_likelihood.R ├── prediction_summary.R ├── prediction_summary_cv.R ├── sample_mode.R ├── summarize_beta.R ├── summarize_beta_binomial.R ├── summarize_gamma.R ├── summarize_gamma_poisson.R └── summarize_normal_normal.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── data-raw ├── DATASET.R ├── airbnb.R ├── airbnb_small.R ├── bald_eagles.R ├── basketball.R ├── bechdel │ └── bechdel_dataprep.R ├── big_word_club │ ├── big_word_club.csv │ ├── big_word_club_small.csv │ ├── big_word_clubprep.R │ └── bwc_data.dta ├── bike_users.R ├── bikes.R ├── bird_counts.R ├── book_banning.R ├── cherry_blossom_sample.R ├── climbers_sub │ ├── climbers.csv │ └── climbers_sub.R ├── coffee_ratings.R ├── coffee_ratings_small.R ├── equality_index │ ├── equality_index.csv │ └── 
equality_index_dataprep.R ├── fake_news │ ├── BuzzFeed_fake_news_content.csv │ ├── BuzzFeed_real_news_content.csv │ └── fake_news_dataprep.R ├── football.R ├── hotel_bookings.R ├── loons.R ├── moma.R ├── penguins_bayes.R ├── pop_vs_soda │ ├── pop_vs_soda_dataprep.R │ └── pop_vs_soda_raw.csv ├── pulse_of_the_nation │ ├── pulse_of_the_nation.csv │ └── pulse_of_the_nation_dataprep.R ├── spotify.R ├── voices.R ├── weather_WU.Rmd ├── weather_australia.R └── weather_perth.R ├── data ├── airbnb.rda ├── airbnb_small.rda ├── bald_eagles.rda ├── basketball.rda ├── bechdel.rda ├── big_word_club.rda ├── bike_users.rda ├── bikes.rda ├── bird_counts.rda ├── book_banning.rda ├── cherry_blossom_sample.rda ├── climbers_sub.rda ├── coffee_ratings.rda ├── coffee_ratings_small.rda ├── equality_index.rda ├── fake_news.rda ├── football.rda ├── hotel_bookings.rda ├── loons.rda ├── moma.rda ├── moma_sample.rda ├── penguins_bayes.rda ├── pop_vs_soda.rda ├── pulse_of_the_nation.rda ├── spotify.rda ├── voices.rda ├── weather_WU.rda ├── weather_australia.rda └── weather_perth.rda ├── docs ├── 404.html ├── LICENSE.html ├── articles │ ├── conjugate-families.html │ ├── conjugate-families_files │ │ ├── figure-html │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ └── unnamed-chunk-7-1.png │ │ ├── header-attrs-2.10 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ │ └── header-attrs.js │ │ ├── header-attrs-2.8 │ │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ │ └── header-attrs.js │ ├── index.html │ ├── model-evaluation.html │ └── model-evaluation_files │ │ ├── header-attrs-2.10 │ │ └── header-attrs.js │ │ ├── header-attrs-2.11 │ │ └── header-attrs.js │ │ ├── header-attrs-2.8 │ │ └── header-attrs.js │ │ └── header-attrs-2.9 │ │ └── header-attrs.js ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── 
pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── Rplot002.png │ ├── airbnb.html │ ├── airbnb_small.html │ ├── bald_eagles.html │ ├── basketball.html │ ├── bechdel.html │ ├── big_word_club.html │ ├── bike_users.html │ ├── bikes.html │ ├── bird_counts.html │ ├── book_banning.html │ ├── cherry_blossom_sample.html │ ├── classification_summary.html │ ├── classification_summary_cv.html │ ├── climbers_sub.html │ ├── coffee_ratings.html │ ├── coffee_ratings_small.html │ ├── equality_index.html │ ├── fake_news.html │ ├── figures │ │ ├── README-pressure-1.png │ │ └── bayes-rules-hex.png │ ├── football.html │ ├── hotel_bookings.html │ ├── index.html │ ├── loons.html │ ├── moma.html │ ├── moma_sample.html │ ├── naive_classification_summary.html │ ├── naive_classification_summary_cv.html │ ├── penguins_bayes.html │ ├── plot_beta-1.png │ ├── plot_beta.html │ ├── plot_beta_binomial-1.png │ ├── plot_beta_binomial-2.png │ ├── plot_beta_binomial.html │ ├── plot_beta_ci-1.png │ ├── plot_beta_ci.html │ ├── plot_binomial_likelihood-1.png │ ├── plot_binomial_likelihood.html │ ├── plot_gamma-1.png │ ├── plot_gamma.html │ ├── plot_gamma_poisson-1.png │ ├── plot_gamma_poisson-2.png │ ├── plot_gamma_poisson.html │ ├── plot_normal-1.png │ ├── plot_normal.html │ ├── plot_normal_likelihood-1.png │ ├── plot_normal_likelihood.html │ ├── plot_normal_normal-1.png │ ├── plot_normal_normal-2.png │ ├── plot_normal_normal.html │ ├── plot_poisson_likelihood-1.png │ ├── plot_poisson_likelihood.html │ ├── pop_vs_soda.html │ ├── prediction_summary.html │ ├── prediction_summary_cv.html │ ├── pulse_of_the_nation.html │ ├── sample_mode.html │ ├── spotify.html │ ├── summarize_beta.html │ ├── summarize_beta_binomial.html │ ├── summarize_gamma.html │ ├── summarize_gamma_poisson.html │ ├── summarize_normal_normal.html │ ├── voices.html │ ├── weather_WU.html │ ├── weather_australia.html │ └── weather_perth.html └── sitemap.xml ├── inst └── CITATION ├── man ├── airbnb.Rd ├── airbnb_small.Rd ├── bald_eagles.Rd 
├── basketball.Rd ├── bechdel.Rd ├── big_word_club.Rd ├── bike_users.Rd ├── bikes.Rd ├── bird_counts.Rd ├── book_banning.Rd ├── cherry_blossom_sample.Rd ├── classification_summary.Rd ├── classification_summary_cv.Rd ├── climbers_sub.Rd ├── coffee_ratings.Rd ├── coffee_ratings_small.Rd ├── equality_index.Rd ├── fake_news.Rd ├── figures │ └── bayes-rules-hex.png ├── football.Rd ├── hotel_bookings.Rd ├── loons.Rd ├── moma.Rd ├── moma_sample.Rd ├── naive_classification_summary.Rd ├── naive_classification_summary_cv.Rd ├── penguins_bayes.Rd ├── plot_beta.Rd ├── plot_beta_binomial.Rd ├── plot_beta_ci.Rd ├── plot_binomial_likelihood.Rd ├── plot_gamma.Rd ├── plot_gamma_poisson.Rd ├── plot_normal.Rd ├── plot_normal_likelihood.Rd ├── plot_normal_normal.Rd ├── plot_poisson_likelihood.Rd ├── pop_vs_soda.Rd ├── prediction_summary.Rd ├── prediction_summary_cv.Rd ├── pulse_of_the_nation.Rd ├── sample_mode.Rd ├── spotify.Rd ├── summarize_beta.Rd ├── summarize_beta_binomial.Rd ├── summarize_gamma.Rd ├── summarize_gamma_poisson.Rd ├── summarize_normal_normal.Rd ├── voices.Rd ├── weather_WU.Rd ├── weather_australia.Rd └── weather_perth.Rd └── vignettes ├── .gitignore ├── conjugate-families.Rmd └── model-evaluation.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^data-raw$ 5 | ^LICENSE.md 6 | ^cran-comments.md 7 | ^docs$ 8 | ^\.github$ 9 | ^CRAN-RELEASE$ 10 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. 
2 | # https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - master 8 | pull_request: 9 | branches: 10 | - main 11 | - master 12 | 13 | name: R-CMD-check 14 | 15 | jobs: 16 | R-CMD-check: 17 | runs-on: ${{ matrix.config.os }} 18 | 19 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 20 | 21 | strategy: 22 | fail-fast: false 23 | matrix: 24 | config: 25 | - {os: windows-latest, r: 'release'} 26 | - {os: macOS-latest, r: 'release'} 27 | - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} 28 | - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest", http-user-agent: "R/4.1.0 (ubuntu-20.04) R (4.1.0 x86_64-pc-linux-gnu x86_64 linux-gnu) on GitHub Actions" } 29 | 30 | env: 31 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 32 | RSPM: ${{ matrix.config.rspm }} 33 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 34 | 35 | steps: 36 | - uses: actions/checkout@v2 37 | 38 | - uses: r-lib/actions/setup-r@v1 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | 42 | - uses: r-lib/actions/setup-pandoc@v1 43 | 44 | - name: Query dependencies 45 | run: | 46 | install.packages('remotes') 47 | saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2) 48 | writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version") 49 | shell: Rscript {0} 50 | 51 | - name: Restore R package cache 52 | uses: actions/cache@v2 53 | with: 54 | path: ${{ env.R_LIBS_USER }} 55 | key: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1-${{ hashFiles('.github/depends.Rds') }} 56 | restore-keys: ${{ runner.os }}-${{ hashFiles('.github/R-version') }}-1- 57 | 58 | - name: Install system dependencies 59 | if: runner.os == 'Linux' 60 | run: | 61 | sudo apt-get install -y libcurl4-openssl-dev 62 | while read -r cmd 63 | do 64 | eval sudo $cmd 65 | done < 
<(Rscript -e 'writeLines(remotes::system_requirements("ubuntu", "20.04"))') 66 | 67 | - name: Install dependencies 68 | run: | 69 | remotes::install_deps(dependencies = TRUE) 70 | remotes::install_cran("rcmdcheck") 71 | shell: Rscript {0} 72 | 73 | - name: Check 74 | env: 75 | _R_CHECK_CRAN_INCOMING_REMOTE_: false 76 | run: | 77 | options(crayon.enabled = TRUE) 78 | rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), error_on = "warning", check_dir = "check") 79 | shell: Rscript {0} 80 | 81 | - name: Upload check results 82 | if: failure() 83 | uses: actions/upload-artifact@main 84 | with: 85 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 86 | path: check 87 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - master 6 | 7 | name: pkgdown 8 | 9 | jobs: 10 | pkgdown: 11 | runs-on: ubuntu-18.04 12 | env: 13 | RSPM: https://packagemanager.rstudio.com/cran/__linux__/bionic/latest 14 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 15 | 16 | steps: 17 | - uses: actions/checkout@v2 18 | 19 | - uses: r-lib/actions/setup-r@v1 20 | id: install-r 21 | 22 | - uses: r-lib/actions/setup-pandoc@v1 23 | 24 | - name: Install pak and query dependencies 25 | run: | 26 | install.packages("pak", repos = "https://r-lib.github.io/p/pak/dev/") 27 | saveRDS(pak::pkg_deps("local::.", dependencies = TRUE), ".github/r-depends.rds") 28 | shell: Rscript {0} 29 | 30 | - name: Cache R packages 31 | uses: actions/cache@v2 32 | with: 33 | path: ${{ env.R_LIBS_USER }} 34 | key: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1-${{ hashFiles('.github/r-depends.rds') }} 35 | restore-keys: ubuntu-18.04-${{ steps.install-r.outputs.installed-r-version }}-1- 36 | 37 | - name: Install system dependencies 38 | if: runner.os == 'Linux' 39 | run: | 40 | pak::local_system_requirements(execute = 
TRUE) 41 | pak::pkg_system_requirements("pkgdown", execute = TRUE) 42 | shell: Rscript {0} 43 | 44 | - name: Install dependencies 45 | run: | 46 | pak::local_install_dev_deps(upgrade = TRUE) 47 | pak::pkg_install("r-lib/pkgdown") 48 | shell: Rscript {0} 49 | 50 | - name: Install package 51 | run: R CMD INSTALL . 52 | 53 | - name: Build and deploy pkgdown site 54 | run: | 55 | git config --local user.name "$GITHUB_ACTOR" 56 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 57 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE)' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | bayesrules.Rproj 6 | inst/doc 7 | .DS_Store -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bayesrules 2 | Type: Package 3 | Title: Datasets and Supplemental Functions from Bayes Rules! Book 4 | Version: 0.0.3.9000 5 | Authors@R: c( 6 | person("Mine", "Dogucu", , "mdogucu@gmail.com", c("aut", "cre"), comment = c(ORCID = "0000-0002-8007-934X")), 7 | person("Alicia", "Johnson", , role = "aut"), 8 | person("Miles", "Ott", , role = "aut", comment = c(ORCID = "0000-0003-4457-6565")) 9 | ) 10 | Description: Provides datasets and functions used for analysis 11 | and visualizations in the Bayes Rules! book (). 12 | The package contains a set of functions that summarize and plot Bayesian models from some conjugate families 13 | and another set of functions for evaluation of some Bayesian models. 
14 | License: GPL (>= 3) 15 | Encoding: UTF-8 16 | LazyData: true 17 | RoxygenNote: 7.1.2 18 | Suggests: 19 | knitr, 20 | rmarkdown 21 | Imports: 22 | ggplot2, 23 | janitor, 24 | magrittr, 25 | dplyr, 26 | stats, 27 | purrr, 28 | rstanarm, 29 | e1071, 30 | groupdata2 31 | Depends: 32 | R (>= 2.10) 33 | URL: https://bayes-rules.github.io/bayesrules/docs/, https://github.com/bayes-rules/bayesrules/ 34 | BugReports: https://github.com/bayes-rules/bayesrules/issues 35 | VignetteBuilder: knitr 36 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(classification_summary) 4 | export(classification_summary_cv) 5 | export(naive_classification_summary) 6 | export(naive_classification_summary_cv) 7 | export(plot_beta) 8 | export(plot_beta_binomial) 9 | export(plot_beta_ci) 10 | export(plot_binomial_likelihood) 11 | export(plot_gamma) 12 | export(plot_gamma_poisson) 13 | export(plot_normal) 14 | export(plot_normal_likelihood) 15 | export(plot_normal_normal) 16 | export(plot_poisson_likelihood) 17 | export(prediction_summary) 18 | export(prediction_summary_cv) 19 | export(sample_mode) 20 | export(summarize_beta) 21 | export(summarize_beta_binomial) 22 | export(summarize_gamma) 23 | export(summarize_gamma_poisson) 24 | export(summarize_normal_normal) 25 | import(dplyr) 26 | import(ggplot2) 27 | import(groupdata2) 28 | import(janitor) 29 | importFrom(dplyr,filter) 30 | importFrom(e1071,naiveBayes) 31 | importFrom(groupdata2,fold) 32 | importFrom(magrittr,"%>%") 33 | importFrom(purrr,map_df) 34 | importFrom(rstanarm,posterior_predict) 35 | importFrom(stats,as.formula) 36 | importFrom(stats,dbeta) 37 | importFrom(stats,dbinom) 38 | importFrom(stats,density) 39 | importFrom(stats,dgamma) 40 | importFrom(stats,dnorm) 41 | importFrom(stats,integrate) 42 | importFrom(stats,mad) 43 | 
importFrom(stats,median) 44 | importFrom(stats,na.omit) 45 | importFrom(stats,predict) 46 | importFrom(stats,qbeta) 47 | importFrom(stats,qgamma) 48 | importFrom(stats,quantile) 49 | importFrom(stats,rnorm) 50 | importFrom(stats,sd) 51 | importFrom(stats,update) 52 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # bayesrules 2 | 3 | ## bug fixes 4 | - Beta mode calculation has been fixed in `summarize_beta()` and `summarize_beta_binomial()` function for situations when alpha < 1 and/or beta < 1. 5 | -------------------------------------------------------------------------------- /R/classification_summary.R: -------------------------------------------------------------------------------- 1 | #' Posterior Classification Summaries 2 | #' 3 | #' Given a set of observed data including a binary response variable y 4 | #' and an rstanreg model of y, 5 | #' this function returns summaries of the model's posterior classification quality. 6 | #' These summaries include a confusion matrix as well as estimates of the model's 7 | #' sensitivity, specificity, and overall accuracy. 
8 | #' 9 | #' @param model an rstanreg model object with binary y 10 | #' @param data data frame including the variables in the model, both response y and predictors x 11 | #' @param cutoff probability cutoff to classify a new case as positive (0.5 is the default) 12 | #' 13 | #' @return a list 14 | #' @export 15 | #' @import janitor dplyr 16 | #' @importFrom rstanarm posterior_predict 17 | #' 18 | #' @examples 19 | #' x <- rnorm(20) 20 | #' z <- 3*x 21 | #' prob <- 1/(1+exp(-z)) 22 | #' y <- rbinom(20, 1, prob) 23 | #' example_data <- data.frame(x = x, y = y) 24 | #' example_model <- rstanarm::stan_glm(y ~ x, data = example_data, family = binomial) 25 | #' classification_summary(model = example_model, data = example_data, cutoff = 0.5) 26 | classification_summary <- function(model, data, cutoff = 0.5){ 27 | # This function summarizes the classifications across all cases 28 | if(!("stanreg" %in% class(model))){ stop("the model must be a stanreg object.")} 29 | 30 | # Calculate probability posterior predictions 31 | predictions <- posterior_predict(model, 32 | newdata = data) 33 | 34 | # Turn the predictions into classifications 35 | if("lmerMod" %in% class(model)){ 36 | y <- as.data.frame(data %>% dplyr::select(as.character(model$formula)[2]))[,1] 37 | } 38 | else{ 39 | y <- as.data.frame(data %>% dplyr::select(model$terms[[2]]))[,1] 40 | } 41 | 42 | classifications <- data.frame(proportion = colMeans(predictions)) %>% 43 | mutate(classification = as.numeric(proportion >= cutoff)) %>% 44 | mutate(y = y) 45 | 46 | # Confusion matrix 47 | confusion_matrix <- classifications %>% 48 | tabyl(y, classification) 49 | if(ncol(confusion_matrix) == 2){ 50 | if("1" %in% names(confusion_matrix)){ 51 | 52 | confusion_matrix <- confusion_matrix %>% 53 | mutate("0" = rep(0,nrow(.))) 54 | } 55 | if("0" %in% names(confusion_matrix)){ 56 | confusion_matrix <- confusion_matrix %>% 57 | mutate("1" = rep(0,nrow(.))) 58 | } 59 | } 60 | # Accuracy rates 61 | mat <- 
as.matrix(confusion_matrix[,-1]) 62 | sensitivity <- mat[2,2] / sum(mat[2,]) 63 | specificity <- mat[1,1] / sum(mat[1,]) 64 | overall_accuracy <- sum(diag(mat)) / sum(mat) 65 | accuracy_rates <- data.frame(c(sensitivity, specificity, overall_accuracy)) 66 | row.names(accuracy_rates) <- c("sensitivity", "specificity", "overall_accuracy") 67 | names(accuracy_rates) <- "" 68 | 69 | return(list(confusion_matrix = confusion_matrix, accuracy_rates = accuracy_rates)) 70 | } 71 | -------------------------------------------------------------------------------- /R/data_airbnb.R: -------------------------------------------------------------------------------- 1 | #' Chicago AirBnB Data 2 | #' 3 | #' The AirBnB data was collated by Trinh and Ameri as part of a course project 4 | #' at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 5 | #' This data set includes the prices and features for 1561 AirBnB listings in Chicago, collected in 2016. 6 | #' 7 | #' @format A data frame with 1561 rows and 12 variables. Each row represents a single AirBnB listing. 
8 | #' \describe{ 9 | #' \item{price}{the nightly price of the listing (in USD)} 10 | #' \item{rating}{the listing's average rating, on a scale from 1 to 5} 11 | #' \item{reviews}{number of user reviews the listing has} 12 | #' \item{room_type}{the type of listing (eg: Shared room)} 13 | #' \item{accommodates}{number of guests the listing accommodates} 14 | #' \item{bedrooms}{the number of bedrooms the listing has} 15 | #' \item{minimum_stay}{the minimum number of nights to stay in the listing} 16 | #' \item{neighborhood}{the neighborhood in which the listing is located} 17 | #' \item{district}{the broader district in which the listing is located} 18 | #' \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 19 | #' \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 20 | #' \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 21 | #' } 22 | #' @source Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 23 | #' Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 24 | #' \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 25 | 26 | "airbnb" 27 | -------------------------------------------------------------------------------- /R/data_airbnb_small.R: -------------------------------------------------------------------------------- 1 | #' Chicago AirBnB Data 2 | #' 3 | #' The AirBnB data was collated by Trinh and Ameri as part of a course project 4 | #' at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 5 | #' This data set, a subset of the airbnb data in the bayesrules package, includes the prices and features for 869 AirBnB listings in Chicago, collected in 2016. 
6 | #' 7 | #' @format A data frame with 869 rows and 12 variables. Each row represents a single AirBnB listing. 8 | #' \describe{ 9 | #' \item{price}{the nightly price of the listing (in USD)} 10 | #' \item{rating}{the listing's average rating, on a scale from 1 to 5} 11 | #' \item{reviews}{number of user reviews the listing has} 12 | #' \item{room_type}{the type of listing (eg: Shared room)} 13 | #' \item{accommodates}{number of guests the listing accommodates} 14 | #' \item{bedrooms}{the number of bedrooms the listing has} 15 | #' \item{minimum_stay}{the minimum number of nights to stay in the listing} 16 | #' \item{neighborhood}{the neighborhood in which the listing is located} 17 | #' \item{district}{the broader district in which the listing is located} 18 | #' \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 19 | #' \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 20 | #' \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 21 | #' } 22 | #' @source Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 23 | #' Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 24 | #' \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 25 | 26 | "airbnb_small" 27 | -------------------------------------------------------------------------------- /R/data_bald_eagles.R: -------------------------------------------------------------------------------- 1 | #' Bald Eagle Count Data 2 | #' 3 | #' Bald Eagle count data collected from the year 1981 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 4 | #' The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 
5 | #' A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 6 | #' 7 | #' @format A data frame with 37 rows and 5 variables. Each row represents Bald Eagle observations in the given year. 8 | #' \describe{ 9 | #' \item{year}{year of data collection} 10 | #' \item{count}{number of birds observed} 11 | #' \item{hours}{total person-hours of observation period} 12 | #' \item{count_per_hour}{count divided by hours} 13 | #' \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 14 | #' } 15 | #' @source \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv}. 16 | "bald_eagles" 17 | -------------------------------------------------------------------------------- /R/data_basketball.R: -------------------------------------------------------------------------------- 1 | #' WNBA Basketball Data 2 | #' 3 | #' The WNBA Basketball Data was scraped from \url{https://www.basketball-reference.com/wnba/players/} and contains information on basketball players from the 2019 season. 4 | #' 5 | #' @format A data frame with 146 rows and 30 variables. Each row represents a single WNBA basketball player. The variables on each player are as follows. 
6 | #' \describe{ 7 | #' \item{player_name}{first and last name} 8 | #' \item{height}{height in inches} 9 | #' \item{weight}{weight in pounds} 10 | #' \item{year}{year of the WNBA season} 11 | #' \item{team}{team that the WNBA player is a member of} 12 | #' \item{age}{age in years} 13 | #' \item{games_played}{number of games played by the player in that season} 14 | #' \item{games_started}{number of games the player started in that season} 15 | #' \item{avg_minutes_played}{average number of minutes played per game} 16 | #' \item{avg_field_goals}{average number of field goals per game played} 17 | #' \item{avg_field_goal_attempts}{average number of field goals attempted per game played} 18 | #' \item{field_goal_pct}{percent of field goals made throughout the season} 19 | #' \item{avg_three_pointers}{average number of three pointers per game played} 20 | #' \item{avg_three_pointer_attempts}{average number of three pointers attempted per game played} 21 | #' \item{three_pointer_pct}{percent of three pointers made throughout the season} 22 | #' \item{avg_two_pointers}{average number of two pointers made per game played} 23 | #' \item{avg_two_pointer_attempts}{average number of two pointers attempted per game played} 24 | #' \item{two_pointer_pct}{percent of two pointers made throughout the season} 25 | #' \item{avg_free_throws}{average number of free throws made per game played} 26 | #' \item{avg_free_throw_attempts}{average number of free throws attempted per game played} 27 | #' \item{free_throw_pct}{percent of free throws made throughout the season} 28 | #' \item{avg_offensive_rb}{average number of offensive rebounds per game played} 29 | #' \item{avg_defensive_rb}{average number of defensive rebounds per game played} 30 | #' \item{avg_rb}{average number of rebounds (both offensive and defensive) per game played} 31 | #' \item{avg_assists}{average number of assists per game played} 32 | #' \item{avg_steals}{average number of steals per game played} 33 | #' 
\item{avg_blocks}{average number of blocks per game played} 34 | #' \item{avg_turnovers}{average number of turnovers per game played} 35 | #' \item{avg_personal_fouls}{average number of personal fouls per game played. Note: after 5 fouls the player is not allowed to play in that game anymore} 36 | #' \item{avg_points}{average number of points made per game played} 37 | #' \item{total_minutes}{total number of minutes played throughout the season} 38 | #' \item{starter}{whether or not the player started in more than half of the games they played} 39 | #' } 40 | #' @source \url{https://www.basketball-reference.com/} 41 | 42 | "basketball" 43 | -------------------------------------------------------------------------------- /R/data_bechdel.R: -------------------------------------------------------------------------------- 1 | #' Bechdel Test for over 1500 movies 2 | #' 3 | #' A dataset containing data behind the story 4 | #' "The Dollar-And-Cents Case Against Hollywood's Exclusion of Women" 5 | #' \url{https://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/}. 6 | #' 7 | #' @format A data frame with 1794 rows and 3 variables: 8 | #' \describe{ 9 | #' \item{year}{The release year of the movie} 10 | #' \item{title}{The title of the movie} 11 | #' \item{binary}{Bechdel test result (PASS, FAIL)} 12 | #' } 13 | #' @source 14 | "bechdel" 15 | -------------------------------------------------------------------------------- /R/data_big_word_club.R: -------------------------------------------------------------------------------- 1 | #' Big Word Club (BWC) 2 | #' 3 | #' Data on the effectiveness of a digital learning program designed by the Abdul Latif Jameel Poverty Action Lab (J-PAL) to address disparities in vocabulary levels among children from households with different income levels. 
4 | #' 5 | #' @format A data frame with 818 student-level observations and 31 variables: 6 | #' \describe{ 7 | #' \item{participant_id}{unique student id} 8 | #' \item{treat}{control group (0) or treatment group (1)} 9 | #' \item{age_months}{age in months} 10 | #' \item{female}{whether student identifies as female} 11 | #' \item{kindergarten}{grade level, pre-school (0) or kindergarten (1)} 12 | #' \item{teacher_id}{unique teacher id} 13 | #' \item{school_id}{unique school id} 14 | #' \item{private_school}{whether school is private} 15 | #' \item{title1}{whether school has Title 1 status} 16 | #' \item{free_reduced_lunch}{percent of school that receive free / reduced lunch} 17 | #' \item{state}{school location} 18 | #' \item{esl_observed}{whether student has ESL status} 19 | #' \item{special_ed_observed}{whether student has special education status} 20 | #' \item{new_student}{whether student enrolled after program began} 21 | #' \item{distracted_a1}{student's distraction level during assessment 1 (0 = not distracted; 1 = mildly distracted; 2 = moderately distracted; 3 = extremely distracted)} 22 | #' \item{distracted_a2}{same as distracted_a1 but during assessment 2} 23 | #' \item{distracted_ppvt}{same as distracted_a1 but during standardized assessment} 24 | #' \item{score_a1}{student score on BWC assessment 1} 25 | #' \item{invalid_a1}{whether student's score on assessment 1 was invalid} 26 | #' \item{score_a2}{student score on BWC assessment 2} 27 | #' \item{invalid_a2}{whether student's score on assessment 2 was invalid} 28 | #' \item{score_ppvt}{student score on standardized assessment} 29 | #' \item{score_ppvt_age}{score_ppvt adjusted for age} 30 | #' \item{invalid_ppvt}{whether student's score on standardized assessment was invalid} 31 | #' \item{t_logins_april}{number of teacher logins onto BWC system in April} 32 | #' \item{t_logins_total}{number of teacher logins onto BWC system during entire study} 33 | #' \item{t_weeks_used}{number of weeks of the BWC 
program that the classroom has completed} 34 | #' \item{t_words_learned}{teacher response to the number of words students had learned through BWC (0 = almost none; 1 = 1 to 5; 2 = 6 to 10)} 35 | #' \item{t_financial_struggle}{teacher response to the number of their students that have families that experience financial struggle} 36 | #' \item{t_misbehavior}{teacher response to frequency that student misbehavior interferes with teaching (0 = never; 1 = rarely; 2 = occasionally; 3 = frequently)} 37 | #' \item{t_years_experience}{teacher's number of years of teaching experience} 38 | #' \item{score_pct_change}{percent change in scores before and after the program} 39 | #' } 40 | #' @source These data correspond to the following study: Ariel Kalil, Susan Mayer, Philip Oreopoulos (2020). Closing the word gap with Big Word Club: Evaluating the Impact of a Tech-Based Early Childhood Vocabulary Program. Data was obtained through the Inter-university Consortium for Political and Social Research (ICPSR) \url{https://www.openicpsr.org/openicpsr/project/117330/version/V1/view/}. 41 | "big_word_club" 42 | -------------------------------------------------------------------------------- /R/data_bike_users.R: -------------------------------------------------------------------------------- 1 | #' Capital Bikeshare Bike Ridership (Registered and Casual Riders) 2 | #' 3 | #' Data on ridership among registered members and casual users of the Capital Bikeshare service in Washington, D.C..
4 | #' 5 | #' @format A data frame with 534 daily observations, 267 each for registered riders and casual riders, and 13 variables: 6 | #' \describe{ 7 | #' \item{date}{date of observation} 8 | #' \item{season}{fall, spring, summer, or winter} 9 | #' \item{year}{the year of the date} 10 | #' \item{month}{the month of the date} 11 | #' \item{day_of_week}{the day of the week} 12 | #' \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 13 | #' \item{holiday}{whether or not the date falls on a holiday (yes or no)} 14 | #' \item{temp_actual}{raw temperature (degrees Fahrenheit)} 15 | #' \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 16 | #' \item{humidity}{humidity level (percentage)} 17 | #' \item{windspeed}{wind speed (miles per hour)} 18 | #' \item{weather_cat}{weather category (categ1 = pleasant, categ2 = moderate, categ3 = severe)} 19 | #' \item{user}{rider type (casual or registered)} 20 | #' \item{rides}{number of bikeshare rides} 21 | #' } 22 | #' @source Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset/} 23 | "bike_users" 24 | -------------------------------------------------------------------------------- /R/data_bikes.R: -------------------------------------------------------------------------------- 1 | #' Capital Bikeshare Bike Ridership 2 | #' 3 | #' Data on ridership among registered members of the Capital Bikeshare service in Washington, D.C.. 
4 | #' 5 | #' @format A data frame with 500 daily observations and 13 variables: 6 | #' \describe{ 7 | #' \item{date}{date of observation} 8 | #' \item{season}{fall, spring, summer, or winter} 9 | #' \item{year}{the year of the date} 10 | #' \item{month}{the month of the date} 11 | #' \item{day_of_week}{the day of the week} 12 | #' \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 13 | #' \item{holiday}{whether or not the date falls on a holiday (yes or no)} 14 | #' \item{temp_actual}{raw temperature (degrees Fahrenheit)} 15 | #' \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 16 | #' \item{humidity}{humidity level (percentage)} 17 | #' \item{windspeed}{wind speed (miles per hour)} 18 | #' \item{weather_cat}{weather category (categ1 = pleasant, categ2 = moderate, categ3 = severe)} 19 | #' \item{rides}{number of bikeshare rides} 20 | #' } 21 | #' @source Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset} 22 | "bikes" 23 | -------------------------------------------------------------------------------- /R/data_bird_counts.R: -------------------------------------------------------------------------------- 1 | #' Bird Counts Data 2 | #' 3 | #' Bird count data collected between the years 1921 and 2017, in late December, by birdwatchers in the Ontario, Canada area. 4 | #' The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 5 | #' 6 | #' @format A data frame with 18706 rows and 7 variables. Each row represents observations for the given bird species in the given year. 
7 | #' \describe{ 8 | #' \item{year}{year of data collection} 9 | #' \item{species}{scientific name of observed bird species} 10 | #' \item{species_latin}{latin name of observed bird species} 11 | #' \item{count}{number of birds observed} 12 | #' \item{hours}{total person-hours of observation period} 13 | #' \item{count_per_hour}{count divided by hours} 14 | #' \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 15 | #' } 16 | #' @source \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv/}. 17 | "bird_counts" 18 | -------------------------------------------------------------------------------- /R/data_book_banning.R: -------------------------------------------------------------------------------- 1 | #' Book Banning Data 2 | #' 3 | #' The book banning data was collected by Fast and Hegland as part of a course project 4 | #' at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 5 | #' This data set includes the features and outcomes for 931 book challenges 6 | #' (ie. requests to ban a book) made in the US between 2000 and 2010. 7 | #' Information on the books being challenged and the characteristics of these books 8 | #' were collected from the American Library Society. State-level demographic information and 9 | #' political leanings were obtained from the US Census Bureau and Cook Political Report, respectively. 10 | #' Due to an outlying large number of challenges, book challenges made in the state of Texas 11 | #' were omitted. 12 | #' 13 | #' @format A data frame with 931 rows and 17 variables. Each row represents a single book challenge within the given state and date. 
14 | #' \describe{ 15 | #' \item{title}{title of book being challenged} 16 | #' \item{book_id}{identifier for the book} 17 | #' \item{author}{author of the book} 18 | #' \item{date}{date of the challenge} 19 | #' \item{year}{year of the challenge} 20 | #' \item{removed}{whether or not the challenge was successful (the book was removed)} 21 | #' \item{explicit}{whether the book was challenged for sexually explicit material} 22 | #' \item{antifamily}{whether the book was challenged for anti-family material} 23 | #' \item{occult}{whether the book was challenged for occult material} 24 | #' \item{language}{whether the book was challenged for inappropriate language} 25 | #' \item{lgbtq}{whether the book was challenged for LGBTQ material} 26 | #' \item{violent}{whether the book was challenged for violent material} 27 | #' \item{state}{US state in which the challenge was made} 28 | #' \item{political_value_index}{Political Value Index of the state (negative = leans Republican, 0 = neutral, positive = leans Democrat)} 29 | #' \item{median_income}{median income in the state, relative to the average state median income} 30 | #' \item{hs_grad_rate}{high school graduation rate, in percent, relative to the average state high school graduation rate} 31 | #' \item{college_grad_rate}{college graduation rate, in percent, relative to the average state college graduation rate} 32 | #' } 33 | #' @source Shannon Fast and Thomas Hegland (2011). Book Challenges: A Statistical Examination. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 34 | #' Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 
35 | #' \url{https://github.com/proback/BeyondMLR/blob/master/data/bookbanningNoTex.csv/} 36 | 37 | "book_banning" 38 | -------------------------------------------------------------------------------- /R/data_cherry_blossom_sample.R: -------------------------------------------------------------------------------- 1 | #' Cherry Blossom Running Race 2 | #' 3 | #' A sub-sample of outcomes for the annual Cherry Blossom Ten Mile race in Washington, D.C.. This sub-sample was taken from the complete Cherry data in the mdsr package. 4 | #' 5 | #' @format A data frame with 252 Cherry Blossom outcomes and 7 variables: 6 | #' \describe{ 7 | #' \item{runner}{a unique identifier for the runner} 8 | #' \item{age}{age of the runner} 9 | #' \item{net}{time to complete the race, from starting line to finish line (minutes)} 10 | #' \item{gun}{time between the official start of the race and the finish line (minutes)} 11 | #' \item{year}{year of the race} 12 | #' \item{previous}{the number of previous years in which the subject ran in the race} 13 | #' } 14 | #' @source Data in the original Cherry data set were obtained from \url{https://www.cherryblossom.org/post-race/race-results/}. 15 | "cherry_blossom_sample" 16 | -------------------------------------------------------------------------------- /R/data_climbers_sub.R: -------------------------------------------------------------------------------- 1 | #' Himalayan Climber Data 2 | #' 3 | #' A sub-sample of the Himalayan Database distributed through the R for Data Science TidyTuesday project. This dataset includes information on the results and conditions for various Himalayan climbing expeditions. Each row corresponds to a single member of a climbing expedition team. 
4 | #' 5 | #' @format A data frame with 2076 observations (1 per climber) and 22 variables: 6 | #' \describe{ 7 | #' \item{expedition_id}{unique expedition identifier} 8 | #' \item{member_id}{unique climber identifier} 9 | #' \item{peak_id}{unique identifier of the expedition's destination peak} 10 | #' \item{peak_name}{name of the expedition's destination peak} 11 | #' \item{year}{year of expedition} 12 | #' \item{season}{season of expedition (Autumn, Spring, Summer, Winter)} 13 | #' \item{sex}{climber gender identity which the database oversimplifies to a binary category} 14 | #' \item{age}{climber age} 15 | #' \item{citizenship}{climber citizenship} 16 | #' \item{expedition_role}{climber's role in the expedition (eg: Co-Leader)} 17 | #' \item{hired}{whether the climber was a hired member of the expedition} 18 | #' \item{highpoint_metres}{the destination peak's highpoint (metres)} 19 | #' \item{success}{whether the climber successfully reached the destination} 20 | #' \item{solo}{whether the climber was on a solo expedition} 21 | #' \item{oxygen_used}{whether the climber utilized supplemental oxygen} 22 | #' \item{died}{whether the climber died during the expedition} 23 | #' \item{death_cause}{} 24 | #' \item{death_height_metres}{} 25 | #' \item{injured}{whether the climber was injured on the expedition} 26 | #' \item{injury_type}{} 27 | #' \item{injury_height_metres}{} 28 | #' \item{count}{number of climbers in the expedition} 29 | #' \item{height_metres}{height of the peak in meters} 30 | #' \item{first_ascent_year}{the year of the first recorded summit of the peak (though not necessarily the actual first summit!)} 31 | #' } 32 | #' @source Original source: \url{https://www.himalayandatabase.com/}. Complete dataset distributed by: \url{https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-09-22/}. 
33 | "climbers_sub" 34 | -------------------------------------------------------------------------------- /R/data_coffee_ratings.R: -------------------------------------------------------------------------------- 1 | #' Coffee Ratings Data 2 | #' 3 | #' A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 4 | #' 5 | #' @format A data frame with 1339 batches of coffee beans and 27 variables on each batch. 6 | #' \describe{ 7 | #' \item{owner}{farm owner} 8 | #' \item{farm_name}{farm where beans were grown} 9 | #' \item{country_of_origin}{country where farm is} 10 | #' \item{mill}{where beans were processed} 11 | #' \item{in_country_partner}{country of coffee partner} 12 | #' \item{altitude_low_meters}{lowest altitude of the farm} 13 | #' \item{altitude_high_meters}{highest altitude of the farm} 14 | #' \item{altitude_mean_meters}{average altitude of the farm} 15 | #' \item{number_of_bags}{number of bags tested} 16 | #' \item{bag_weight}{weight of each tested bag} 17 | #' \item{species}{bean species} 18 | #' \item{variety}{bean variety} 19 | #' \item{processing_method}{how beans were processed} 20 | #' \item{aroma}{bean aroma grade} 21 | #' \item{flavor}{bean flavor grade} 22 | #' \item{aftertaste}{bean aftertaste grade} 23 | #' \item{acidity}{bean acidity grade} 24 | #' \item{body}{bean body grade} 25 | #' \item{balance}{bean balance grade} 26 | #' \item{uniformity}{bean uniformity grade} 27 | #' \item{clean_cup}{bean clean cup grade} 28 | #' \item{sweetness}{bean sweetness grade} 29 | #' \item{moisture}{bean moisture grade} 30 | #' \item{category_one_defects}{count of category one defects} 31 | #' \item{category_two_defects}{count of category two defects} 32 | #' \item{color}{bean color} 33 | #' \item{total_cup_points}{total bean rating (0 -- 100)} 34 | #' } 35 | #' @source 
\url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 36 | "coffee_ratings" 37 | -------------------------------------------------------------------------------- /R/data_coffee_ratings_small.R: -------------------------------------------------------------------------------- 1 | #' Coffee Ratings Data 2 | #' 3 | #' A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 4 | #' This is a simplified version of the coffee_ratings data. 5 | #' 6 | #' @format A data frame with 636 batches of coffee beans and 11 variables on each batch. 7 | #' \describe{ 8 | #' \item{farm_name}{farm where beans were grown} 9 | #' \item{total_cup_points}{total bean rating (0 -- 100)} 10 | #' \item{aroma}{bean aroma grade} 11 | #' \item{flavor}{bean flavor grade} 12 | #' \item{aftertaste}{bean aftertaste grade} 13 | #' \item{acidity}{bean acidity grade} 14 | #' \item{body}{bean body grade} 15 | #' \item{balance}{bean balance grade} 16 | #' \item{uniformity}{bean uniformity grade} 17 | #' \item{sweetness}{bean sweetness grade} 18 | #' \item{moisture}{bean moisture grade} 19 | #' } 20 | #' @source \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 21 | "coffee_ratings_small" 22 | -------------------------------------------------------------------------------- /R/data_equality_index.R: -------------------------------------------------------------------------------- 1 | #' LGBTQ+ Rights Laws by State 2 | #' 3 | #' Data on the number of LGBTQ+ equality laws (as of 2019) and demographics in each U.S. state. 
4 | #' 5 | #' @format A data frame with 50 observations, one per state, and 6 variables: 6 | #' \describe{ 7 | #' \item{state}{state name} 8 | #' \item{region}{region in which the state falls} 9 | #' \item{gop_2016}{percent of the 2016 presidential election vote earned by the Republican ("GOP") candidate} 10 | #' \item{laws}{number of LGBTQ+ rights laws (as of 2019)} 11 | #' \item{historical}{political leaning of the state over time (gop = Republican, dem = Democrat, swing = swing state)} 12 | #' \item{percent_urban}{percent of state's residents that live in urban areas (by the 2010 census)} 13 | #' } 14 | #' @source Data on LGBTQ+ laws were obtained from Warbelow, Sarah, Courtnay Avant, and Colin Kutney (2020). 2019 State Equality Index. Washington, DC. Human Rights Campaign Foundation. \url{https://assets2.hrc.org/files/assets/resources/HRC-SEI-2019-Report.pdf?_ga=2.148925686.1325740687.1594310864-1928808113.1594310864&_gac=1.213124768.1594312278.EAIaIQobChMI9dP2hMzA6gIVkcDACh21GgLEEAAYASAAEgJiJvD_BwE/}. Data on urban residency obtained from \url{https://www.icip.iastate.edu/tables/population/urban-pct-states/}. 15 | "equality_index" 16 | -------------------------------------------------------------------------------- /R/data_fake_news.R: -------------------------------------------------------------------------------- 1 | #' A collection of 150 news articles 2 | #' 3 | #' A dataset containing data behind the study 4 | #' "FakeNewsNet: A Data Repository with News Content, Social Context and Spatialtemporal Information for Studying Fake News on Social Media" 5 | #' \url{https://arxiv.org/abs/1809.01286}. 6 | #' The news articles in this dataset were posted to Facebook in September 2016, in the run-up to the U.S. presidential election. 
7 | #' 8 | #' @format A data frame with 150 rows and 30 variables: 9 | #' \describe{ 10 | #' \item{title}{The title of the news article} 11 | #' \item{text}{Text of the article} 12 | #' \item{url}{Hyperlink for the article} 13 | #' \item{authors}{Authors of the article} 14 | #' \item{type}{Binary variable indicating whether the article presents fake or real news (fake, real)} 15 | #' \item{title_words}{Number of words in the title} 16 | #' \item{text_words}{Number of words in the text} 17 | #' \item{title_char}{Number of characters in the title} 18 | #' \item{text_char}{Number of characters in the text} 19 | #' \item{title_caps}{Number of words that are all capital letters in the title} 20 | #' \item{text_caps}{Number of words that are all capital letters in the text} 21 | #' \item{title_caps_percent}{Percent of words that are all capital letters in the title} 22 | #' \item{text_caps_percent}{Percent of words that are all capital letters in the text} 23 | #' \item{title_excl}{Number of characters that are exclamation marks in the title} 24 | #' \item{text_excl}{Number of characters that are exclamation marks in the text} 25 | #' \item{title_excl_percent}{Percent of characters that are exclamation marks in the title} 26 | #' \item{text_excl_percent}{Percent of characters that are exclamation marks in the text} 27 | #' \item{title_has_excl}{Binary variable indicating whether the title of the article includes an exclamation point or not (TRUE, FALSE)} 28 | #' \item{anger}{Percent of words that are associated with anger} 29 | #' \item{anticipation}{Percent of words that are associated with anticipation} 30 | #' \item{disgust}{Percent of words that are associated with disgust} 31 | #' \item{fear}{Percent of words that are associated with fear} 32 | #' \item{joy}{Percent of words that are associated with joy} 33 | #' \item{sadness}{Percent of words that are associated with sadness} 34 | #' \item{surprise}{Percent of words that are associated with surprise} 35 | #' 
\item{trust}{Percent of words that are associated with trust} 36 | #' \item{negative}{Percent of words that have negative sentiment} 37 | #' \item{positive}{Percent of words that have positive sentiment} 38 | #' \item{text_syllables}{Number of syllables in text} 39 | #' \item{text_syllables_per_word}{Number of syllables per word in text} 40 | #' } 41 | #' @source Shu, K., Mahudeswaran, D., Wang, S., Lee, D. and Liu, H. (2018) FakeNewsNet: A Data Repository with News Content, Social Context and Dynamic Information for Studying Fake News on Social Media 42 | "fake_news" 43 | -------------------------------------------------------------------------------- /R/data_football.R: -------------------------------------------------------------------------------- 1 | #' Football Brain Measurements 2 | #' 3 | #' Brain measurements for football and non-football players as provided in the Lock5 package 4 | #' 5 | #' @format A data frame with 75 observations and 5 variables: 6 | #' \describe{ 7 | #' \item{group}{control = no football, 8 | #' fb_no_concuss = football player but no concussions, 9 | #' fb_concuss = football player with concussion history} 10 | #' \item{years}{Number of years a person played football} 11 | #' \item{volume}{Total hippocampus volume, in cubic centimeters} 12 | #' } 13 | #' @source Singh R, Meier T, Kuplicki R, Savitz J, et al., 14 | #' "Relationship of Collegiate Football Experience and Concussion 15 | #' With Hippocampal Volume and Cognitive Outcome," JAMA, 311(18), 2014 16 | #' 17 | "football" 18 | -------------------------------------------------------------------------------- /R/data_hotel_bookings.R: -------------------------------------------------------------------------------- 1 | #' Hotel Bookings Data 2 | #' 3 | #' A random subset of the data on hotel bookings originally collected by Antonio, Almeida and Nunes (2019) and distributed through the R for Data Science TidyTuesday project. 
4 | #' 5 | #' @format A data frame with 1000 hotel bookings and 32 variables on each booking. 6 | #' \describe{ 7 | #' \item{hotel}{"Resort Hotel" or "City Hotel"} 8 | #' \item{is_canceled}{whether the booking was cancelled} 9 | #' \item{lead_time}{number of days between booking and arrival} 10 | #' \item{arrival_date_year}{year of scheduled arrival} 11 | #' \item{arrival_date_month}{month of scheduled arrival} 12 | #' \item{arrival_date_week_number}{week of scheduled arrival} 13 | #' \item{arrival_date_day_of_month}{day of month of scheduled arrival} 14 | #' \item{stays_in_weekend_nights}{number of reserved weekend nights} 15 | #' \item{stays_in_week_nights}{number of reserved week nights} 16 | #' \item{adults}{number of adults in booking} 17 | #' \item{children}{number of children} 18 | #' \item{babies}{number of babies} 19 | #' \item{meal}{whether the booking includes breakfast (BB = bed & breakfast), breakfast and dinner (HB = half board), or breakfast, lunch, and dinner (FB = full board)} 20 | #' \item{country}{guest's country of origin} 21 | #' \item{market_segment}{market segment designation (eg: TA = travel agent, TO = tour operator)} 22 | #' \item{distribution_channel}{booking distribution channel (eg: TA = travel agent, TO = tour operator)} 23 | #' \item{is_repeated_guest}{whether or not booking was made by a repeated guest} 24 | #' \item{previous_cancellations}{guest's number of previous booking cancellations} 25 | #' \item{previous_bookings_not_canceled}{guest's number of previous bookings that weren't cancelled} 26 | #' \item{reserved_room_type}{code for type of room reserved by guest} 27 | #' \item{assigned_room_type}{code for type of room assigned by hotel} 28 | #' \item{booking_changes}{number of changes made to the booking} 29 | #' \item{deposit_type}{No Deposit, Non Refund, Refundable} 30 | #' \item{agent}{booking travel agency} 31 | #' \item{company}{booking company} 32 | #' \item{days_in_waiting_list}{number of days the guest waited for booking 
confirmation} 33 | #' \item{customer_type}{Contract, Group, Transient, Transient-party (a transient booking tied to another transient booking)} 34 | #' \item{average_daily_rate}{average hotel cost per day} 35 | #' \item{required_car_parking_spaces}{number of parking spaces the guest needed} 36 | #' \item{total_of_special_requests}{number of guest special requests} 37 | #' \item{reservation_status}{Canceled, Check-Out, No-Show} 38 | #' \item{reservation_status_date}{when the guest cancelled or checked out} 39 | #' } 40 | #' @source 41 | #' Nuno Antonio, Ana de Almeida, and Luis Nunes (2019). "Hotel booking demand datasets." Data in Brief (22): 41-49. 42 | #' \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-02-11/hotels.csv/}. 43 | "hotel_bookings" 44 | -------------------------------------------------------------------------------- /R/data_loons.R: -------------------------------------------------------------------------------- 1 | #' Loon Count Data 2 | #' 3 | #' Loon count data collected from the year 2000 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 4 | #' The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 5 | #' A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 6 | #' 7 | #' @format A data frame with 18 rows and 5 variables. Each row represents loon observations in the given year. 8 | #' \describe{ 9 | #' \item{year}{year of data collection} 10 | #' \item{count}{number of loons observed} 11 | #' \item{hours}{total person-hours of observation period} 12 | #' \item{count_per_hour}{count divided by hours} 13 | #' \item{count_per_100}{count_per_hour multiplied by 100 hours} 14 | #' } 15 | #' @source \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv}. 
16 | "loons" 17 | -------------------------------------------------------------------------------- /R/data_moma.R: -------------------------------------------------------------------------------- 1 | #' Museum of Modern Art (MoMA) data 2 | #' 3 | #' The Museum of Modern Art data includes information about the individual artists included in the collection of the Museum of Modern Art in New York City. 4 | #' It does not include information about works for artist collectives or companies. 5 | #' The data was made available by MoMA itself and downloaded in December 2020. 6 | #' 7 | #' @format A data frame with 10964 rows and 11 variables. Each row represents an individual artist in the MoMA collection. 8 | #' \describe{ 9 | #' \item{artist}{name} 10 | #' \item{country}{country of origin} 11 | #' \item{birth}{year of birth} 12 | #' \item{death}{year of death} 13 | #' \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 14 | #' \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 15 | #' \item{gender}{gender identity (as perceived by MoMA employees)} 16 | #' \item{department}{MoMA department in which the artist's works most frequently appear} 17 | #' \item{count}{number of the artist's works in the MoMA collection} 18 | #' \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 19 | #' \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 20 | #' } 21 | #' @source \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 22 | "moma" -------------------------------------------------------------------------------- /R/data_moma_sample.R: -------------------------------------------------------------------------------- 1 | #' Museum of Modern Art (MoMA) data sample 2 | #' 3 | #' A random sample of 100 artists represented in the Museum of Modern Art in New York City. 
4 | #' The data was made available by MoMA itself and downloaded in December 2020. 5 | #' It does not include information about artist collectives or companies. 6 | #' 7 | #' @format A data frame with 100 rows and 10 variables. Each row represents an individual artist in the MoMA collection. 8 | #' \describe{ 9 | #' \item{artist}{name} 10 | #' \item{country}{country of origin} 11 | #' \item{birth}{year of birth} 12 | #' \item{death}{year of death} 13 | #' \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 14 | #' \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 15 | #' \item{gender}{gender identity (as perceived by MoMA employees)} 16 | #' \item{count}{number of the artist's works in the MoMA collection} 17 | #' \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 18 | #' \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 19 | #' } 20 | #' @source \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 21 | "moma_sample" -------------------------------------------------------------------------------- /R/data_penguins_bayes.R: -------------------------------------------------------------------------------- 1 | #' Penguins Data 2 | #' 3 | #' Data on penguins in the Palmer Archipelago, originally collected by Gorman et al. and distributed through the penguins data in the palmerpenguins package. 4 | #' In addition to the original penguins data is a variable above_average_weight. 5 | #' 6 | #' @format A data frame with 344 penguins and 9 variables on each. 
7 | #' \describe{ 8 | #' \item{species}{species (Adelie, Chinstrap, Gentoo)} 9 | #' \item{island}{home island (Biscoe, Dream, Torgersen)} 10 | #' \item{year}{year of observation} 11 | #' \item{bill_length_mm}{length of bill (mm)} 12 | #' \item{bill_depth_mm}{depth of bill (mm)} 13 | #' \item{flipper_length_mm}{length of flipper (mm)} 14 | #' \item{body_mass_g}{body mass (g)} 15 | #' \item{above_average_weight}{whether or not the body mass exceeds 4200g (TRUE or FALSE)} 16 | #' \item{sex}{male or female} 17 | #' } 18 | #' @source Gorman KB, Williams TD, and Fraser WR (2014). Ecological sexual dimorphism and environmental variability within a community of antarctic penguins (Genus Pygoscelis). PLoS ONE, 9(3). 19 | "penguins_bayes" 20 | -------------------------------------------------------------------------------- /R/data_pop_vs_soda.R: -------------------------------------------------------------------------------- 1 | #' Pop vs Soda vs Coke 2 | #' 3 | #' Results of a volunteer survey on how people around the U.S. refer to fizzy cola drinks. The options are "pop", "soda", "coke", or "other". 4 | #' 5 | #' @format A data frame with 374250 observations, one per survey respondent, and 4 variables: 6 | #' \describe{ 7 | #' \item{state}{the U.S. state in which the respondent resides} 8 | #' \item{region}{region in which the state falls (as defined by the U.S. Census)} 9 | #' \item{word_for_cola}{how the respondent refers to fizzy cola drinks} 10 | #' \item{pop}{whether or not the respondent refers to fizzy cola drinks as "pop"} 11 | #' } 12 | #' @source The survey responses were obtained at \url{https://popvssoda.com/} which is maintained by Alan McConchie. 
13 | "pop_vs_soda" 14 | -------------------------------------------------------------------------------- /R/data_pulse_of_the_nation.R: -------------------------------------------------------------------------------- 1 | #' Cards Against Humanity's Pulse of the Nation Survey 2 | #' 3 | #' Cards Against Humanity's "Pulse of the Nation" project (\url{https://thepulseofthenation.com/}) conducted monthly polls into people's social and political views, as well as some silly things. This data includes responses to a subset of questions included in the poll conducted in September 2017. 4 | #' 5 | #' @format A data frame with observations on 1000 survey respondents with 15 variables: 6 | #' \describe{ 7 | #' \item{income}{income in \$1000s} 8 | #' \item{age}{age in years} 9 | #' \item{party}{political party affiliation} 10 | #' \item{trump_approval}{approval level of Donald Trump's job performance} 11 | #' \item{education}{maximum education level completed} 12 | #' \item{robots}{opinion of how likely their job is to be replaced by robots within 10 years} 13 | #' \item{climate_change}{belief in climate change} 14 | #' \item{transformers}{the number of Transformers films the respondent has seen} 15 | #' \item{science_is_honest}{opinion of whether scientists are generally honest and serve the public good} 16 | #' \item{vaccines_are_safe}{opinion of whether vaccines are safe and protect children from disease} 17 | #' \item{books}{number of books read in the past year} 18 | #' \item{ghosts}{whether or not they believe in ghosts} 19 | #' \item{fed_sci_budget}{respondent's estimate of the percentage of the federal budget that is spent on scientific research} 20 | #' \item{earth_sun}{belief about whether the earth is always farther away from the sun in winter than in summer (TRUE or FALSE)} 21 | #' \item{wise_unwise}{whether the respondent would rather be wise but unhappy, or unwise but happy} 22 | #' } 23 | #' @source 
\url{https://thepulseofthenation.com/downloads/201709-CAH_PulseOfTheNation_Raw.csv} 24 | "pulse_of_the_nation" 25 | -------------------------------------------------------------------------------- /R/data_spotify.R: -------------------------------------------------------------------------------- 1 | #' Spotify Song Data 2 | #' 3 | #' A sub-sample of the Spotify song data originally collected by Kaylin Pavlik (kaylinquest) and distributed through the R for Data Science TidyTuesday project. 4 | #' 5 | #' @format A data frame with 350 songs (or tracks) and 23 variables: 6 | #' \describe{ 7 | #' \item{track_id}{unique song identifier} 8 | #' \item{title}{song name} 9 | #' \item{artist}{song artist} 10 | #' \item{popularity}{song popularity from 0 (low) to 100 (high)} 11 | #' \item{album_id}{id of the album on which the song appears} 12 | #' \item{album_name}{name of the album on which the song appears} 13 | #' \item{album_release_date}{when the album was released} 14 | #' \item{playlist_name}{Spotify playlist on which the song appears} 15 | #' \item{playlist_id}{unique playlist identifier} 16 | #' \item{genre}{genre of the playlist} 17 | #' \item{subgenre}{subgenre of the playlist} 18 | #' \item{danceability}{a score from 0 (not danceable) to 100 (danceable) based on features such as tempo, rhythm, etc.} 19 | #' \item{energy}{a score from 0 (low energy) to 100 (high energy) based on features such as loudness, timbre, entropy, etc.} 20 | #' \item{key}{song key} 21 | #' \item{loudness}{song loudness (dB)} 22 | #' \item{mode}{0 (minor key) or 1 (major key)} 23 | #' \item{speechiness}{a score from 0 (non-speechy tracks) to 100 (speechy tracks)} 24 | #' \item{acousticness}{a score from 0 (not acoustic) to 100 (very acoustic)} 25 | #' \item{instrumentalness}{a score from 0 (not instrumental) to 100 (very instrumental)} 26 | #' \item{liveness}{a score from 0 (no live audience presence on the song) to 100 (strong live audience presence on the song)} 27 | #' \item{valence}{a 
score from 0 (the song is more negative, sad, angry) to 100 (the song is more positive, happy, euphoric)} 28 | #' \item{tempo}{song tempo (beats per minute)} 29 | #' \item{duration_ms}{song duration (ms)} 30 | #' } 31 | #' @source \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-01-21/spotify_songs.csv/}. 32 | "spotify" 33 | -------------------------------------------------------------------------------- /R/data_voices.R: -------------------------------------------------------------------------------- 1 | #' Voice Pitch Data 2 | #' 3 | #' Voice pitch data collected by Winter and Grawunder (2012). 4 | #' In an experiment, subjects participated in role-playing dialog under various conditions, 5 | #' while researchers monitored voice pitch (Hz). 6 | #' The conditions spanned different scenarios (eg: making an appointment, asking for a favor) 7 | #' and different attitudes to use in the scenario (polite or informal). 8 | #' 9 | #' @format A data frame with 84 rows and 4 variables. Each row represents a single observation for the given subject. 10 | #' \describe{ 11 | #' \item{subject}{subject identifier} 12 | #' \item{scenario}{context of the dialog (encoded as A, B, ..., G)} 13 | #' \item{attitude}{whether the attitude to use in dialog was polite or informal} 14 | #' \item{pitch}{average voice pitch (Hz)} 15 | #' } 16 | #' @source Winter, B., & Grawunder, S. (2012). The Phonetic Profile of Korean Formal and Informal Speech Registers. Journal of Phonetics, 40, 808-815. 17 | #' \url{https://bodo-winter.net/data_and_scripts/POP.csv}. 18 | #' \url{https://bodo-winter.net/tutorial/bw_LME_tutorial2.pdf}. 
19 | "voices" 20 | -------------------------------------------------------------------------------- /R/data_weather_WU.R: -------------------------------------------------------------------------------- 1 | #' Weather Data for 2 Australian Cities 2 | #' 3 | #' A sub-sample of daily weather information from the weatherAUS data in the rattle package for two Australian cities, Wollongong and Uluru. 4 | #' The weather_australia data in the bayesrules package combines this data with a third city 5 | #' 6 | #' @format A data frame with 200 daily observations and 22 variables from 2 Australian weather stations: 7 | #' \describe{ 8 | #' \item{location}{one of two weather stations} 9 | #' \item{mintemp}{minimum temperature (degrees Celsius)} 10 | #' \item{maxtemp}{maximum temperature (degrees Celsius)} 11 | #' \item{rainfall}{rainfall (mm)} 12 | #' \item{windgustdir}{direction of strongest wind gust} 13 | #' \item{windgustspeed}{speed of strongest wind gust (km/h)} 14 | #' \item{winddir9am}{direction of wind gust at 9am} 15 | #' \item{winddir3pm}{direction of wind gust at 3pm} 16 | #' \item{windspeed9am}{wind speed at 9am (km/h)} 17 | #' \item{windspeed3pm}{wind speed at 3pm (km/h)} 18 | #' \item{humidity9am}{humidity level at 9am (percent)} 19 | #' \item{humidity3pm}{humidity level at 3pm (percent)} 20 | #' \item{pressure9am}{atmospheric pressure at 9am (hpa)} 21 | #' \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 22 | #' \item{temp9am}{temperature at 9am (degrees Celsius)} 23 | #' \item{temp3pm}{temperature at 3pm (degrees Celsius)} 24 | #' \item{raintoday}{whether or not it rained today (Yes or No)} 25 | #' \item{risk_mm}{the amount of rain today (mm)} 26 | #' \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 27 | #' \item{year}{the year of the date} 28 | #' \item{month}{the month of the date} 29 | #' \item{day_of_year}{the day of the year} 30 | #' } 31 | #' @source Data in the original weatherAUS data set were obtained from 
\url{https://www.bom.gov.au/climate/data}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 32 | "weather_WU" 33 | -------------------------------------------------------------------------------- /R/data_weather_australia.R: -------------------------------------------------------------------------------- 1 | #' Weather Data for 3 Australian Cities 2 | #' 3 | #' A sub-sample of daily weather information from the weatherAUS data in the rattle package for three Australian cities: Wollongong, Hobart, and Uluru. 4 | #' 5 | #' @format A data frame with 300 daily observations and 22 variables from 3 Australian weather stations: 6 | #' \describe{ 7 | #' \item{location}{one of three weather stations} 8 | #' \item{mintemp}{minimum temperature (degrees Celsius)} 9 | #' \item{maxtemp}{maximum temperature (degrees Celsius)} 10 | #' \item{rainfall}{rainfall (mm)} 11 | #' \item{windgustdir}{direction of strongest wind gust} 12 | #' \item{windgustspeed}{speed of strongest wind gust (km/h)} 13 | #' \item{winddir9am}{direction of wind gust at 9am} 14 | #' \item{winddir3pm}{direction of wind gust at 3pm} 15 | #' \item{windspeed9am}{wind speed at 9am (km/h)} 16 | #' \item{windspeed3pm}{wind speed at 3pm (km/h)} 17 | #' \item{humidity9am}{humidity level at 9am (percent)} 18 | #' \item{humidity3pm}{humidity level at 3pm (percent)} 19 | #' \item{pressure9am}{atmospheric pressure at 9am (hpa)} 20 | #' \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 21 | #' \item{temp9am}{temperature at 9am (degrees Celsius)} 22 | #' \item{temp3pm}{temperature at 3pm (degrees Celsius)} 23 | #' \item{raintoday}{whether or not it rained today (Yes or No)} 24 | #' \item{risk_mm}{the amount of rain today (mm)} 25 | #' \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 26 | #' \item{year}{the year of the date} 27 | #' \item{month}{the month of the date} 28 | #' \item{day_of_year}{the day of the year} 29 | #' } 30 | #' @source Data in the original weatherAUS data set 
were obtained from \url{https://www.bom.gov.au/climate/data/}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 31 | "weather_australia" 32 | -------------------------------------------------------------------------------- /R/data_weather_perth.R: -------------------------------------------------------------------------------- 1 | #' Weather Data for Perth, Australia 2 | #' 3 | #' A sub-sample of daily weather information on Perth, Australia from the weatherAUS data in the rattle package. 4 | #' 5 | #' @format A data frame with 1000 daily observations and 21 variables: 6 | #' \describe{ 7 | #' \item{mintemp}{minimum temperature (degrees Celsius)} 8 | #' \item{maxtemp}{maximum temperature (degrees Celsius)} 9 | #' \item{rainfall}{rainfall (mm)} 10 | #' \item{windgustdir}{direction of strongest wind gust} 11 | #' \item{windgustspeed}{speed of strongest wind gust (km/h)} 12 | #' \item{winddir9am}{direction of wind gust at 9am} 13 | #' \item{winddir3pm}{direction of wind gust at 3pm} 14 | #' \item{windspeed9am}{wind speed at 9am (km/h)} 15 | #' \item{windspeed3pm}{wind speed at 3pm (km/h)} 16 | #' \item{humidity9am}{humidity level at 9am (percent)} 17 | #' \item{humidity3pm}{humidity level at 3pm (percent)} 18 | #' \item{pressure9am}{atmospheric pressure at 9am (hpa)} 19 | #' \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 20 | #' \item{temp9am}{temperature at 9am (degrees Celsius)} 21 | #' \item{temp3pm}{temperature at 3pm (degrees Celsius)} 22 | #' \item{raintoday}{whether or not it rained today (Yes or No)} 23 | #' \item{risk_mm}{the amount of rain today (mm)} 24 | #' \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 25 | #' \item{year}{the year of the date} 26 | #' \item{month}{the month of the date} 27 | #' \item{day_of_year}{the day of the year} 28 | #' } 29 | #' @source Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data/}. 
Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 30 | "weather_perth" 31 | -------------------------------------------------------------------------------- /R/globals.R: -------------------------------------------------------------------------------- 1 | utils::globalVariables(c("proportion", "classification", ".", ".folds", 2 | "x", "y1", "y2", "mu", 3 | "likelihood", "f_lambda", "post_median", 4 | "post_mean", "post_mad", "post_sd", 5 | "center", "error", "l_inner", "u_inner", 6 | "l_outer", "u_outer", 7 | "error_scaled", "within_inner", "within_outer")) 8 | -------------------------------------------------------------------------------- /R/naive_classification_summary.R: -------------------------------------------------------------------------------- 1 | #' Posterior Classification Summaries for a Naive Bayes model 2 | #' 3 | #' Given a set of observed data including a categorical response variable y 4 | #' and a naiveBayes model of y, 5 | #' this function returns summaries of the model's posterior classification quality. 6 | #' These summaries include a confusion matrix as well as an estimate of the model's 7 | #' overall accuracy. 
#'
#' @param model a naiveBayes model object with categorical y
#' @param data data frame including the variables in the model
#' @param y a character string indicating the y variable in data
#'
#' @return a list with two elements: \code{confusion_matrix}, a table of observed
#'   versus predicted classes with row-wise percentages and raw counts, and
#'   \code{overall_accuracy}, the proportion of cases that were classified correctly
#' @export
#' @importFrom stats predict
#' @examples
#' data(penguins_bayes, package = "bayesrules")
#' example_model <- e1071::naiveBayes(species ~ bill_length_mm, data = penguins_bayes)
#' naive_classification_summary(model = example_model, data = penguins_bayes, y = "species")
naive_classification_summary <- function(model, data, y){
  # This function summarizes the classifications across all cases
  if(!("naiveBayes" %in% class(model))){ stop("the model must be a naiveBayes object.")}

  # Calculate posterior classifications:
  # turn the naiveBayes predictions into a classification column alongside y
  classifications <- data %>%
    mutate(classification = predict(model, newdata = .)) %>%
    dplyr::select(y, classification)
  # Standardize the response column name so tabyl() below can refer to it directly
  names(classifications)[1] <- "y"

  # Confusion matrix: row-wise percentages (per observed class), formatted,
  # with the raw counts appended in parentheses
  confusion_matrix <- classifications %>%
    tabyl(y, classification) %>%
    adorn_percentages("row") %>%
    adorn_pct_formatting(digits = 2) %>%
    adorn_ns()
  # Restore the user-facing variable name on the first column
  names(confusion_matrix)[1] <- y
  # Overall accuracy = share of cases on the diagonal of the raw counts table
  mat <- table(classifications$y, classifications$classification)
  overall_accuracy <- sum(diag(mat)) / sum(mat)

  return(list(confusion_matrix = confusion_matrix, overall_accuracy = overall_accuracy))
}


--------------------------------------------------------------------------------
/R/plot_beta.R:
--------------------------------------------------------------------------------
#' @title Plot a Beta Model for \eqn{\pi}
#'
#' @description Plots the probability density function (pdf) for
#' a Beta(alpha, beta) model of variable \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the Beta model
#' @param mean,mode a logical value indicating whether to display the model mean and mode
#'
#' @return A density plot for the Beta model.
#' @export
#' @import ggplot2
#' @importFrom stats dbeta
#'
#' @examples
#' plot_beta(alpha = 1, beta = 12, mean = TRUE, mode = TRUE)
plot_beta <- function(alpha, beta, mean = FALSE, mode = FALSE){

  # Base density curve for Beta(alpha, beta) over [0, 1]
  p <- ggplot(data = data.frame(x = c(0, 1)),
              aes(x)) +
    stat_function(fun = stats::dbeta,
                  n = 101,
                  args = list(shape1 = alpha,
                              shape2 = beta)) +
    labs(x = expression(pi),
         y = expression(paste("f(", pi, ")")))

  # Vertical segment(s) marking the requested summary statistic(s).
  # The mean and mode markers are independent of one another, so each is
  # handled once here rather than enumerating every TRUE/FALSE combination
  # (the original triplicated the plotting code across three branches).
  line_values <- c()

  if (mean) {
    mean_val <- alpha / (alpha + beta)
    p <- p +
      geom_segment(aes(x = mean_val, y = 0,
                       xend = mean_val,
                       yend = stats::dbeta(mean_val, alpha, beta),
                       linetype = "mean"))
    line_values <- c(line_values, mean = "solid")
  }

  if (mode) {
    # NOTE(review): (alpha - 1)/(alpha + beta - 2) is undefined (0/0) when
    # alpha = beta = 1 and falls outside [0, 1] when a shape parameter is
    # below 1 -- this matches the original behavior.
    mode_val <- (alpha - 1)/(alpha + beta - 2)
    p <- p +
      geom_segment(aes(x = mode_val, y = 0,
                       xend = mode_val,
                       yend = stats::dbeta(mode_val, alpha, beta),
                       linetype = "mode"))
    line_values <- c(line_values, mode = "dashed")
  }

  if (length(line_values) > 0) {
    p <- p +
      scale_linetype_manual(values = line_values) +
      theme(legend.title = element_blank())
  }

  p
}

--------------------------------------------------------------------------------
/R/plot_beta_ci.R:
--------------------------------------------------------------------------------
#' @title Plot a Beta Model with Credible Interval
#'
#' @description Plots the probability density function (pdf) for a
#' Beta(alpha, beta) model of variable \eqn{\pi} with markings indicating
#' a credible interval for \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the Beta model
#' @param ci_level credible interval level
#'
#' @return A density plot for the Beta model
#' @export
#' @import ggplot2
#' @importFrom stats dbeta qbeta
#'
#' @examples
#' plot_beta_ci(alpha = 7, beta = 12, ci_level = 0.80)
plot_beta_ci <- function(alpha, beta, ci_level = 0.95){
  # BUG FIX: the original built an initial ggplot object `p` here that was
  # never used -- the function returned the second, separately-built plot
  # below. The dead construction has been removed; output is unchanged.

  # Quantiles bounding the middle ci_level of the Beta model
  q1 <- (1 - ci_level)/2
  q2 <- 1 - q1

  ci <- qbeta(c(q1,q2), alpha, beta)
  # NOTE(review): the mode formula is undefined when alpha = beta = 1 and
  # falls outside [0, 1] when a shape parameter is below 1 -- matches the
  # original behavior.
  mode <- (alpha - 1) / (alpha + beta - 2)
  marks <- c(ci, mode)

  # Shade the credible interval region, overlay the full density curve,
  # and drop vertical segments at the interval endpoints and the mode
  ggplot(data.frame(x = c(0,1)), aes(x=x)) +
    stat_function(fun = dbeta,
                  args = list(alpha, beta),
                  xlim = ci,
                  geom = "area",
                  fill = "lightblue") +
    stat_function(fun = dbeta,
                  args = list(alpha, beta)) +
    geom_segment(data =
                   data.frame(x = marks,
                              y1 = c(0,0,0),
                              y2 = dbeta(marks, alpha, beta)),
                 aes(x = x,
                     xend = x,
                     y = y1,
                     yend = y2)) +
    labs(x = expression(pi), y = "density")
}

--------------------------------------------------------------------------------
/R/plot_binomial_likelihood.R:
--------------------------------------------------------------------------------
#' @title Plot a Binomial Likelihood Function
#'
#'
@description Plots the Binomial likelihood function for variable \eqn{\pi}
#' given y observed successes in a series of n Binomial trials.
#'
#' @param y number of successes
#' @param n number of trials
#' @param mle a logical value indicating whether maximum likelihood estimate of \eqn{\pi}, y/n, should be plotted
#' @return a ggplot
#' @export
#'
#' @importFrom magrittr "%>%"
#' @import ggplot2
#' @importFrom dplyr filter
#' @importFrom stats dbinom
#' @examples
#' plot_binomial_likelihood(y = 3, n = 10, mle = TRUE)

plot_binomial_likelihood <- function(y,
                                     n,
                                     mle = FALSE){
  # Likelihood of pi given Y = y: dbinom(y, n, pi) evaluated along the x-axis
  g <- ggplot(data = data.frame(x = c(0, 1)), aes(x)) +
    stat_function(fun = dbinom, args = list(x = y, size = n)) +
    labs(x = expression(pi),
         y = expression(paste("L(",pi,"|(Y=", y, "))")))

  if (mle == TRUE){
    # MLE of pi for Binomial data is the observed success proportion y/n
    max <- y/n

    # copy y under another name: the line segment does not work since y is
    # an argument (aesthetic) in ggplot
    success <- y

    # Mark the MLE with a vertical segment from 0 up to the likelihood there
    g <- g +
      geom_segment(aes(x = max,
                       xend = max,
                       y = 0,
                       yend = dbinom(success, n, max)),
                   color = "cyan4") +
      theme(legend.position = "none")
  }

  g

}# end of function

--------------------------------------------------------------------------------
/R/plot_normal.R:
--------------------------------------------------------------------------------
#' @title Plot a Normal Model for \eqn{\mu}
#'
#' @description Plots the probability density function (pdf) for a
#' Normal(mean, sd^2) model of variable \eqn{\mu}.
#'
#' @param mean mean parameter of the Normal model
#' @param sd standard deviation parameter of the Normal model
#'
#' @return a ggplot
#' @export
#' @import ggplot2
#' @importFrom stats dnorm
#'
#' @examples
#' plot_normal(mean = 3.5, sd = 0.5)
plot_normal <- function(mean, sd){
  # Display the density across mean +/- 4 standard deviations, a range that
  # captures essentially all of the Normal model's mass
  plot_limits <- mean + c(-4, 4) * sd

  ggplot(data = data.frame(x = plot_limits), aes(x)) +
    stat_function(fun = dnorm,
                  n = 101,
                  args = list(mean = mean, sd = sd)) +
    labs(x = expression(mu),
         y = expression(paste("f(", mu, ")")))
}

--------------------------------------------------------------------------------
/R/plot_normal_likelihood.R:
--------------------------------------------------------------------------------
#' @title Plot a Normal Likelihood Function
#'
#' @description Plots the Normal likelihood function for variable \eqn{\mu}
#' given a vector of Normal data y.
#'
#' @param y vector of observed data
#' @param sigma optional value for assumed standard deviation of y. by default, this is calculated by the sample standard deviation of y.
#'
#' @return a ggplot of Normal likelihood
#' @export
#'
#' @import ggplot2
#' @importFrom stats dnorm
#'
#' @examples
#' plot_normal_likelihood(y = rnorm(50, mean = 10, sd = 2), sigma = 1.5)

plot_normal_likelihood <- function(y, sigma = NULL){
  # Sample statistics of the observed data
  y_bar <- mean(y)
  y_sd <- sd(y)
  n <- length(y)

  # If the user supplies sigma, use it as the assumed likelihood standard
  # deviation instead of the sample estimate
  if(!is.null(sigma)){y_sd <- sigma}

  # Joint Normal likelihood of mu = x given data y (product over observations)
  like_fun <- function(x){prod(dnorm(y, mean = x, sd = y_sd))}

  # Evaluate the likelihood on a grid spanning y_bar +/- 4 standard errors,
  # where essentially all of the likelihood's mass lies
  plot_data <- data.frame(mu = seq(y_bar - 4*y_sd/sqrt(n),
                                   y_bar + 4*y_sd/sqrt(n),
                                   length = 100)) %>%
    mutate(likelihood = Vectorize(like_fun)(mu))

  ggplot(plot_data, aes(x = mu, y = likelihood)) +
    geom_line() +
    labs(x = expression(mu),
         y = expression(paste("L(",mu,"|(Y=y))", sep = "")))
}

--------------------------------------------------------------------------------
/R/plot_poisson_likelihood.R:
--------------------------------------------------------------------------------
#' @title Plot a Poisson Likelihood Function
#'
#' @description Plots the Poisson likelihood function for variable \eqn{\lambda}
#' given a vector of Poisson counts y.
#'
#' @param y vector of observed Poisson counts
#' @param lambda_upper_bound upper bound for lambda values to display on x-axis
#'
#' @return a ggplot of Poisson likelihood
#' @export
#'
#' @import ggplot2
#'
#' @examples
#' plot_poisson_likelihood(y = c(4, 2, 7), lambda_upper_bound = 10)
plot_poisson_likelihood <-
  function(y, lambda_upper_bound = 10){
    # Grid of lambda values at which to evaluate the likelihood
    lambda = seq(0, lambda_upper_bound, by = 0.1)

    # Joint Poisson likelihood L(lambda | y) = prod_i dpois(y_i, lambda),
    # written in closed form; ht is already vectorized over lambda
    ht = exp(-1*length(y)*lambda)*lambda^(sum(y))/prod(factorial(y))

    # BUG FIX: the original used f_lambda = rep(ht, length(lambda)), which
    # replicated the (already lambda-length) likelihood vector length(lambda)
    # times and silently relied on data.frame() recycling lambda -- yielding
    # a 101x duplicated data frame. ht alone pairs one likelihood value with
    # each lambda value; the rendered curve is identical.
    data <- data.frame(lambda = lambda,
                       f_lambda = ht)
    ggplot(data, aes(x = lambda,
                     y = f_lambda)) +
      geom_line() +
      labs(x = expression(lambda),
           y = expression(paste("L(",lambda,"|(Y=", y, "))")))

  }

--------------------------------------------------------------------------------
/R/sample_mode.R:
--------------------------------------------------------------------------------
#' Sample Mode
#'
#' Calculate the sample mode of vector x.
#'
#' @param x vector of sample data
#'
#' @return sample mode
#' @export
#' @importFrom stats density
#'
#' @examples sample_mode(rbeta(100, 2, 7))
sample_mode <- function(x){
  # Kernel density estimate of x; the mode is the grid point at which the
  # estimated density is highest
  d <- density(x)
  d$x[which.max(d$y)]
}

--------------------------------------------------------------------------------
/R/summarize_beta.R:
--------------------------------------------------------------------------------
#' @title Summarize a Beta Model for \eqn{\pi}
#'
#' @description Summarizes the expected value, variance, and mode of
#' a Beta(alpha, beta) model for variable \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the Beta model
#'
#' @return a summary table
#' @export
#'
#' @examples
#' summarize_beta(alpha = 1, beta = 15)


summarize_beta <- function (alpha, beta){
  # Mean and spread of the Beta(alpha, beta) model
  beta_mean <- alpha / (alpha + beta)
  beta_var <- alpha * beta / ((alpha + beta)^2 * (alpha + beta + 1))
  beta_sd <- sqrt(beta_var)

  # The mode depends on where the shape parameters sit relative to 1:
  # both below 1 gives a bimodal model peaking at 0 and 1; otherwise the
  # density peaks at an endpoint or at the usual interior formula
  if (alpha < 1 & beta < 1) {
    beta_mode <- "0 and 1"
  } else if (alpha <= 1 & beta > 1) {
    beta_mode <- 0
  } else if (alpha > 1 & beta < 1) {
    beta_mode <- 1
  } else {
    beta_mode <- (alpha - 1)/(alpha + beta - 2)
  }

  return(data.frame(mean = beta_mean,
                    mode = beta_mode,
                    var = beta_var,
                    sd = beta_sd))

}# end of function

--------------------------------------------------------------------------------
/R/summarize_beta_binomial.R:
--------------------------------------------------------------------------------
#' @title Summarize a Beta-Binomial Bayesian model
#'
#' @description Consider a Beta-Binomial Bayesian model for parameter \eqn{\pi} with
#' a Beta(alpha, beta) prior on \eqn{\pi} and Binomial likelihood with n trials
#' and y successes. Given information on the prior (alpha and beta) and data (y and n),
#' this function summarizes the mean, mode, and variance of the
#' prior and posterior Beta models of \eqn{\pi}.
#'
#' @param alpha,beta positive shape parameters of the prior Beta model
#' @param y number of successes
#' @param n number of trials
#'
#' @return a summary table
#' @export
#'
#' @examples
#' summarize_beta_binomial(alpha = 1, beta = 15, y = 25, n = 50)
summarize_beta_binomial <- function (alpha,
                                     beta,
                                     y = NULL,
                                     n = NULL)
{
  # Without data (y and n), only the prior model can be summarized
  if (is.null(y) | is.null(n))
    warning("To summarize the posterior,
          specify data y and n")
  # Mean of a Beta(a, b) model
  beta_mean <- function(a, b) {
    a/(a + b)
  }
  # Mode of a Beta(a, b) model; the density is bimodal (peaks at 0 and 1)
  # when both shape parameters fall below 1
  beta_mode <- function(a, b) {
    if(a < 1 & b <1){
      mode <- "0 and 1"
    }else if (a <= 1 & b > 1){
      mode <- 0
    }else if (a > 1 & b < 1){
      mode <- 1
    }
    else{
      mode <- (a - 1)/(a + b - 2)
    }
  }
  # Variance of a Beta(a, b) model
  beta_var <- function(a, b) {
    a * b/((a + b)^2 * (a + b + 1))
  }
  prior_mean <- beta_mean(alpha, beta)
  prior_mode <- beta_mode(alpha, beta)
  prior_var <- beta_var(alpha, beta)
  prior_sd <- sqrt(prior_var)
  if (is.null(y) & is.null(n)) {
    # Prior-only summary (one row)
    return(data.frame(model = c("prior"),
                      alpha = alpha,
                      beta = beta,
                      mean = prior_mean,
                      mode = prior_mode,
                      var = prior_var,
                      sd = prior_sd))
  }
  else {
    # Beta-Binomial conjugacy: posterior is Beta(alpha + y, beta + n - y)
    post_alpha <- y + alpha
    post_beta <- n - y + beta
    post_mean <- beta_mean(post_alpha, post_beta)
    post_mode <- beta_mode(post_alpha, post_beta)
    post_var <- beta_var(post_alpha, post_beta)
    post_sd <- sqrt(post_var)
    return(data.frame(model = c("prior", "posterior"),
                      alpha = c(alpha, post_alpha),
                      beta = c(beta, post_beta),
                      mean = c(prior_mean, post_mean),
                      mode = c(prior_mode, post_mode),
                      var = c(prior_var, post_var),
                      sd = c(prior_sd, post_sd)))
  }
}

--------------------------------------------------------------------------------
/R/summarize_gamma.R:
--------------------------------------------------------------------------------
#' @title Summarize
a Gamma Model for \eqn{\lambda}
#'
#' @description Summarizes the expected value, variance, and mode of
#' a Gamma(shape, rate) model for variable \eqn{\lambda}.
#'
#' @param shape positive shape parameter of the Gamma model
#' @param rate positive rate parameter of the Gamma model
#'
#' @return a summary table
#' @export
#'
#' @examples
#' summarize_gamma(shape = 1, rate = 15)
#'

summarize_gamma <- function (shape, rate){
  # Mean and spread of the Gamma(shape, rate) model
  gamma_mean <- shape/rate
  gamma_var <- shape/rate^2
  gamma_sd <- sqrt(gamma_var)

  # The interior mode (shape - 1)/rate only exists when shape >= 1;
  # otherwise no mode is reported
  gamma_mode <- if (shape >= 1) (shape - 1)/rate else NA

  data.frame(mean = gamma_mean,
             mode = gamma_mode,
             var = gamma_var,
             sd = gamma_sd)

}# end of function

--------------------------------------------------------------------------------
/R/summarize_gamma_poisson.R:
--------------------------------------------------------------------------------
#' Summarize the Gamma-Poisson Model
#'
#' Consider a Gamma-Poisson Bayesian model for rate parameter \eqn{\lambda} with
#' a Gamma(shape, rate) prior on \eqn{\lambda} and a Poisson likelihood for the data.
#' Given information on the prior (shape and rate)
#' and data (the sample size n and sum_y),
#' this function summarizes the mean, mode, and variance of the
#' prior and posterior Gamma models of \eqn{\lambda}.
#'
#' @param shape positive shape parameter of the Gamma prior
#' @param rate positive rate parameter of the Gamma prior
#' @param sum_y sum of observed data values for the Poisson likelihood
#' @param n number of observations for the Poisson likelihood
#'
#' @return data frame
#' @export
#'
#' @examples
#' summarize_gamma_poisson(shape = 3, rate = 4, sum_y = 7, n = 12)
#'
summarize_gamma_poisson <- function (shape,
                                     rate,
                                     sum_y = NULL,
                                     n = NULL){
  # Without data (sum_y and n), only the prior model can be summarized
  if (is.null(sum_y) | is.null(n))
    warning("To summarize the posterior,
          specify data sum_y and n")
  if (is.null(sum_y) & is.null(n)) {
    # Prior-only summary (one row)
    prior_mean <- shape/rate
    if (shape >= 1) {
      prior_mode <- (shape - 1)/rate
    }
    else {
      # No interior mode when shape < 1
      prior_mode <- NA
    }
    prior_var <- shape/rate^2
    prior_sd <- sqrt(prior_var)
    return(data.frame(model = c("prior"),
                      shape = shape,
                      rate = rate,
                      mean = prior_mean,
                      mode = prior_mode,
                      var = prior_var,
                      sd = prior_sd))
  }
  else {
    prior_mean <- shape/rate
    if (shape >= 1) {
      prior_mode <- (shape - 1)/rate
    }
    else {
      # BUG FIX: this branch previously set prior_mode <- NULL, which c()
      # silently drops; the length-1 mode column then recycled post_mode
      # into the prior row of the returned data frame. NA (as used in the
      # prior-only branch above) keeps both rows correct.
      prior_mode <- NA
    }
    prior_var <- shape/rate^2
    prior_sd <- sqrt(prior_var)
    # Gamma-Poisson conjugacy: posterior is Gamma(shape + sum_y, rate + n)
    post_mean <- (shape + sum_y)/(rate + n)
    if ((shape + sum_y) >= 1) {
      post_mode <- (shape + sum_y - 1)/(rate + n)
    }
    else {
      post_mode <- NA
    }
    post_var <- (shape + sum_y)/((rate + n)^2)
    post_sd <- sqrt(post_var)
    post_s <- shape + sum_y
    post_r <- rate + n
    return(data.frame(model = c("prior",
                                "posterior"),
                      shape = c(shape, post_s),
                      rate = c(rate, post_r),
                      mean = c(prior_mean, post_mean),
                      mode = c(prior_mode, post_mode),
                      var = c(prior_var, post_var),
                      sd = c(prior_sd, post_sd)))
  }
}

--------------------------------------------------------------------------------
/R/summarize_normal_normal.R:
--------------------------------------------------------------------------------
#' Summarize a Normal-Normal Bayesian model
#'
#' Consider a Normal-Normal Bayesian model for mean parameter \eqn{\mu} with
#' a N(mean, sd^2) prior on \eqn{\mu} and a Normal likelihood for the data.
#' Given information on the prior (mean and sd)
#' and data (the sample size n, mean y_bar, and standard deviation sigma),
#' this function summarizes the mean, mode, and variance of the
#' prior and posterior Normal models of \eqn{\mu}.
#'
#' @param mean mean of the Normal prior
#' @param sd standard deviation of the Normal prior
#' @param sigma standard deviation of the data, or likelihood standard deviation
#' @param y_bar sample mean of the data
#' @param n sample size of the data
#'
#' @return data frame
#' @export
#'
#' @examples
#' summarize_normal_normal(mean = 2.3, sd = 0.3, sigma = 5.1, y_bar = 128.5, n = 20)
summarize_normal_normal <- function (mean,
                                     sd,
                                     sigma = NULL,
                                     y_bar = NULL,
                                     n = NULL)
{
  # Without the likelihood inputs, only the prior model can be summarized
  if (is.null(y_bar) | is.null(n)|is.null(sigma))
    warning("To summarize the posterior,
         specify sigma for the likelihood, data ybar and n")

  # A Normal model is symmetric, so its mean and mode coincide
  prior_mean <- mean
  prior_mode <- mean
  prior_var <- sd^2
  prior_sd <- sd
  if (is.null(y_bar) & is.null(n) & is.null(sigma)) {
    # BUG FIX: this branch previously returned columns alpha = alpha and
    # beta = beta (copied from the Beta-Binomial summary), but no such
    # objects exist in this function, so every prior-only call errored
    # with "object 'alpha' not found". The prior-only summary now reports
    # the same columns as the prior row of the full summary below.
    return(data.frame(model = c("prior"),
                      mean = prior_mean,
                      mode = prior_mode,
                      var = prior_var,
                      sd = prior_sd))
  }
  else {
    # Normal-Normal conjugacy: the posterior mean is a precision-weighted
    # average of the prior mean and the sample mean y_bar
    post_mean <- (((sigma^2)*mean) + ((sd^2)*n*y_bar))/(n*(sd^2)+(sigma^2))
    post_mode <- post_mean
    post_var <- ((sigma^2)*(sd^2))/(n*(sd^2)+(sigma^2))
    post_sd <- sqrt(post_var)
    return(data.frame(model = c("prior", "posterior"),
                      mean = c(prior_mean, post_mean),
                      mode = c(prior_mode, post_mode),
                      var = c(prior_var, post_var),
                      sd = c(prior_sd, post_sd)))
  }
}

--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
---
output: github_document
---



```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/README-",
  out.width = "80%"
)
```
# bayesrules a hex shaped logo with shiny green-pink disco ball and purple starry background. There is text that says Bayes Rules!


[![R-CMD-check](https://github.com/bayes-rules/bayesrules/workflows/R-CMD-check/badge.svg)](https://github.com/bayes-rules/bayesrules/actions)
[![CRAN status](https://www.r-pkg.org/badges/version/bayesrules)](https://cran.r-project.org/package=bayesrules)


**bayesrules** is a package to supplement the [Bayes Rules!
book](https://www.bayesrulesbook.com/). It contains datasets and
functions that are used in the book. You can find vignettes on the [package website](https://bayes-rules.github.io/bayesrules/docs/).

27 | 28 | ## Installation 29 | 30 | You can install bayesrules from CRAN. 31 | 32 | ``` r 33 | install.packages("bayesrules") 34 | ``` 35 | 36 | 37 | You can install the development version from GitHub. You would also need to install the devtools package if you do not have it installed already. 38 | 39 | ``` r 40 | #install.packages("devtools") 41 | devtools::install_github("bayes-rules/bayesrules") 42 | ``` 43 | 44 | 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # bayesrules a hex shaped logo with shiny green-pink disco ball and purple starry background. There is text that says Bayes Rules! 5 | 6 | 7 | 8 | [![R-CMD-check](https://github.com/bayes-rules/bayesrules/workflows/R-CMD-check/badge.svg)](https://github.com/bayes-rules/bayesrules/actions) 9 | [![CRAN 10 | status](https://www.r-pkg.org/badges/version/bayesrules)](https://cran.r-project.org/package=bayesrules) 11 | 12 | 13 | **bayesrules** is an R package to supplement the [Bayes Rules! 14 | book](https://www.bayesrulesbook.com/). It contains datasets and 15 | functions that are used in the book. You can find vignettes on the 16 | [package website](https://bayes-rules.github.io/bayesrules/docs/). 17 | 18 |
19 | 20 | ## Installation 21 | 22 | You can install bayesrules from CRAN. 23 | 24 | ``` r 25 | install.packages("bayesrules") 26 | ``` 27 | 28 | You can install the development version from GitHub. You would also need 29 | to install the devtools package if you do not have it installed already. 30 | 31 | ``` r 32 | #install.packages("devtools") 33 | devtools::install_github("bayes-rules/bayesrules") 34 | ``` 35 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | 3 | - local OS X install, R 4.1.0 4 | - GitHub Actions (ubuntu-20.04): release, devel 5 | - GitHub Actions (windows): release 6 | - Github Actions (macOS): release 7 | - r-hub: windows-x86_64-devel, ubuntu-gcc-release, fedora-clang-devel 8 | - win-builder: release, devel, oldrelease 9 | 10 | ## R CMD check results 11 | 12 | There were no ERRORS, no WARNINGS, no MESSAGES 13 | -------------------------------------------------------------------------------- /data-raw/DATASET.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `DATASET` dataset goes here 2 | 3 | usethis::use_data(DATASET, overwrite = TRUE) 4 | -------------------------------------------------------------------------------- /data-raw/airbnb.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | 3 | x <- getURL("https://raw.githubusercontent.com/proback/BeyondMLR/master/data/airbnb.csv") 4 | airbnb <- read.csv(text = x) %>% 5 | select(-X, -PctBlack) %>% 6 | rename(minimum_stay = minstay, walk_score = WalkScore, 7 | transit_score = TransitScore, bike_score = BikeScore, 8 | rating = overall_satisfaction) %>% 9 | mutate(neighborhood = as.factor(neighborhood), district = as.factor(district), room_type = as.factor(room_type)) 10 | 11 | usethis::use_data(airbnb, overwrite = TRUE) 12 | 
-------------------------------------------------------------------------------- /data-raw/airbnb_small.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | library(dplyr) 3 | 4 | x <- getURL("https://raw.githubusercontent.com/proback/BeyondMLR/master/data/airbnb.csv") 5 | airbnb_small <- read.csv(text = x) %>% 6 | select(-X, -PctBlack) %>% 7 | rename(minimum_stay = minstay, walk_score = WalkScore, 8 | transit_score = TransitScore, bike_score = BikeScore, 9 | rating = overall_satisfaction) %>% 10 | mutate(neighborhood = as.factor(neighborhood), district = as.factor(district), room_type = as.factor(room_type)) %>% 11 | filter(district %in% c("Far North", "North", "Northwest")) %>% 12 | droplevels() 13 | 14 | usethis::use_data(airbnb_small, overwrite = TRUE) 15 | -------------------------------------------------------------------------------- /data-raw/bald_eagles.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | bald_eagles <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv") %>% 4 | rename(count = how_many_counted, hours = total_hours, count_per_hour = how_many_counted_by_hour) %>% 5 | filter(species == "Bald Eagle", year > 1980) %>% 6 | dplyr::select(-species, -species_latin) %>% 7 | mutate(count_per_week = count_per_hour*7*24) 8 | 9 | usethis::use_data(bald_eagles, overwrite = TRUE) 10 | -------------------------------------------------------------------------------- /data-raw/bechdel/bechdel_dataprep.R: -------------------------------------------------------------------------------- 1 | library(magrittr) 2 | library(dplyr) 3 | 4 | # Read data 5 | bechdel <- readr::read_csv("https://raw.githubusercontent.com/fivethirtyeight/data/master/bechdel/movies.csv") %>% 6 | select(year, title, binary) 7 | 8 | 9 | 10 | 11 | # Save the final data 12 | usethis::use_data(bechdel, 
overwrite = TRUE) 13 | -------------------------------------------------------------------------------- /data-raw/big_word_club/big_word_clubprep.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(here) 3 | 4 | big_word_club <- read.csv(here("data-raw", "big_word_club", "big_word_club.csv")) %>% 5 | mutate(score_pct_change = (score_a2 - score_a1) / score_a1 * 100) %>% 6 | mutate(school_id = as.factor(school_id)) 7 | 8 | # The .pdf file type is not great in packages. 9 | # The codebook can be accessed from an earlier commit https://github.com/mdogucu/bayesrules/blob/d9a01160ddf5bebb4a1ac2bb9869d539a1c63589/data-raw/big_word_club/BWC%20Codebook.pdf 10 | 11 | # Save the final data 12 | usethis::use_data(big_word_club, overwrite = TRUE) 13 | -------------------------------------------------------------------------------- /data-raw/big_word_club/bwc_data.dta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data-raw/big_word_club/bwc_data.dta -------------------------------------------------------------------------------- /data-raw/bike_users.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | # Original source: https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset 4 | bikes <- read.csv("https://www.macalester.edu/~ajohns24/data/bike_share.csv") 5 | 6 | # Clean up & subset 7 | set.seed(84735) 8 | bike_users <- bikes %>% 9 | gather(user, rides, -c(date,season,year,month,day_of_week,weekend,holiday,temp_actual,temp_feel,humidity,windspeed,weather_cat)) %>% 10 | mutate(user = factor(user, labels = c("casual","registered","total"))) %>% 11 | filter(user != "total", year == 2011) %>% 12 | mutate(user = droplevels(user)) %>% 13 | filter(temp_feel < 87, temp_feel > 45) %>% 14 | mutate(date = as.Date(date, format = 
"%m/%d/%y")) %>% 15 | arrange(date) %>% 16 | mutate_if(is.character, as.factor) 17 | usethis::use_data(bike_users, overwrite = TRUE) 18 | -------------------------------------------------------------------------------- /data-raw/bikes.R: -------------------------------------------------------------------------------- 1 | # Original source: https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset 2 | bikes <- read.csv("https://www.macalester.edu/~ajohns24/data/bike_share.csv") 3 | 4 | # Clean up & subset 5 | set.seed(84735) 6 | bikes <- bikes %>% 7 | mutate(rides = riders_registered) %>% 8 | mutate(date = as.Date(date, format = "%m/%d/%y")) %>% 9 | select(-riders_casual, -riders_total, -riders_registered) %>% 10 | filter(temp_feel < 87, temp_feel > 45) %>% 11 | mutate(humidity = 100*humidity) %>% 12 | sample_n(500) %>% 13 | arrange(date) %>% 14 | mutate_if(is.character, as.factor) 15 | 16 | usethis::use_data(bikes, overwrite = TRUE) 17 | -------------------------------------------------------------------------------- /data-raw/bird_counts.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | bird_counts <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv") %>% 4 | rename(count = how_many_counted, hours = total_hours, count_per_hour = how_many_counted_by_hour) %>% 5 | mutate(count_per_week = count_per_hour*7*24) 6 | 7 | usethis::use_data(bird_counts, overwrite = TRUE) 8 | -------------------------------------------------------------------------------- /data-raw/book_banning.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | 3 | x <- getURL("https://raw.githubusercontent.com/proback/BeyondMLR/master/data/bookbanningNoTex.csv") 4 | book_banning <- read.csv(text = x) %>% 5 | rename(title = booktitle, college_grad_rate = cperba, date = days2000, 6 | lgbtq = homosexuality, 
median_income = cmedin, book_id = book, 7 | explicit = sexexp, hs_grad_rate = cperhs, political_value_index = pvi2, 8 | violent = violence) %>% 9 | select(-X, -obama, -freqchal) %>% 10 | mutate(date = as.Date(date, origin = "2000-01-01")) %>% 11 | mutate(year = lubridate::year(date)) %>% 12 | select(title, book_id, author, date, year, removed, 13 | explicit, antifamily, occult, language, lgbtq, violent, 14 | state, political_value_index, median_income, hs_grad_rate, college_grad_rate) %>% 15 | mutate_if(is.integer, as.factor) 16 | 17 | usethis::use_data(book_banning, overwrite = TRUE) 18 | -------------------------------------------------------------------------------- /data-raw/cherry_blossom_sample.R: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(mdsr) 3 | data(Cherry) 4 | 5 | # I.D. subjects that have 7 observations 6 | subj <- Cherry %>% 7 | filter(nruns == 7) %>% 8 | group_by(name.yob) %>% 9 | summarize(min_age = min(age)) %>% 10 | filter(min_age >= 50, min_age < 55) %>% 11 | mutate(subject = as.factor(c(1:length(name.yob)))) 12 | 13 | 14 | cherry_blossom_sample <- Cherry %>% 15 | filter(name.yob %in% subj$name.yob) %>% 16 | left_join(subj) %>% 17 | rename(runner = subject) %>% 18 | select(runner, age, net, gun, year, previous) %>% 19 | mutate(runner = as.factor(runner)) 20 | 21 | usethis::use_data(cherry_blossom_sample, overwrite = TRUE) 22 | -------------------------------------------------------------------------------- /data-raw/climbers_sub/climbers_sub.R: -------------------------------------------------------------------------------- 1 | # Read tidytuesday data 2 | members <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/members.csv') 3 | peaks <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-09-22/peaks.csv') %>% 4 | select(peak_id, height_metres, first_ascent_year) 5 | 6 | 
climbers_sub <- members %>% 7 | filter(age > 15, age < 78, year >= 1978) 8 | 9 | set.seed(88) 10 | random_exp <- climbers_sub %>% 11 | group_by(expedition_id) %>% 12 | summarise(count = n()) %>% 13 | filter(count > 4) %>% 14 | sample_n(200) 15 | 16 | climbers_sub <- climbers_sub %>% 17 | group_by(expedition_id) %>% 18 | right_join(random_exp) %>% 19 | left_join(peaks) %>% 20 | mutate_if(is.character, as.factor) %>% 21 | ungroup() 22 | 23 | 24 | 25 | # Store as climbers_sub 26 | usethis::use_data(climbers_sub, overwrite = TRUE) 27 | -------------------------------------------------------------------------------- /data-raw/coffee_ratings.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | coffee_ratings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv') %>% 4 | select(owner, farm_name, mill, in_country_partner, 5 | country_of_origin, altitude_low_meters, altitude_high_meters, altitude_mean_meters, 6 | number_of_bags, bag_weight, 7 | species, variety, processing_method, aroma, flavor, aftertaste, 8 | acidity, body, balance, uniformity, clean_cup, sweetness, 9 | moisture, category_one_defects, category_two_defects, color, total_cup_points) %>% 10 | mutate_if(is.character, as.factor) 11 | 12 | 13 | 14 | usethis::use_data(coffee_ratings, overwrite = TRUE) 15 | -------------------------------------------------------------------------------- /data-raw/coffee_ratings_small.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | coffee_ratings_small <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv') %>% 4 | select(farm_name, total_cup_points, aroma, flavor, aftertaste, 5 | acidity, body, balance, uniformity, sweetness, moisture) %>% 6 | group_by(farm_name) %>% 7 | filter(n() >= 5, 
aroma > 0) %>% 8 | ungroup() %>% 9 | mutate_if(is.character, as.factor) 10 | 11 | 12 | usethis::use_data(coffee_ratings_small, overwrite = TRUE) 13 | -------------------------------------------------------------------------------- /data-raw/equality_index/equality_index.csv: -------------------------------------------------------------------------------- 1 | state,region,gop_2016,laws,historical,percent_urban 2 | alabama,south,62.08,3,gop,59 3 | alaska,west,51.28,2,gop,66 4 | arizona,west,48.67,3,gop,89.8 5 | arkansas,south,60.57,3,gop,56.2 6 | california,west,31.62,155,dem,95 7 | colorado,west,43.25,26,swing,86.2 8 | connecticut,northeast,40.93,20,dem,88 9 | delaware,south,41.79,17,dem,83.3 10 | florida,south,49.02,4,swing,91.2 11 | georgia,south,50.77,5,gop,75.1 12 | hawaii,west,30.03,24,dem,91.9 13 | idaho,west,59.26,2,gop,70.6 14 | illinois,midwest,38.76,38,dem,88.5 15 | indiana,midwest,56.82,2,swing,72.4 16 | iowa,midwest,51.15,4,swing,64 17 | kansas,midwest,56.65,1,gop,74.2 18 | kentucky,south,62.52,5,gop,58.4 19 | louisiana,south,58.09,11,gop,73.2 20 | maine,northeast,44.87,18,dem,38.7 21 | maryland,south,33.91,32,dem,87.2 22 | massachusetts,northeast,32.81,10,dem,92 23 | michigan,midwest,47.5,7,swing,74.6 24 | minnesota,midwest,44.92,4,dem,73.3 25 | mississippi,south,57.94,1,gop,49.4 26 | missouri,midwest,56.77,1,gop,70.4 27 | montana,west,56.17,2,gop,55.9 28 | nebraska,midwest,58.75,7,gop,73.1 29 | nevada,west,45.5,34,swing,94.2 30 | new hampshire,northeast,46.61,12,swing,60.3 31 | new jersey,northeast,41,20,dem,94.7 32 | new mexico,west,40.04,15,swing,77.4 33 | new york,northeast,36.15,30,dem,87.9 34 | north carolina,south,49.83,1,swing,66.1 35 | north dakota,midwest,62.96,3,gop,59.9 36 | ohio,midwest,51.69,1,swing,77.9 37 | oklahoma,south,65.32,3,gop,66.2 38 | oregon,west,39.09,27,dem,81 39 | pennsylvania,northeast,48.58,16,swing,78.7 40 | rhode island,northeast,38.9,17,dem,90.7 41 | south carolina,south,54.94,1,gop,66.3 42 | south 
dakota,midwest,61.53,1,gop,56.7 43 | tennessee,south,60.72,5,gop,66.4 44 | texas,south,52.23,3,gop,84.7 45 | utah,west,45.54,14,gop,90.6 46 | vermont,northeast,29.76,18,dem,38.9 47 | virginia,south,44.43,11,swing,75.5 48 | washington,west,38.07,23,dem,84.1 49 | west virginia,south,68.63,4,gop,48.7 50 | wisconsin,midwest,47.22,5,swing,70.2 51 | wyoming,west,67.4,2,gop,64.8 -------------------------------------------------------------------------------- /data-raw/equality_index/equality_index_dataprep.R: -------------------------------------------------------------------------------- 1 | equality_index <- read_csv("data-raw/equality_index/equality_index.csv") %>% 2 | mutate_if(is.character, as.factor) 3 | 4 | # Save the final data 5 | usethis::use_data(equality_index, overwrite = TRUE) 6 | -------------------------------------------------------------------------------- /data-raw/fake_news/fake_news_dataprep.R: -------------------------------------------------------------------------------- 1 | # Cleaning the fake_news data 2 | 3 | # SOURCE: https://www.kaggle.com/mdepak/fakenewsnet 4 | # Help from https://www.kaggle.com/kumudchauhan/fake-news-analysis-and-classification 5 | 6 | 7 | # Load libraries 8 | library(dplyr) 9 | library(readr) 10 | library(stringr) 11 | library(syuzhet) 12 | library(quanteda) 13 | 14 | 15 | # Load kaggle data 16 | set.seed(84735) 17 | buzzfeed_real <- read_csv("data-raw/fake_news/BuzzFeed_real_news_content.csv") 18 | buzzfeed_real <- buzzfeed_real %>% 19 | mutate(type = rep("real",nrow(buzzfeed_real))) %>% 20 | sample_n(90) 21 | 22 | set.seed(84735) 23 | buzzfeed_fake <- read_csv("data-raw/fake_news/BuzzFeed_fake_news_content.csv") 24 | buzzfeed_fake <- buzzfeed_fake %>% 25 | mutate(type = rep("fake",nrow(buzzfeed_fake))) %>% 26 | sample_n(size = 60) 27 | 28 | # Combine & simplify 29 | fake_news <- rbind(buzzfeed_real, buzzfeed_fake) %>% 30 | select(-c(id, meta_data, canonical_link, images, movies, publish_date, top_img, source)) 31 | 32 | 
33 | 34 | 35 | 36 | # Check it out 37 | dim(fake_news) 38 | table(fake_news$type) 39 | 40 | 41 | # Some guidance / ideas adapted from https://www.kaggle.com/burakhmmtgl/exploratory-analysis 42 | 43 | 44 | # Count the number of words & characters 45 | fake_news <- fake_news %>% 46 | mutate(title_words = sapply(strsplit(as.character(fake_news$title), " "), length)) %>% 47 | mutate(text_words = sapply(strsplit(as.character(fake_news$text), " "), length)) %>% 48 | mutate(title_char = str_count(title)) %>% 49 | mutate(text_char = str_count(text)) 50 | 51 | # Count the number & percent of words that are all capital letters 52 | fake_news <- fake_news %>% 53 | mutate(title_caps = str_count(title, "\\b[A-Z]{2,}\\b")) %>% 54 | mutate(text_caps = str_count(text, "\\b[A-Z]{2,}\\b")) %>% 55 | mutate(title_caps_percent = title_caps / title_words * 100) %>% 56 | mutate(text_caps_percent = text_caps / text_words * 100) 57 | 58 | 59 | # Number & percent of characters that are exclamation marks 60 | fake_news <- fake_news %>% 61 | mutate(title_excl = str_count(title, "!")) %>% 62 | mutate(text_excl = str_count(text, "!")) %>% 63 | mutate(title_excl_percent = title_excl / title_char * 100) %>% 64 | mutate(text_excl_percent = text_excl / text_char * 100) %>% 65 | mutate(title_has_excl = title_excl > 0) 66 | 67 | # Sentiment analysis 68 | sentiments <- round((get_nrc_sentiment(as.character(fake_news$title)) + get_nrc_sentiment(as.character(fake_news$text))) / (fake_news$title_words + fake_news$text_words)*100, 2) 69 | fake_news <- cbind(fake_news, sentiments) 70 | 71 | 72 | # Syllables per word 73 | fake_news <- fake_news %>% 74 | mutate(text_syllables = nsyllable(as.character(text))) %>% 75 | mutate(text_syllables_per_word = text_syllables / text_words) 76 | 77 | #ggplot(fake_news, aes(x = title_char, fill = type)) + 78 | # geom_density(alpha = 0.5) 79 | 80 | 81 | # Resample so that not batched by real and fake status 82 | set.seed(84735) 83 | fake_news <- sample_n(fake_news, size = 
nrow(fake_news)) %>% 84 | mutate(type = as.factor(type)) 85 | 86 | 87 | 88 | 89 | # Save the final data 90 | usethis::use_data(fake_news, overwrite = TRUE) 91 | -------------------------------------------------------------------------------- /data-raw/football.R: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | 3 | football <- Lock5Data::FootballBrain %>% 4 | rename(group = Group, years = Years) %>% 5 | mutate(volume = Hipp / 1000, 6 | group = case_when( 7 | group == "no football" ~ "no_football", 8 | group == "FBNoConcuss" ~ "fb_no_concuss", 9 | group == "FBConcuss" ~ "fb_concuss", 10 | group == "Control" ~ "control" 11 | ) 12 | ) %>% 13 | select(group, years, volume) %>% 14 | mutate_if(is.character, as.factor) 15 | usethis::use_data(football, overwrite = TRUE) 16 | -------------------------------------------------------------------------------- /data-raw/hotel_bookings.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | set.seed(84735) 4 | hotel_bookings <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-02-11/hotels.csv') %>% 5 | rename(average_daily_rate = adr) %>% 6 | sample_n(size = 1000) %>% 7 | mutate(is_canceled = as.factor(is_canceled)) %>% 8 | mutate_if(is.character, as.factor) 9 | 10 | usethis::use_data(hotel_bookings, overwrite = TRUE) 11 | -------------------------------------------------------------------------------- /data-raw/loons.R: -------------------------------------------------------------------------------- 1 | # Import from tidytuesday 2 | 3 | loons <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv") %>% 4 | rename(count = how_many_counted, hours = total_hours, count_per_hour = how_many_counted_by_hour) %>% 5 | filter(species == "Common Loon", year >= 2000) %>% 6 | mutate(count_per_100 = 
round(count_per_hour*100)) %>% 7 | dplyr::select(-species, -species_latin) 8 | 9 | usethis::use_data(loons, overwrite = TRUE) 10 | -------------------------------------------------------------------------------- /data-raw/moma.R: -------------------------------------------------------------------------------- 1 | library(RCurl) 2 | library(tidyverse) 3 | library(lubridate) 4 | 5 | 6 | url = getURL("https://media.githubusercontent.com/media/MuseumofModernArt/collection/master/Artworks.csv") 7 | moma_artists <- read.csv(text = url, na.strings = c("", " ", "NA", ""))[,-1] %>% 8 | mutate(year_acquired = year(DateAcquired), nartists = str_count(Artist, ",") + 1) %>% 9 | filter(nartists == 1) %>% 10 | select(Artist, Nationality, BeginDate, EndDate, Gender, Department, year_acquired) %>% 11 | mutate_all(funs(gsub("[(]", "", .))) %>% 12 | mutate_all(funs(gsub("[)]", "", .))) %>% 13 | rename(artist = Artist, country = Nationality, department = Department, 14 | gender = Gender, birth = BeginDate, death = EndDate) %>% 15 | mutate(alive = (death == 0), gender = tolower(gender), department = tolower(department), country = tolower(country)) %>% 16 | filter(birth != 0, !is.na(gender)) 17 | moma_artists$death[moma_artists$death == 0] <- NA 18 | moma_artists$gender[moma_artists$gender == ""] <- NA 19 | moma_artists <- moma_artists %>% 20 | filter(!is.na(gender)) 21 | 22 | 23 | moma_artists_2 <- moma_artists %>% 24 | group_by(artist) %>% 25 | summarize(count = n(), year_acquired_min = min(year_acquired), 26 | year_acquired_max = max(year_acquired), department = names(which.max(table(department)))) 27 | 28 | moma <- left_join(moma_artists, moma_artists_2) %>% 29 | mutate(genx = (birth >= 1965)) %>% 30 | select(artist, country, birth, death, alive, genx, gender, department, 31 | count, year_acquired_min, year_acquired_max) %>% 32 | distinct() %>% 33 | mutate_if(is.character, as.factor) 34 | 35 | 36 | set.seed(109) 37 | moma_sample <- moma %>% 38 | filter(!is.na(alive), !is.na(birth), 
!is.na(count), !is.na(year_acquired_min)) %>% 39 | sample_n(., size = 100) %>% 40 | select(-department) 41 | 42 | 43 | 44 | usethis::use_data(moma, overwrite = TRUE) 45 | usethis::use_data(moma_sample, overwrite = TRUE) -------------------------------------------------------------------------------- /data-raw/penguins_bayes.R: -------------------------------------------------------------------------------- 1 | library(palmerpenguins) 2 | data(penguins) 3 | penguins_bayes <- penguins %>% 4 | mutate(species = as.factor(species)) %>% 5 | mutate(above_average_weight = body_mass_g > 4200) %>% 6 | mutate(above_average_weight = as.factor(as.numeric(above_average_weight))) %>% 7 | select(species, island, year, bill_length_mm, bill_depth_mm, flipper_length_mm, body_mass_g, above_average_weight, sex) 8 | 9 | 10 | usethis::use_data(penguins_bayes, overwrite = TRUE) 11 | 12 | 13 | -------------------------------------------------------------------------------- /data-raw/pop_vs_soda/pop_vs_soda_dataprep.R: -------------------------------------------------------------------------------- 1 | library(readr) 2 | library(dplyr) 3 | library(reprex) 4 | library(tidyr) 5 | 6 | pop_vs_soda <- read_csv("data-raw/pop_vs_soda/pop_vs_soda_raw.csv") %>% 7 | mutate_if(is.character, as.factor) %>% 8 | mutate(state = tolower(state)) %>% 9 | pivot_longer(cols = c("pop","soda","coke","other"), names_to = "word_for_cola", values_to = "count") %>% 10 | uncount(count) %>% 11 | mutate(pop = (word_for_cola == "pop")) 12 | 13 | # Save the final data 14 | usethis::use_data(pop_vs_soda, overwrite = TRUE) 15 | -------------------------------------------------------------------------------- /data-raw/pop_vs_soda/pop_vs_soda_raw.csv: -------------------------------------------------------------------------------- 1 | state,region,pop,soda,coke,other 2 | Alabama,south,153,582,2849,665 3 | Alaska,west,324,636,60,92 4 | Arizona,west,586,2799,437,174 5 | Arkansas,south,154,347,1442,80 6 | 
California,west,925,20119,2892,1941 7 | Colorado,west,2909,1943,327,183 8 | Connecticut,northeast,70,4273,102,115 9 | Delaware,south,24,699,41,29 10 | District of Columbia,south,35,442,57,40 11 | Florida,south,344,5400,3642,1921 12 | Georgia,south,140,1425,4933,410 13 | Hawaii,west,35,673,36,76 14 | Idaho,west,954,520,63,123 15 | Illinois,midwest,16400,7267,885,572 16 | Indiana,midwest,5591,1485,1989,363 17 | Iowa,midwest,6295,1227,67,173 18 | Kansas,midwest,2960,934,342,253 19 | Kentucky,south,1251,692,2248,329 20 | Louisiana,south,81,375,2739,597 21 | Maine,northeast,35,1418,21,74 22 | Maryland,south,208,5127,677,218 23 | Massachusetts,northeast,115,5874,240,1883 24 | Michigan,midwest,20493,10894,232,398 25 | Minnesota,midwest,11627,1745,119,311 26 | Mississippi,south,74,184,1435,102 27 | Missouri,midwest,1948,6733,541,230 28 | Montana,west,1061,284,37,73 29 | Nebraska,midwest,3045,661,54,119 30 | Nevada,west,119,849,136,52 31 | New Hampshire,northeast,18,1428,34,255 32 | New Jersey,northeast,135,8306,263,201 33 | New Mexico,west,95,493,754,79 34 | New York,northeast,7607,16671,489,525 35 | North Carolina,south,228,2671,1843,1008 36 | North Dakota,midwest,1136,253,15,25 37 | Ohio,midwest,19487,3082,450,490 38 | Oklahoma,south,1687,692,1486,151 39 | Oregon,west,3086,1903,188,179 40 | Pennsylvania,northeast,13089,11395,331,450 41 | Rhode Island,northeast,15,934,16,43 42 | South Carolina,south,60,880,1283,237 43 | South Dakota,midwest,1152,313,22,32 44 | Tennessee,south,226,892,3656,323 45 | Texas,south,361,4813,14494,906 46 | Utah,west,858,1096,161,137 47 | Vermont,northeast,19,679,9,56 48 | Virginia,south,562,5066,1457,620 49 | Washington,west,6772,3059,213,286 50 | West Virginia,south,1555,488,240,88 51 | Wisconsin,midwest,3410,8751,87,438 52 | Wyoming,west,463,150,74,318 -------------------------------------------------------------------------------- /data-raw/pulse_of_the_nation/pulse_of_the_nation_dataprep.R: 
-------------------------------------------------------------------------------- 1 | pulse_of_the_nation <- read_csv("data-raw/pulse_of_the_nation/pulse_of_the_nation.csv") %>% 2 | mutate_if(is.character, as.factor) 3 | 4 | # Save the final data 5 | usethis::use_data(pulse_of_the_nation, overwrite = TRUE) 6 | -------------------------------------------------------------------------------- /data-raw/voices.R: -------------------------------------------------------------------------------- 1 | # Import original data from Bodo Winter 2 | 3 | voices <- readr::read_csv("http://www.bodowinter.com/uploads/1/2/9/3/129362560/politeness_data.csv") %>% 4 | select(-gender) %>% 5 | rename(pitch = frequency) %>% 6 | mutate(subject = as.factor(rep(c("C","A","B","D","E","F"), each = 14)), 7 | attitude = forcats::fct_recode(attitude, polite = "pol", informal = "inf"), 8 | scenario = as.factor(chartr("1234567", "ABCDEFG", scenario))) %>% 9 | arrange(subject) 10 | 11 | usethis::use_data(voices, overwrite = TRUE) 12 | -------------------------------------------------------------------------------- /data-raw/weather_WU.Rmd: -------------------------------------------------------------------------------- 1 | # Load the data 2 | library(bayesrules) 3 | data(weather_australia) 4 | weather_WU <- weather_australia %>% 5 | filter(location %in% c("Wollongong", "Uluru")) %>% 6 | droplevels() 7 | 8 | usethis::use_data(weather_WU, overwrite = TRUE) 9 | -------------------------------------------------------------------------------- /data-raw/weather_australia.R: -------------------------------------------------------------------------------- 1 | # Load the data 2 | library(rattle) 3 | library(lubridate) 4 | data(weatherAUS) 5 | 6 | # Take a sub-sample of the data 7 | set.seed(22) 8 | weather <- weatherAUS %>% 9 | filter(Location %in% c("Wollongong", "Hobart", "Uluru")) %>% 10 | mutate(Location = as.factor(droplevels(as.factor(Location)))) %>% 11 | filter(!is.na(WindSpeed9am), 
!is.na(Humidity9am), !is.na(Pressure9am), !is.na(Temp9am), !is.na(Temp3pm)) %>% 12 | group_by(Location) %>% 13 | sample_n(100) %>% 14 | ungroup() %>% 15 | mutate(year = year(Date), month = month(Date), day_of_year = yday(Date)) %>% 16 | dplyr::select(-Date, -Cloud9am, -Cloud3pm, -Evaporation, -Sunshine) 17 | names(weather) <- tolower(names(weather)) 18 | weather_australia <- weather 19 | 20 | usethis::use_data(weather_australia, overwrite = TRUE) 21 | -------------------------------------------------------------------------------- /data-raw/weather_perth.R: -------------------------------------------------------------------------------- 1 | # Load the data 2 | library(rattle) 3 | library(dplyr) 4 | data(weatherAUS) 5 | 6 | # Take a sub-sample of the data 7 | set.seed(84735) 8 | weather_perth <- weatherAUS %>% 9 | filter(Location == "Perth") %>% 10 | na.omit() %>% 11 | sample_n(1000) %>% 12 | mutate(year = year(Date), month = month(Date), day_of_year = yday(Date)) %>% 13 | select(-Date, -Cloud9am, -Cloud3pm, -Evaporation, -Sunshine, -Location) 14 | 15 | names(weather_perth) <- tolower(names(weather_perth)) 16 | 17 | usethis::use_data(weather_perth, overwrite = TRUE) 18 | -------------------------------------------------------------------------------- /data/airbnb.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/airbnb.rda -------------------------------------------------------------------------------- /data/airbnb_small.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/airbnb_small.rda -------------------------------------------------------------------------------- /data/bald_eagles.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bald_eagles.rda -------------------------------------------------------------------------------- /data/basketball.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/basketball.rda -------------------------------------------------------------------------------- /data/bechdel.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bechdel.rda -------------------------------------------------------------------------------- /data/big_word_club.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/big_word_club.rda -------------------------------------------------------------------------------- /data/bike_users.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bike_users.rda -------------------------------------------------------------------------------- /data/bikes.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bikes.rda -------------------------------------------------------------------------------- /data/bird_counts.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/bird_counts.rda 
-------------------------------------------------------------------------------- /data/book_banning.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/book_banning.rda -------------------------------------------------------------------------------- /data/cherry_blossom_sample.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/cherry_blossom_sample.rda -------------------------------------------------------------------------------- /data/climbers_sub.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/climbers_sub.rda -------------------------------------------------------------------------------- /data/coffee_ratings.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/coffee_ratings.rda -------------------------------------------------------------------------------- /data/coffee_ratings_small.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/coffee_ratings_small.rda -------------------------------------------------------------------------------- /data/equality_index.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/equality_index.rda -------------------------------------------------------------------------------- /data/fake_news.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/fake_news.rda -------------------------------------------------------------------------------- /data/football.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/football.rda -------------------------------------------------------------------------------- /data/hotel_bookings.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/hotel_bookings.rda -------------------------------------------------------------------------------- /data/loons.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/loons.rda -------------------------------------------------------------------------------- /data/moma.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/moma.rda -------------------------------------------------------------------------------- /data/moma_sample.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/moma_sample.rda -------------------------------------------------------------------------------- /data/penguins_bayes.rda: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/penguins_bayes.rda -------------------------------------------------------------------------------- /data/pop_vs_soda.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/pop_vs_soda.rda -------------------------------------------------------------------------------- /data/pulse_of_the_nation.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/pulse_of_the_nation.rda -------------------------------------------------------------------------------- /data/spotify.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/spotify.rda -------------------------------------------------------------------------------- /data/voices.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/voices.rda -------------------------------------------------------------------------------- /data/weather_WU.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/weather_WU.rda -------------------------------------------------------------------------------- /data/weather_australia.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/weather_australia.rda 
-------------------------------------------------------------------------------- /data/weather_perth.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/data/weather_perth.rda -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/articles/conjugate-families_files/figure-html/unnamed-chunk-7-1.png 
-------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.8/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/conjugate-families_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.10/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.11/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.8/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 
3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/model-evaluation_files/header-attrs-2.9/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | 
nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.14.0.3 2 | pkgdown: 2.0.2 3 | pkgdown_sha: ~ 4 | articles: 5 | conjugate-families: conjugate-families.html 6 | model-evaluation: model-evaluation.html 7 | last_built: 2022-03-14T16:27Z 8 | 9 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/Rplot002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/Rplot002.png -------------------------------------------------------------------------------- 
/docs/reference/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/figures/README-pressure-1.png -------------------------------------------------------------------------------- /docs/reference/figures/bayes-rules-hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/figures/bayes-rules-hex.png -------------------------------------------------------------------------------- /docs/reference/plot_beta-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta-1.png -------------------------------------------------------------------------------- /docs/reference/plot_beta_binomial-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta_binomial-1.png -------------------------------------------------------------------------------- /docs/reference/plot_beta_binomial-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta_binomial-2.png -------------------------------------------------------------------------------- /docs/reference/plot_beta_ci-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_beta_ci-1.png 
-------------------------------------------------------------------------------- /docs/reference/plot_binomial_likelihood-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_binomial_likelihood-1.png -------------------------------------------------------------------------------- /docs/reference/plot_gamma-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_gamma-1.png -------------------------------------------------------------------------------- /docs/reference/plot_gamma_poisson-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_gamma_poisson-1.png -------------------------------------------------------------------------------- /docs/reference/plot_gamma_poisson-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_gamma_poisson-2.png -------------------------------------------------------------------------------- /docs/reference/plot_normal-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal-1.png -------------------------------------------------------------------------------- /docs/reference/plot_normal_likelihood-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal_likelihood-1.png -------------------------------------------------------------------------------- /docs/reference/plot_normal_normal-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal_normal-1.png -------------------------------------------------------------------------------- /docs/reference/plot_normal_normal-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_normal_normal-2.png -------------------------------------------------------------------------------- /docs/reference/plot_poisson_likelihood-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/docs/reference/plot_poisson_likelihood-1.png -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Manual", 2 | title = "bayesrules: Datasets and Supplemental Functions from Bayes Rules! Book", 3 | author = c(person(given = "Mine", 4 | family = "Dogucu"), 5 | person(given = "Alicia", 6 | family = "Johnson"), 7 | person(given = "Miles", 8 | family = "Ott")), 9 | year = "2021", 10 | url = "https://github.com/bayes-rules/bayesrules", 11 | note = "R package version 0.0.2.9000", 12 | header = "To cite bayesrules package in publications use:", 13 | textVersion = 14 | paste("Mine Dogucu, Alicia Johnson, Miles Ott (2021).", 15 | "bayesrules: Datasets and Supplemental Functions from Bayes Rules! 
Book", 16 | "Retrieved from https://github.com/bayes-rules/bayesrules R package version 0.0.2.900") 17 | ) 18 | -------------------------------------------------------------------------------- /man/airbnb.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_airbnb.R 3 | \docType{data} 4 | \name{airbnb} 5 | \alias{airbnb} 6 | \title{Chicago AirBnB Data} 7 | \format{ 8 | A data frame with 1561 rows and 12 variables. Each row represents a single AirBnB listing. 9 | \describe{ 10 | \item{price}{the nightly price of the listing (in USD)} 11 | \item{rating}{the listing's average rating, on a scale from 1 to 5} 12 | \item{reviews}{number of user reviews the listing has} 13 | \item{room_type}{the type of listing (eg: Shared room)} 14 | \item{accommodates}{number of guests the listing accommodates} 15 | \item{bedrooms}{the number of bedrooms the listing has} 16 | \item{minimum_stay}{the minimum number of nights to stay in the listing} 17 | \item{neighborhood}{the neighborhood in which the listing is located} 18 | \item{district}{the broader district in which the listing is located} 19 | \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 20 | \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 21 | \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 22 | } 23 | } 24 | \source{ 25 | Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 26 | Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 
27 | \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 28 | } 29 | \usage{ 30 | airbnb 31 | } 32 | \description{ 33 | The AirBnB data was collated by Trinh and Ameri as part of a course project 34 | at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 35 | This data set includes the prices and features for 1561 AirBnB listings in Chicago, collected in 2016. 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /man/airbnb_small.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_airbnb_small.R 3 | \docType{data} 4 | \name{airbnb_small} 5 | \alias{airbnb_small} 6 | \title{Chicago AirBnB Data} 7 | \format{ 8 | A data frame with 869 rows and 12 variables. Each row represents a single AirBnB listing. 9 | \describe{ 10 | \item{price}{the nightly price of the listing (in USD)} 11 | \item{rating}{the listing's average rating, on a scale from 1 to 5} 12 | \item{reviews}{number of user reviews the listing has} 13 | \item{room_type}{the type of listing (eg: Shared room)} 14 | \item{accommodates}{number of guests the listing accommodates} 15 | \item{bedrooms}{the number of bedrooms the listing has} 16 | \item{minimum_stay}{the minimum number of nights to stay in the listing} 17 | \item{neighborhood}{the neighborhood in which the listing is located} 18 | \item{district}{the broader district in which the listing is located} 19 | \item{walk_score}{the neighborhood's rating for walkability (0 - 100)} 20 | \item{transit_score}{the neighborhood's rating for access to public transit (0 - 100)} 21 | \item{bike_score}{the neighborhood's rating for bikeability (0 - 100)} 22 | } 23 | } 24 | \source{ 25 | Ly Trinh and Pony Ameri (2018). Airbnb Price Determinants: A Multilevel Modeling Approach. 
Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 26 | Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 27 | \url{https://github.com/proback/BeyondMLR/blob/master/data/airbnb.csv/} 28 | } 29 | \usage{ 30 | airbnb_small 31 | } 32 | \description{ 33 | The AirBnB data was collated by Trinh and Ameri as part of a course project 34 | at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 35 | This data set, a subset of the airbnb data in the bayesrules package, includes the prices and features for 869 AirBnB listings in Chicago, collected in 2016. 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /man/bald_eagles.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bald_eagles.R 3 | \docType{data} 4 | \name{bald_eagles} 5 | \alias{bald_eagles} 6 | \title{Bald Eagle Count Data} 7 | \format{ 8 | A data frame with 37 rows and 5 variables. Each row represents Bald Eagle observations in the given year. 9 | \describe{ 10 | \item{year}{year of data collection} 11 | \item{count}{number of birds observed} 12 | \item{hours}{total person-hours of observation period} 13 | \item{count_per_hour}{count divided by hours} 14 | \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 15 | } 16 | } 17 | \source{ 18 | \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-06-18/bird_counts.csv}. 19 | } 20 | \usage{ 21 | bald_eagles 22 | } 23 | \description{ 24 | Bald Eagle count data collected from the year 1981 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 
25 | The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 26 | A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/basketball.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_basketball.R 3 | \docType{data} 4 | \name{basketball} 5 | \alias{basketball} 6 | \title{WNBA Basketball Data} 7 | \format{ 8 | A data frame with 146 rows and 30 variables. Each row represents a single WNBA basketball player. The variables on each player are as follows. 9 | \describe{ 10 | \item{player_name}{first and last name} 11 | \item{height}{height in inches} 12 | \item{weight}{weight in pounds} 13 | \item{year}{year of the WNBA season} 14 | \item{team}{team that the WNBA player is a member of} 15 | \item{age}{age in years} 16 | \item{games_played}{number of games played by the player in that season} 17 | \item{games_started}{number of games the player started in that season} 18 | \item{avg_minutes_played}{average number of minutes played per game} 19 | \item{avg_field_goals}{average number of field goals per game played} 20 | \item{avg_field_goal_attempts}{average number of field goals attempted per game played} 21 | \item{field_goal_pct}{percent of field goals made throughout the season} 22 | \item{avg_three_pointers}{average number of three pointers per game played} 23 | \item{avg_three_pointer_attempts}{average number of three pointers attempted per game played} 24 | \item{three_pointer_pct}{percent of three pointers made throughout the season} 25 | \item{avg_two_pointers}{average number of two pointers made per game played} 26 | \item{avg_two_pointer_attempts}{average number of two pointers 
attempted per game played} 27 | \item{two_pointer_pct}{percent of two pointers made throughout the season} 28 | \item{avg_free_throws}{average number of free throws made per game played} 29 | \item{avg_free_throw_attempts}{average number of free throws attempted per game played} 30 | \item{free_throw_pct}{percent of free throws made throughout the season} 31 | \item{avg_offensive_rb}{average number of offensive rebounds per game played} 32 | \item{avg_defensive_rb}{average number of defensive rebounds per game played} 33 | \item{avg_rb}{average number of rebounds (both offensive and defensive) per game played} 34 | \item{avg_assists}{average number of assists per game played} 35 | \item{avg_steals}{average number of steals per game played} 36 | \item{avg_blocks}{average number of blocks per game played} 37 | \item{avg_turnovers}{average number of turnovers per game played} 38 | \item{avg_personal_fouls}{average number of personal fouls per game played. Note: after 5 fouls the player is not allowed to play in that game anymore} 39 | \item{avg_points}{average number of points made per game played} 40 | \item{total_minutes}{total number of minutes played throughout the season} 41 | \item{starter}{whether or not the player started in more than half of the games they played} 42 | } 43 | } 44 | \source{ 45 | \url{https://www.basketball-reference.com/} 46 | } 47 | \usage{ 48 | basketball 49 | } 50 | \description{ 51 | The WNBA Basketball Data was scraped from \url{https://www.basketball-reference.com/wnba/players/} and contains information on basketball players from the 2019 season. 
52 | } 53 | \keyword{datasets} 54 | -------------------------------------------------------------------------------- /man/bechdel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bechdel.R 3 | \docType{data} 4 | \name{bechdel} 5 | \alias{bechdel} 6 | \title{Bechdel Test for over 1500 movies} 7 | \format{ 8 | A data frame with 1794 rows and 3 variables: 9 | \describe{ 10 | \item{year}{The release year of the movie} 11 | \item{title}{The title of the movie} 12 | \item{binary}{Bechdel test result (PASS, FAIL)} 13 | } 14 | } 15 | \source{ 16 | 17 | } 18 | \usage{ 19 | bechdel 20 | } 21 | \description{ 22 | A dataset containing data behind the story 23 | "The Dollar-And-Cents Case Against Hollywood's Exclusion of Women" 24 | \url{https://fivethirtyeight.com/features/the-dollar-and-cents-case-against-hollywoods-exclusion-of-women/}. 25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /man/bike_users.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bike_users.R 3 | \docType{data} 4 | \name{bike_users} 5 | \alias{bike_users} 6 | \title{Capital Bikeshare Bike Ridership (Registered and Casual Riders)} 7 | \format{ 8 | A data frame with 534 daily observations, 267 each for registered riders and casual riders, and 13 variables: 9 | \describe{ 10 | \item{date}{date of observation} 11 | \item{season}{fall, spring, summer, or winter} 12 | \item{year}{the year of the date} 13 | \item{month}{the month of the date} 14 | \item{day_of_week}{the day of the week} 15 | \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 16 | \item{holiday}{whether or not the date falls on a holiday (yes or no)} 17 | \item{temp_actual}{raw temperature (degrees 
Fahrenheit)} 18 | \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 19 | \item{humidity}{humidity level (percentage)} 20 | \item{windspeed}{wind speed (miles per hour)} 21 | \item{weather_cat}{weather category (categ1 = pleasant, categ2 = moderate, categ3 = severe)} 22 | \item{user}{rider type (casual or registered)} 23 | \item{rides}{number of bikeshare rides} 24 | } 25 | } 26 | \source{ 27 | Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset/} 28 | } 29 | \usage{ 30 | bike_users 31 | } 32 | \description{ 33 | Data on ridership among registered members and casual users of the Capital Bikeshare service in Washington, D.C.. 34 | } 35 | \keyword{datasets} 36 | -------------------------------------------------------------------------------- /man/bikes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bikes.R 3 | \docType{data} 4 | \name{bikes} 5 | \alias{bikes} 6 | \title{Capital Bikeshare Bike Ridership} 7 | \format{ 8 | A data frame with 500 daily observations and 13 variables: 9 | \describe{ 10 | \item{date}{date of observation} 11 | \item{season}{fall, spring, summer, or winter} 12 | \item{year}{the year of the date} 13 | \item{month}{the month of the date} 14 | \item{day_of_week}{the day of the week} 15 | \item{weekend}{whether or not the date falls on a weekend (TRUE or FALSE)} 16 | \item{holiday}{whether or not the date falls on a holiday (yes or no)} 17 | \item{temp_actual}{raw temperature (degrees Fahrenheit)} 18 | \item{temp_feel}{what the temperature feels like (degrees Fahrenheit)} 19 | \item{humidity}{humidity level (percentage)} 20 | \item{windspeed}{wind speed (miles per hour)} 21 | \item{weather_cat}{weather category (categ1 = pleasant, categ2 = 
moderate, categ3 = severe)} 22 | \item{rides}{number of bikeshare rides} 23 | } 24 | } 25 | \source{ 26 | Fanaee-T, Hadi and Gama, Joao (2013). Event labeling combining ensemble detectors and background knowledge. Progress in Artificial Intelligence. \url{https://archive.ics.uci.edu/ml/datasets/Bike+Sharing+Dataset} 27 | } 28 | \usage{ 29 | bikes 30 | } 31 | \description{ 32 | Data on ridership among registered members of the Capital Bikeshare service in Washington, D.C.. 33 | } 34 | \keyword{datasets} 35 | -------------------------------------------------------------------------------- /man/bird_counts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_bird_counts.R 3 | \docType{data} 4 | \name{bird_counts} 5 | \alias{bird_counts} 6 | \title{Bird Counts Data} 7 | \format{ 8 | A data frame with 18706 rows and 7 variables. Each row represents observations for the given bird species in the given year. 9 | \describe{ 10 | \item{year}{year of data collection} 11 | \item{species}{scientific name of observed bird species} 12 | \item{species_latin}{latin name of observed bird species} 13 | \item{count}{number of birds observed} 14 | \item{hours}{total person-hours of observation period} 15 | \item{count_per_hour}{count divided by hours} 16 | \item{count_per_week}{count_per_hour multiplied by 168 hours per week} 17 | } 18 | } 19 | \source{ 20 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv/}. 21 | } 22 | \usage{ 23 | bird_counts 24 | } 25 | \description{ 26 | Bird count data collected between the years 1921 and 2017, in late December, by birdwatchers in the Ontario, Canada area. 27 | The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 
28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/book_banning.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_book_banning.R 3 | \docType{data} 4 | \name{book_banning} 5 | \alias{book_banning} 6 | \title{Book Banning Data} 7 | \format{ 8 | A data frame with 931 rows and 17 variables. Each row represents a single book challenge within the given state and date. 9 | \describe{ 10 | \item{title}{title of book being challenged} 11 | \item{book_id}{identifier for the book} 12 | \item{author}{author of the book} 13 | \item{date}{date of the challenge} 14 | \item{year}{year of the challenge} 15 | \item{removed}{whether or not the challenge was successful (the book was removed)} 16 | \item{explicit}{whether the book was challenged for sexually explicit material} 17 | \item{antifamily}{whether the book was challenged for anti-family material} 18 | \item{occult}{whether the book was challenged for occult material} 19 | \item{language}{whether the book was challenged for inappropriate language} 20 | \item{lgbtq}{whether the book was challenged for LGBTQ material} 21 | \item{violent}{whether the book was challenged for violent material} 22 | \item{state}{US state in which the challenge was made} 23 | \item{political_value_index}{Political Value Index of the state (negative = leans Republican, 0 = neutral, positive = leans Democrat)} 24 | \item{median_income}{median income in the state, relative to the average state median income} 25 | \item{hs_grad_rate}{high school graduation rate, in percent, relative to the average state high school graduation rate} 26 | \item{college_grad_rate}{college graduation rate, in percent, relative to the average state college graduation rate} 27 | } 28 | } 29 | \source{ 30 | Shannon Fast and Thomas Hegland (2011). 
Book Challenges: A Statistical Examination. Project for Statistics 316-Advanced Statistical Modeling, St. Olaf College. 31 | Julie Legler and Paul Roback (2019). Broadening Your Statistical Horizons: Generalized Linear Models and Multilevel Models. \url{https://bookdown.org/roback/bookdown-bysh/}. 32 | \url{https://github.com/proback/BeyondMLR/blob/master/data/bookbanningNoTex.csv/} 33 | } 34 | \usage{ 35 | book_banning 36 | } 37 | \description{ 38 | The book banning data was collected by Fast and Hegland as part of a course project 39 | at St Olaf College, and distributed with "Broadening Your Statistical Horizons" by Legler and Roback. 40 | This data set includes the features and outcomes for 931 book challenges 41 | (ie. requests to ban a book) made in the US between 2000 and 2010. 42 | Information on the books being challenged and the characteristics of these books 43 | were collected from the American Library Society. State-level demographic information and 44 | political leanings were obtained from the US Census Bureau and Cook Political Report, respectively. 45 | Due to an outlying large number of challenges, book challenges made in the state of Texas 46 | were omitted. 
47 | } 48 | \keyword{datasets} 49 | -------------------------------------------------------------------------------- /man/cherry_blossom_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_cherry_blossom_sample.R 3 | \docType{data} 4 | \name{cherry_blossom_sample} 5 | \alias{cherry_blossom_sample} 6 | \title{Cherry Blossom Running Race} 7 | \format{ 8 | A data frame with 252 Cherry Blossom outcomes and 7 variables: 9 | \describe{ 10 | \item{runner}{a unique identifier for the runner} 11 | \item{age}{age of the runner} 12 | \item{net}{time to complete the race, from starting line to finish line (minutes)} 13 | \item{gun}{time between the official start of the race and the finish line (minutes)} 14 | \item{year}{year of the race} 15 | \item{previous}{the number of previous years in which the subject ran in the race} 16 | } 17 | } 18 | \source{ 19 | Data in the original Cherry data set were obtained from \url{https://www.cherryblossom.org/post-race/race-results/}. 20 | } 21 | \usage{ 22 | cherry_blossom_sample 23 | } 24 | \description{ 25 | A sub-sample of outcomes for the annual Cherry Blossom Ten Mile race in Washington, D.C.. This sub-sample was taken from the complete Cherry data in the mdsr package. 
26 | } 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/classification_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classification_summary.R 3 | \name{classification_summary} 4 | \alias{classification_summary} 5 | \title{Posterior Classification Summaries} 6 | \usage{ 7 | classification_summary(model, data, cutoff = 0.5) 8 | } 9 | \arguments{ 10 | \item{model}{an rstanreg model object with binary y} 11 | 12 | \item{data}{data frame including the variables in the model, both response y and predictors x} 13 | 14 | \item{cutoff}{probability cutoff to classify a new case as positive (0.5 is the default)} 15 | } 16 | \value{ 17 | a list 18 | } 19 | \description{ 20 | Given a set of observed data including a binary response variable y 21 | and an rstanreg model of y, 22 | this function returns summaries of the model's posterior classification quality. 23 | These summaries include a confusion matrix as well as estimates of the model's 24 | sensitivity, specificity, and overall accuracy. 
25 | } 26 | \examples{ 27 | x <- rnorm(20) 28 | z <- 3*x 29 | prob <- 1/(1+exp(-z)) 30 | y <- rbinom(20, 1, prob) 31 | example_data <- data.frame(x = x, y = y) 32 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data, family = binomial) 33 | classification_summary(model = example_model, data = example_data, cutoff = 0.5) 34 | } 35 | -------------------------------------------------------------------------------- /man/classification_summary_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/classification_summary_cv.R 3 | \name{classification_summary_cv} 4 | \alias{classification_summary_cv} 5 | \title{Cross-Validated Posterior Classification Summaries} 6 | \usage{ 7 | classification_summary_cv(model, data, group, k, cutoff = 0.5) 8 | } 9 | \arguments{ 10 | \item{model}{an rstanreg model object with binary y} 11 | 12 | \item{data}{data frame including the variables in the model, both response y (0 or 1) and predictors x} 13 | 14 | \item{group}{a character string representing the name of the factor grouping variable, i.e. random effect (only used for hierarchical models)} 15 | 16 | \item{k}{the number of folds to use for cross validation} 17 | 18 | \item{cutoff}{probability cutoff to classify a new case as positive} 19 | } 20 | \value{ 21 | a list 22 | } 23 | \description{ 24 | Given a set of observed data including a binary response variable y 25 | and an rstanreg model of y, 26 | this function returns cross validated estimates of the model's posterior classification quality: 27 | sensitivity, specificity, and overall accuracy. 28 | For hierarchical models of class lmerMod, the folds are composed of collections of groups, not individual observations. 
29 | } 30 | \examples{ 31 | x <- rnorm(20) 32 | z <- 3*x 33 | prob <- 1/(1+exp(-z)) 34 | y <- rbinom(20, 1, prob) 35 | example_data <- data.frame(x = x, y = y) 36 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data, family = binomial) 37 | classification_summary_cv(model = example_model, data = example_data, k = 2, cutoff = 0.5) 38 | } 39 | -------------------------------------------------------------------------------- /man/climbers_sub.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_climbers_sub.R 3 | \docType{data} 4 | \name{climbers_sub} 5 | \alias{climbers_sub} 6 | \title{Himalayan Climber Data} 7 | \format{ 8 | A data frame with 2076 observations (1 per climber) and 22 variables: 9 | \describe{ 10 | \item{expedition_id}{unique expedition identifier} 11 | \item{member_id}{unique climber identifier} 12 | \item{peak_id}{unique identifier of the expedition's destination peak} 13 | \item{peak_name}{name of the expedition's destination peak} 14 | \item{year}{year of expedition} 15 | \item{season}{season of expedition (Autumn, Spring, Summer, Winter)} 16 | \item{sex}{climber gender identity which the database oversimplifies to a binary category} 17 | \item{age}{climber age} 18 | \item{citizenship}{climber citizenship} 19 | \item{expedition_role}{climber's role in the expedition (eg: Co-Leader)} 20 | \item{hired}{whether the climber was a hired member of the expedition} 21 | \item{highpoint_metres}{the destination peak's highpoint (metres)} 22 | \item{success}{whether the climber successfully reached the destination} 23 | \item{solo}{whether the climber was on a solo expedition} 24 | \item{oxygen_used}{whether the climber utilized supplemental oxygen} 25 | \item{died}{whether the climber died during the expedition} 26 | \item{death_cause}{} 27 | \item{death_height_metres}{} 28 | \item{injured}{whether the climber was injured on 
the expedition} 29 | \item{injury_type}{} 30 | \item{injury_height_metres}{} 31 | \item{count}{number of climbers in the expedition} 32 | \item{height_metres}{height of the peak in meters} 33 | \item{first_ascent_year}{the year of the first recorded summit of the peak (though not necessarily the actual first summit!)} 34 | } 35 | } 36 | \source{ 37 | Original source: \url{https://www.himalayandatabase.com/}. Complete dataset distributed by: \url{https://github.com/rfordatascience/tidytuesday/tree/master/data/2020/2020-09-22/}. 38 | } 39 | \usage{ 40 | climbers_sub 41 | } 42 | \description{ 43 | A sub-sample of the Himalayan Database distributed through the R for Data Science TidyTuesday project. This dataset includes information on the results and conditions for various Himalayan climbing expeditions. Each row corresponds to a single member of a climbing expedition team. 44 | } 45 | \keyword{datasets} 46 | -------------------------------------------------------------------------------- /man/coffee_ratings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_coffee_ratings.R 3 | \docType{data} 4 | \name{coffee_ratings} 5 | \alias{coffee_ratings} 6 | \title{Coffee Ratings Data} 7 | \format{ 8 | A data frame with 1339 batches of coffee beans and 27 variables on each batch. 
9 | \describe{ 10 | \item{owner}{farm owner} 11 | \item{farm_name}{farm where beans were grown} 12 | \item{country_of_origin}{country where farm is} 13 | \item{mill}{where beans were processed} 14 | \item{in_country_partner}{country of coffee partner} 15 | \item{altitude_low_meters}{lowest altitude of the farm} 16 | \item{altitude_high_meters}{highest altitude of the farm} 17 | \item{altitude_mean_meters}{average altitude of the farm} 18 | \item{number_of_bags}{number of bags tested} 19 | \item{bag_weight}{weight of each tested bag} 20 | \item{species}{bean species} 21 | \item{variety}{bean variety} 22 | \item{processing_method}{how beans were processed} 23 | \item{aroma}{bean aroma grade} 24 | \item{flavor}{bean flavor grade} 25 | \item{aftertaste}{bean aftertaste grade} 26 | \item{acidity}{bean acidity grade} 27 | \item{body}{bean body grade} 28 | \item{balance}{bean balance grade} 29 | \item{uniformity}{bean uniformity grade} 30 | \item{clean_cup}{bean clean cup grade} 31 | \item{sweetness}{bean sweetness grade} 32 | \item{moisture}{bean moisture grade} 33 | \item{category_one_defects}{count of category one defects} 34 | \item{category_two_defects}{count of category two defects} 35 | \item{color}{bean color} 36 | \item{total_cup_points}{total bean rating (0 -- 100)} 37 | } 38 | } 39 | \source{ 40 | \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 41 | } 42 | \usage{ 43 | coffee_ratings 44 | } 45 | \description{ 46 | A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 
47 | } 48 | \keyword{datasets} 49 | -------------------------------------------------------------------------------- /man/coffee_ratings_small.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_coffee_ratings_small.R 3 | \docType{data} 4 | \name{coffee_ratings_small} 5 | \alias{coffee_ratings_small} 6 | \title{Coffee Ratings Data} 7 | \format{ 8 | A data frame with 636 batches of coffee beans and 11 variables on each batch. 9 | \describe{ 10 | \item{farm_name}{farm where beans were grown} 11 | \item{total_cup_points}{total bean rating (0 -- 100)} 12 | \item{aroma}{bean aroma grade} 13 | \item{flavor}{bean flavor grade} 14 | \item{aftertaste}{bean aftertaste grade} 15 | \item{acidity}{bean acidity grade} 16 | \item{body}{bean body grade} 17 | \item{balance}{bean balance grade} 18 | \item{uniformity}{bean uniformity grade} 19 | \item{sweetness}{bean sweetness grade} 20 | \item{moisture}{bean moisture grade} 21 | } 22 | } 23 | \source{ 24 | \url{https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-07/coffee_ratings.csv}. 25 | } 26 | \usage{ 27 | coffee_ratings_small 28 | } 29 | \description{ 30 | A sub-set of data on coffee bean ratings / quality originally collected by James LeDoux (jmzledoux) and distributed through the R for Data Science TidyTuesday project. 31 | This is a simplified version of the coffee_ratings data. 
32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/equality_index.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_equality_index.R 3 | \docType{data} 4 | \name{equality_index} 5 | \alias{equality_index} 6 | \title{LGBTQ+ Rights Laws by State} 7 | \format{ 8 | A data frame with 50 observations, one per state, and 6 variables: 9 | \describe{ 10 | \item{state}{state name} 11 | \item{region}{region in which the state falls} 12 | \item{gop_2016}{percent of the 2016 presidential election vote earned by the Republican ("GOP") candidate} 13 | \item{laws}{number of LGBTQ+ rights laws (as of 2019)} 14 | \item{historical}{political leaning of the state over time (gop = Republican, dem = Democrat, swing = swing state)} 15 | \item{percent_urban}{percent of state's residents that live in urban areas (by the 2010 census)} 16 | } 17 | } 18 | \source{ 19 | Data on LGBTQ+ laws were obtained from Warbelow, Sarah, Courtnay Avant, and Colin Kutney (2020). 2019 State Equality Index. Washington, DC. Human Rights Campaign Foundation. \url{https://assets2.hrc.org/files/assets/resources/HRC-SEI-2019-Report.pdf?_ga=2.148925686.1325740687.1594310864-1928808113.1594310864&_gac=1.213124768.1594312278.EAIaIQobChMI9dP2hMzA6gIVkcDACh21GgLEEAAYASAAEgJiJvD_BwE/}. Data on urban residency obtained from \url{https://www.icip.iastate.edu/tables/population/urban-pct-states/}. 20 | } 21 | \usage{ 22 | equality_index 23 | } 24 | \description{ 25 | Data on the number of LGBTQ+ equality laws (as of 2019) and demographics in each U.S. state. 
26 | } 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /man/fake_news.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_fake_news.R 3 | \docType{data} 4 | \name{fake_news} 5 | \alias{fake_news} 6 | \title{A collection of 150 news articles} 7 | \format{ 8 | A data frame with 150 rows and 30 variables: 9 | \describe{ 10 | \item{title}{The title of the news article} 11 | \item{text}{Text of the article} 12 | \item{url}{Hyperlink for the article} 13 | \item{authors}{Authors of the article} 14 | \item{type}{Binary variable indicating whether the article presents fake or real news (fake, real)} 15 | \item{title_words}{Number of words in the title} 16 | \item{text_words}{Number of words in the text} 17 | \item{title_char}{Number of characters in the title} 18 | \item{text_char}{Number of characters in the text} 19 | \item{title_caps}{Number of words that are all capital letters in the title} 20 | \item{text_caps}{Number of words that are all capital letters in the text} 21 | \item{title_caps_percent}{Percent of words that are all capital letters in the title} 22 | \item{text_caps_percent}{Percent of words that are all capital letters in the text} 23 | \item{title_excl}{Number of characters that are exclamation marks in the title} 24 | \item{text_excl}{Number of characters that are exclamation marks in the text} 25 | \item{title_excl_percent}{Percent of characters that are exclamation marks in the title} 26 | \item{text_excl_percent}{Percent of characters that are exclamation marks in the text} 27 | \item{title_has_excl}{Binary variable indicating whether the title of the article includes an exclamation point or not (TRUE, FALSE)} 28 | \item{anger}{Percent of words that are associated with anger} 29 | \item{anticipation}{Percent of words that are associated with anticipation} 30 | 
\item{disgust}{Percent of words that are associated with disgust} 31 | \item{fear}{Percent of words that are associated with fear} 32 | \item{joy}{Percent of words that are associated with joy} 33 | \item{sadness}{Percent of words that are associated with sadness} 34 | \item{surprise}{Percent of words that are associated with surprise} 35 | \item{trust}{Percent of words that are associated with trust} 36 | \item{negative}{Percent of words that have negative sentiment} 37 | \item{positive}{Percent of words that have positive sentiment} 38 | \item{text_syllables}{Number of syllables in text} 39 | \item{text_syllables_per_word}{Number of syllables per word in text} 40 | } 41 | } 42 | \source{ 43 | Shu, K., Mahudeswaran, D., Wang, S., Lee, D. and Liu, H. (2018) FakeNewsNet: A Data Repository with News Content, Social Context and Dynamic Information for Studying Fake News on Social Media 44 | } 45 | \usage{ 46 | fake_news 47 | } 48 | \description{ 49 | A dataset containing data behind the study 50 | "FakeNewsNet: A Data Repository with News Content, Social Context and Spatialtemporal Information for Studying Fake News on Social Media" 51 | \url{https://arxiv.org/abs/1809.01286}. 52 | The news articles in this dataset were posted to Facebook in September 2016, in the run-up to the U.S. presidential election. 
53 | } 54 | \keyword{datasets} 55 | -------------------------------------------------------------------------------- /man/figures/bayes-rules-hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bayes-rules/bayesrules/404fbdbae2957976820f9249e9cc663a72141463/man/figures/bayes-rules-hex.png -------------------------------------------------------------------------------- /man/football.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_football.R 3 | \docType{data} 4 | \name{football} 5 | \alias{football} 6 | \title{Football Brain Measurements} 7 | \format{ 8 | A data frame with 75 observations and 5 variables: 9 | \describe{ 10 | \item{group}{control = no football, 11 | fb_no_concuss = football player but no concussions, 12 | fb_concuss = football player with concussion history} 13 | \item{years}{Number of years a person played football} 14 | \item{volume}{Total hippocampus volume, in cubic centimeters} 15 | } 16 | } 17 | \source{ 18 | Singh R, Meier T, Kuplicki R, Savitz J, et al., 19 | "Relationship of Collegiate Football Experience and Concussion 20 | With Hippocampal Volume and Cognitive Outcome," JAMA, 311(18), 2014 21 | } 22 | \usage{ 23 | football 24 | } 25 | \description{ 26 | Brain measurements for football and non-football players as provided in the Lock5 package 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/hotel_bookings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_hotel_bookings.R 3 | \docType{data} 4 | \name{hotel_bookings} 5 | \alias{hotel_bookings} 6 | \title{Hotel Bookings Data} 7 | \format{ 8 | A data frame with 1000 hotel bookings and 32 variables on 
each booking. 9 | \describe{ 10 | \item{hotel}{"Resort Hotel" or "City Hotel"} 11 | \item{is_canceled}{whether the booking was cancelled} 12 | \item{lead_time}{number of days between booking and arrival} 13 | \item{arrival_date_year}{year of scheduled arrival} 14 | \item{arrival_date_month}{month of scheduled arrival} 15 | \item{arrival_date_week_number}{week of scheduled arrival} 16 | \item{arrival_date_day_of_month}{day of month of scheduled arrival} 17 | \item{stays_in_weekend_nights}{number of reserved weekend nights} 18 | \item{stays_in_week_nights}{number of reserved week nights} 19 | \item{adults}{number of adults in booking} 20 | \item{children}{number of children} 21 | \item{babies}{number of babies} 22 | \item{meal}{whether the booking includes breakfast (BB = bed & breakfast), breakfast and dinner (HB = half board), or breakfast, lunch, and dinner (FB = full board)} 23 | \item{country}{guest's country of origin} 24 | \item{market_segment}{market segment designation (eg: TA = travel agent, TO = tour operator)} 25 | \item{distribution_channel}{booking distribution channel (eg: TA = travel agent, TO = tour operator)} 26 | \item{is_repeated_guest}{whether or not booking was made by a repeated guest} 27 | \item{previous_cancellations}{guest's number of previous booking cancellations} 28 | \item{previous_bookings_not_canceled}{guest's number of previous bookings that weren't cancelled} 29 | \item{reserved_room_type}{code for type of room reserved by guest} 30 | \item{assigned_room_type}{code for type of room assigned by hotel} 31 | \item{booking_changes}{number of changes made to the booking} 32 | \item{deposit_type}{No Deposit, Non Refund, Refundable} 33 | \item{agent}{booking travel agency} 34 | \item{company}{booking company} 35 | \item{days_in_waiting_list}{number of days the guest waited for booking confirmation} 36 | \item{customer_type}{Contract, Group, Transient, Transient-party (a transient booking tied to another transient booking)} 37 | 
\item{average_daily_rate}{average hotel cost per day} 38 | \item{required_car_parking_spaces}{number of parking spaces the guest needed} 39 | \item{total_of_special_requests}{number of guest special requests} 40 | \item{reservation_status}{Canceled, Check-Out, No-Show} 41 | \item{reservation_status_date}{when the guest cancelled or checked out} 42 | } 43 | } 44 | \source{ 45 | Nuno Antonio, Ana de Almeida, and Luis Nunes (2019). "Hotel booking demand datasets." Data in Brief (22): 41-49. 46 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-02-11/hotels.csv/}. 47 | } 48 | \usage{ 49 | hotel_bookings 50 | } 51 | \description{ 52 | A random subset of the data on hotel bookings originally collected by Antonio, Almeida and Nunes (2019) and distributed through the R for Data Science TidyTuesday project. 53 | } 54 | \keyword{datasets} 55 | -------------------------------------------------------------------------------- /man/loons.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_loons.R 3 | \docType{data} 4 | \name{loons} 5 | \alias{loons} 6 | \title{Loon Count Data} 7 | \format{ 8 | A data frame with 18 rows and 5 variables. Each row represents loon observations in the given year. 9 | \describe{ 10 | \item{year}{year of data collection} 11 | \item{count}{number of loons observed} 12 | \item{hours}{total person-hours of observation period} 13 | \item{count_per_hour}{count divided by hours} 14 | \item{count_per_100}{count_per_hour multiplied by 100 hours} 15 | } 16 | } 17 | \source{ 18 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2019/2019-06-18/bird_counts.csv}. 19 | } 20 | \usage{ 21 | loons 22 | } 23 | \description{ 24 | Loon count data collected from the year 2000 to 2017, in late December, by birdwatchers in the Ontario, Canada area. 
25 | The data was made available by the Bird Studies Canada website and distributed through the R for Data Science TidyTuesday project. 26 | A more complete data set with a larger selection of birds can be found in the bird_counts data in the bayesrules package. 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/moma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_moma.R 3 | \docType{data} 4 | \name{moma} 5 | \alias{moma} 6 | \title{Museum of Modern Art (MoMA) data} 7 | \format{ 8 | A data frame with 10964 rows and 11 variables. Each row represents an individual artist in the MoMA collection. 9 | \describe{ 10 | \item{artist}{name} 11 | \item{country}{country of origin} 12 | \item{birth}{year of birth} 13 | \item{death}{year of death} 14 | \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 15 | \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 16 | \item{gender}{gender identity (as perceived by MoMA employees)} 17 | \item{department}{MoMA department in which the artist's works most frequently appear} 18 | \item{count}{number of the artist's works in the MoMA collection} 19 | \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 20 | \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 21 | } 22 | } 23 | \source{ 24 | \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 25 | } 26 | \usage{ 27 | moma 28 | } 29 | \description{ 30 | The Museum of Modern Art data includes information about the individual artists included in the collection of the Museum of Modern Art in New York City. 31 | It does not include information about works for artist collectives or companies. 
32 | The data was made available by MoMA itself and downloaded in December 2020. 33 | } 34 | \keyword{datasets} 35 | -------------------------------------------------------------------------------- /man/moma_sample.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_moma_sample.R 3 | \docType{data} 4 | \name{moma_sample} 5 | \alias{moma_sample} 6 | \title{Museum of Modern Art (MoMA) data sample} 7 | \format{ 8 | A data frame with 100 rows and 10 variables. Each row represents an individual artist in the MoMA collection. 9 | \describe{ 10 | \item{artist}{name} 11 | \item{country}{country of origin} 12 | \item{birth}{year of birth} 13 | \item{death}{year of death} 14 | \item{alive}{whether or not the artist was living at the time of data collection (December 2020)} 15 | \item{genx}{whether or not the artist is Gen X or younger, ie. born during 1965 or after} 16 | \item{gender}{gender identity (as perceived by MoMA employees)} 17 | \item{count}{number of the artist's works in the MoMA collection} 18 | \item{year_acquired_min}{first year MoMA acquired one of the artist's works} 19 | \item{year_acquired_max}{most recent year MoMA acquired one of the artist's works} 20 | } 21 | } 22 | \source{ 23 | \url{https://github.com/MuseumofModernArt/collection/blob/master/Artworks.csv/}. 24 | } 25 | \usage{ 26 | moma_sample 27 | } 28 | \description{ 29 | A random sample of 100 artists represented in the Museum of Modern Art in New York City. 30 | The data was made available by MoMA itself and downloaded in December 2020. 31 | It does not include information about artist collectives or companies. 
32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/naive_classification_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/naive_classification_summary.R 3 | \name{naive_classification_summary} 4 | \alias{naive_classification_summary} 5 | \title{Posterior Classification Summaries for a Naive Bayes model} 6 | \usage{ 7 | naive_classification_summary(model, data, y) 8 | } 9 | \arguments{ 10 | \item{model}{a naiveBayes model object with categorical y} 11 | 12 | \item{data}{data frame including the variables in the model} 13 | 14 | \item{y}{a character string indicating the y variable in data} 15 | } 16 | \value{ 17 | a list 18 | } 19 | \description{ 20 | Given a set of observed data including a categorical response variable y 21 | and a naiveBayes model of y, 22 | this function returns summaries of the model's posterior classification quality. 23 | These summaries include a confusion matrix as well as an estimate of the model's 24 | overall accuracy. 
25 | } 26 | \examples{ 27 | data(penguins_bayes, package = "bayesrules") 28 | example_model <- e1071::naiveBayes(species ~ bill_length_mm, data = penguins_bayes) 29 | naive_classification_summary(model = example_model, data = penguins_bayes, y = "species") 30 | } 31 | -------------------------------------------------------------------------------- /man/naive_classification_summary_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/naive_classification_summary_cv.R 3 | \name{naive_classification_summary_cv} 4 | \alias{naive_classification_summary_cv} 5 | \title{Cross-Validated Posterior Classification Summaries for a Naive Bayes model} 6 | \usage{ 7 | naive_classification_summary_cv(model, data, y, k = 10) 8 | } 9 | \arguments{ 10 | \item{model}{a naiveBayes model object with categorical y} 11 | 12 | \item{data}{data frame including the variables in the model} 13 | 14 | \item{y}{a character string indicating the y variable in data} 15 | 16 | \item{k}{the number of folds to use for cross validation} 17 | } 18 | \value{ 19 | a list 20 | } 21 | \description{ 22 | Given a set of observed data including a categorical response variable y 23 | and a naiveBayes model of y, 24 | this function returns a cross validated confusion matrix by which to assess 25 | the model's posterior classification quality. 
26 | } 27 | \examples{ 28 | data(penguins_bayes, package = "bayesrules") 29 | example_model <- e1071::naiveBayes(species ~ bill_length_mm, data = penguins_bayes) 30 | naive_classification_summary_cv(model = example_model, data = penguins_bayes, y = "species", k = 2) 31 | } 32 | -------------------------------------------------------------------------------- /man/penguins_bayes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_penguins_bayes.R 3 | \docType{data} 4 | \name{penguins_bayes} 5 | \alias{penguins_bayes} 6 | \title{Penguins Data} 7 | \format{ 8 | A data frame with 344 penguins and 9 variables on each. 9 | \describe{ 10 | \item{species}{species (Adelie, Chinstrap, Gentoo)} 11 | \item{island}{home island (Biscoe, Dream, Torgersen)} 12 | \item{year}{year of observation} 13 | \item{bill_length_mm}{length of bill (mm)} 14 | \item{bill_depth_mm}{depth of bill (mm)} 15 | \item{flipper_length_mm}{length of flipper (mm)} 16 | \item{body_mass_g}{body mass (g)} 17 | \item{above_average_weight}{whether or not the body mass exceeds 4200g (TRUE or FALSE)} 18 | \item{sex}{male or female} 19 | } 20 | } 21 | \source{ 22 | Gorman KB, Williams TD, and Fraser WR (2014). Ecological sexual dimorphism and environmental variability within a community of antarctic penguins (Genus Pygoscelis). PLoS ONE, 9(3). 23 | } 24 | \usage{ 25 | penguins_bayes 26 | } 27 | \description{ 28 | Data on penguins in the Palmer Archipelago, originally collected by Gorman et al. and distributed through the penguins data in the palmerpenguins package. 29 | In addition to the original penguins data is a variable above_average_weight. 
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/plot_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_beta.R 3 | \name{plot_beta} 4 | \alias{plot_beta} 5 | \title{Plot a Beta Model for \eqn{\pi}} 6 | \usage{ 7 | plot_beta(alpha, beta, mean = FALSE, mode = FALSE) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the Beta model} 11 | 12 | \item{mean, mode}{a logical value indicating whether to display the model mean and mode} 13 | } 14 | \value{ 15 | A density plot for the Beta model. 16 | } 17 | \description{ 18 | Plots the probability density function (pdf) for 19 | a Beta(alpha, beta) model of variable \eqn{\pi}. 20 | } 21 | \examples{ 22 | plot_beta(alpha = 1, beta = 12, mean = TRUE, mode = TRUE) 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_beta_binomial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_beta_binomial.R 3 | \name{plot_beta_binomial} 4 | \alias{plot_beta_binomial} 5 | \title{Plot a Beta-Binomial Bayesian Model} 6 | \usage{ 7 | plot_beta_binomial( 8 | alpha, 9 | beta, 10 | y = NULL, 11 | n = NULL, 12 | prior = TRUE, 13 | likelihood = TRUE, 14 | posterior = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{alpha, beta}{positive shape parameters of the prior Beta model} 19 | 20 | \item{y}{observed number of successes} 21 | 22 | \item{n}{observed number of trials} 23 | 24 | \item{prior}{a logical value indicating whether the prior model should be plotted} 25 | 26 | \item{likelihood}{a logical value indicating whether the scaled likelihood should be plotted} 27 | 28 | \item{posterior}{a logical value indicating whether posterior model should be 
plotted} 29 | } 30 | \value{ 31 | a ggplot 32 | } 33 | \description{ 34 | Consider a Beta-Binomial Bayesian model for parameter \eqn{\pi} with 35 | a Beta(alpha, beta) prior on \eqn{\pi} and Binomial likelihood with n trials 36 | and y successes. Given information on the prior (alpha and beta) and data (y and n), 37 | this function produces a plot of any combination of the corresponding prior pdf, 38 | scaled likelihood function, and posterior pdf. All three are included by default. 39 | } 40 | \examples{ 41 | 42 | plot_beta_binomial(alpha = 1, beta = 13, y = 25, n = 50) 43 | plot_beta_binomial(alpha = 1, beta = 13, y = 25, n = 50, posterior = FALSE) 44 | 45 | } 46 | -------------------------------------------------------------------------------- /man/plot_beta_ci.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_beta_ci.R 3 | \name{plot_beta_ci} 4 | \alias{plot_beta_ci} 5 | \title{Plot a Beta Model with Credible Interval} 6 | \usage{ 7 | plot_beta_ci(alpha, beta, ci_level = 0.95) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the Beta model} 11 | 12 | \item{ci_level}{credible interval level} 13 | } 14 | \value{ 15 | A density plot for the Beta model 16 | } 17 | \description{ 18 | Plots the probability density function (pdf) for a 19 | Beta(alpha, beta) model of variable \eqn{\pi} with markings indicating 20 | a credible interval for \eqn{\pi}. 
21 | } 22 | \examples{ 23 | plot_beta_ci(alpha = 7, beta = 12, ci_level = 0.80) 24 | } 25 | -------------------------------------------------------------------------------- /man/plot_binomial_likelihood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_binomial_likelihood.R 3 | \name{plot_binomial_likelihood} 4 | \alias{plot_binomial_likelihood} 5 | \title{Plot a Binomial Likelihood Function} 6 | \usage{ 7 | plot_binomial_likelihood(y, n, mle = FALSE) 8 | } 9 | \arguments{ 10 | \item{y}{number of successes} 11 | 12 | \item{n}{number of trials} 13 | 14 | \item{mle}{a logical value indicating whether maximum likelihood estimate of \eqn{\pi}, y/n, should be plotted} 15 | } 16 | \value{ 17 | a ggplot 18 | } 19 | \description{ 20 | Plots the Binomial likelihood function for variable \eqn{\pi} 21 | given y observed successes in a series of n Binomial trials. 22 | } 23 | \examples{ 24 | plot_binomial_likelihood(y = 3, n = 10, mle = TRUE) 25 | } 26 | -------------------------------------------------------------------------------- /man/plot_gamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_gamma.R 3 | \name{plot_gamma} 4 | \alias{plot_gamma} 5 | \title{Plot a Gamma Model for \eqn{\lambda}} 6 | \usage{ 7 | plot_gamma(shape, rate, mean = FALSE, mode = FALSE) 8 | } 9 | \arguments{ 10 | \item{shape}{non-negative shape parameter of the Gamma model} 11 | 12 | \item{rate}{non-negative rate parameter of the Gamma model} 13 | 14 | \item{mean, mode}{a logical value indicating whether to display the model mean and mode} 15 | } 16 | \value{ 17 | A density plot for the Gamma model. 18 | } 19 | \description{ 20 | Plots the probability density function (pdf) for 21 | a Gamma(shape, rate) model of variable \eqn{\lambda}. 
22 | } 23 | \examples{ 24 | plot_gamma(shape = 2, rate = 11, mean = TRUE, mode = TRUE) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /man/plot_gamma_poisson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_gamma_poisson.R 3 | \name{plot_gamma_poisson} 4 | \alias{plot_gamma_poisson} 5 | \title{Plot a Gamma-Poisson Bayesian Model} 6 | \usage{ 7 | plot_gamma_poisson( 8 | shape, 9 | rate, 10 | sum_y = NULL, 11 | n = NULL, 12 | prior = TRUE, 13 | likelihood = TRUE, 14 | posterior = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{shape}{non-negative shape parameter of the Gamma prior} 19 | 20 | \item{rate}{non-negative rate parameter of the Gamma prior} 21 | 22 | \item{sum_y}{sum of observed data values for the Poisson likelihood} 23 | 24 | \item{n}{number of observations for the Poisson likelihood} 25 | 26 | \item{prior}{a logical value indicating whether the prior model should be plotted.} 27 | 28 | \item{likelihood}{a logical value indicating whether the scaled likelihood should be plotted.} 29 | 30 | \item{posterior}{a logical value indicating whether posterior model should be plotted.} 31 | } 32 | \value{ 33 | a ggplot 34 | } 35 | \description{ 36 | Consider a Gamma-Poisson Bayesian model for rate parameter \eqn{\lambda} with 37 | a Gamma(shape, rate) prior on \eqn{\lambda} and a Poisson likelihood for the data. 38 | Given information on the prior (shape and rate) 39 | and data (the sample size n and sum_y), 40 | this function produces a plot of any combination of the corresponding prior pdf, 41 | scaled likelihood function, and posterior pdf. All three are included by default. 
42 | } 43 | \examples{ 44 | plot_gamma_poisson(shape = 100, rate = 20, sum_y = 39, n = 6) 45 | plot_gamma_poisson(shape = 100, rate = 20, sum_y = 39, n = 6, posterior = FALSE) 46 | } 47 | -------------------------------------------------------------------------------- /man/plot_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_normal.R 3 | \name{plot_normal} 4 | \alias{plot_normal} 5 | \title{Plot a Normal Model for \eqn{\mu}} 6 | \usage{ 7 | plot_normal(mean, sd) 8 | } 9 | \arguments{ 10 | \item{mean}{mean parameter of the Normal model} 11 | 12 | \item{sd}{standard deviation parameter of the Normal model} 13 | } 14 | \value{ 15 | a ggplot 16 | } 17 | \description{ 18 | Plots the probability density function (pdf) for a 19 | Normal(mean, sd^2) model of variable \eqn{\mu}. 20 | } 21 | \examples{ 22 | plot_normal(mean = 3.5, sd = 0.5) 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_normal_likelihood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_normal_likelihood.R 3 | \name{plot_normal_likelihood} 4 | \alias{plot_normal_likelihood} 5 | \title{Plot a Normal Likelihood Function} 6 | \usage{ 7 | plot_normal_likelihood(y, sigma = NULL) 8 | } 9 | \arguments{ 10 | \item{y}{vector of observed data} 11 | 12 | \item{sigma}{optional value for assumed standard deviation of y. by default, this is calculated by the sample standard deviation of y.} 13 | } 14 | \value{ 15 | a ggplot of Normal likelihood 16 | } 17 | \description{ 18 | Plots the Normal likelihood function for variable \eqn{\mu} 19 | given a vector of Normal data y. 
20 | } 21 | \examples{ 22 | plot_normal_likelihood(y = rnorm(50, mean = 10, sd = 2), sigma = 1.5) 23 | } 24 | -------------------------------------------------------------------------------- /man/plot_normal_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_normal_normal.R 3 | \name{plot_normal_normal} 4 | \alias{plot_normal_normal} 5 | \title{Plot a Normal-Normal Bayesian model} 6 | \usage{ 7 | plot_normal_normal( 8 | mean, 9 | sd, 10 | sigma = NULL, 11 | y_bar = NULL, 12 | n = NULL, 13 | prior = TRUE, 14 | likelihood = TRUE, 15 | posterior = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{mean}{mean of the Normal prior} 20 | 21 | \item{sd}{standard deviation of the Normal prior} 22 | 23 | \item{sigma}{standard deviation of the data, or likelihood standard deviation} 24 | 25 | \item{y_bar}{sample mean of the data} 26 | 27 | \item{n}{sample size of the data} 28 | 29 | \item{prior}{a logical value indicating whether the prior model should be plotted} 30 | 31 | \item{likelihood}{a logical value indicating whether the scaled likelihood should be plotted} 32 | 33 | \item{posterior}{a logical value indicating whether posterior model should be plotted} 34 | } 35 | \value{ 36 | a ggplot 37 | } 38 | \description{ 39 | Consider a Normal-Normal Bayesian model for mean parameter \eqn{\mu} with 40 | a N(mean, sd^2) prior on \eqn{\mu} and a Normal likelihood for the data. 41 | Given information on the prior (mean and sd) 42 | and data (the sample size n, mean y_bar, and standard deviation sigma), 43 | this function produces a plot of any combination of the corresponding prior pdf, 44 | scaled likelihood function, and posterior pdf. All three are included by default. 
45 | } 46 | \examples{ 47 | plot_normal_normal(mean = 0, sd = 3, sigma= 4, y_bar = 5, n = 3) 48 | plot_normal_normal(mean = 0, sd = 3, sigma= 4, y_bar = 5, n = 3, posterior = FALSE) 49 | } 50 | -------------------------------------------------------------------------------- /man/plot_poisson_likelihood.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plot_poisson_likelihood.R 3 | \name{plot_poisson_likelihood} 4 | \alias{plot_poisson_likelihood} 5 | \title{Plot a Poisson Likelihood Function} 6 | \usage{ 7 | plot_poisson_likelihood(y, lambda_upper_bound = 10) 8 | } 9 | \arguments{ 10 | \item{y}{vector of observed Poisson counts} 11 | 12 | \item{lambda_upper_bound}{upper bound for lambda values to display on x-axis} 13 | } 14 | \value{ 15 | a ggplot of Poisson likelihood 16 | } 17 | \description{ 18 | Plots the Poisson likelihood function for variable \eqn{\lambda} 19 | given a vector of Poisson counts y. 20 | } 21 | \examples{ 22 | plot_poisson_likelihood(y = c(4, 2, 7), lambda_upper_bound = 10) 23 | } 24 | -------------------------------------------------------------------------------- /man/pop_vs_soda.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_pop_vs_soda.R 3 | \docType{data} 4 | \name{pop_vs_soda} 5 | \alias{pop_vs_soda} 6 | \title{Pop vs Soda vs Coke} 7 | \format{ 8 | A data frame with 374250 observations, one per survey respondent, and 4 variables: 9 | \describe{ 10 | \item{state}{the U.S. state in which the respondent resides} 11 | \item{region}{region in which the state falls (as defined by the U.S. 
Census)} 12 | \item{word_for_cola}{how the respondent refers to fizzy cola drinks} 13 | \item{pop}{whether or not the respondent refers to fizzy cola drinks as "pop"} 14 | } 15 | } 16 | \source{ 17 | The survey responses were obtained at \url{https://popvssoda.com/} which is maintained by Alan McConchie. 18 | } 19 | \usage{ 20 | pop_vs_soda 21 | } 22 | \description{ 23 | Results of a volunteer survey on how people around the U.S. refer to fizzy cola drinks. The options are "pop", "soda", "coke", or "other". 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/prediction_summary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction_summary.R 3 | \name{prediction_summary} 4 | \alias{prediction_summary} 5 | \title{Posterior Predictive Summaries} 6 | \usage{ 7 | prediction_summary( 8 | model, 9 | data, 10 | prob_inner = 0.5, 11 | prob_outer = 0.95, 12 | stable = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{model}{an rstanreg model object with quantitative y} 17 | 18 | \item{data}{data frame including the variables in the model, both response y and predictors x} 19 | 20 | \item{prob_inner}{posterior predictive interval probability (a value between 0 and 1)} 21 | 22 | \item{prob_outer}{posterior predictive interval probability (a value between 0 and 1)} 23 | 24 | \item{stable}{TRUE returns the number of absolute deviations and FALSE returns the standard deviations that observed y values fall from their predictive medians} 25 | } 26 | \value{ 27 | a tibble 28 | } 29 | \description{ 30 | Given a set of observed data including a quantitative response variable y 31 | and an rstanreg model of y, 32 | this function returns 4 measures of the posterior prediction quality. 
33 | Median absolute prediction error (mae) measures the typical difference between the observed y values and their posterior predictive medians (stable = TRUE) or means (stable = FALSE). 34 | Scaled mae (mae_scaled) measures the typical number of absolute deviations (stable = TRUE) or standard deviations (stable = FALSE) that observed y values fall from their predictive medians (stable = TRUE) or means (stable = FALSE). 35 | within_50 and within_90 report the proportion of observed y values that fall within their posterior prediction intervals, the probability levels of which are set by the user. 36 | } 37 | \examples{ 38 | example_data <- data.frame(x = sample(1:100, 20)) 39 | example_data$y <- example_data$x*3 + rnorm(20, 0, 5) 40 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data) 41 | prediction_summary(example_model, example_data, prob_inner = 0.6, prob_outer = 0.80, stable = TRUE) 42 | } 43 | -------------------------------------------------------------------------------- /man/prediction_summary_cv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction_summary_cv.R 3 | \name{prediction_summary_cv} 4 | \alias{prediction_summary_cv} 5 | \title{Cross-Validated Posterior Predictive Summaries} 6 | \usage{ 7 | prediction_summary_cv( 8 | data, 9 | group, 10 | model, 11 | k, 12 | prob_inner = 0.5, 13 | prob_outer = 0.95 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{data frame including the variables in the model, both response y and predictors x} 18 | 19 | \item{group}{a character string representing the name of the factor grouping variable, ie. 
random effect (only used for hierarchical models)} 20 | 21 | \item{model}{an rstanreg model object with quantitative y} 22 | 23 | \item{k}{the number of folds to use for cross validation} 24 | 25 | \item{prob_inner}{posterior predictive interval probability (a value between 0 and 1)} 26 | 27 | \item{prob_outer}{posterior predictive interval probability (a value between 0 and 1)} 28 | } 29 | \value{ 30 | list 31 | } 32 | \description{ 33 | Given a set of observed data including a quantitative response variable y 34 | and an rstanreg model of y, 35 | this function returns 4 cross-validated measures of the model's posterior prediction quality: 36 | Median absolute prediction error (mae) measures the typical difference between the observed y values and their posterior predictive medians (stable = TRUE) or means (stable = FALSE). 37 | Scaled mae (mae_scaled) measures the typical number of absolute deviations (stable = TRUE) or standard deviations (stable = FALSE) that observed y values fall from their predictive medians (stable = TRUE) or means (stable = FALSE). 38 | within_50 and within_90 report the proportion of observed y values that fall within their posterior prediction intervals, the probability levels of which are set by the user. 39 | For hierarchical models of class lmerMod, the folds are comprised by collections of groups, not individual observations. 
40 | } 41 | \examples{ 42 | example_data <- data.frame(x = sample(1:100, 20)) 43 | example_data$y <- example_data$x*3 + rnorm(20, 0, 5) 44 | example_model <- rstanarm::stan_glm(y ~ x, data = example_data) 45 | prediction_summary_cv(model = example_model, data = example_data, k = 2) 46 | } 47 | -------------------------------------------------------------------------------- /man/pulse_of_the_nation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_pulse_of_the_nation.R 3 | \docType{data} 4 | \name{pulse_of_the_nation} 5 | \alias{pulse_of_the_nation} 6 | \title{Cards Against Humanity's Pulse of the Nation Survey} 7 | \format{ 8 | A data frame with observations on 1000 survey respondents with 15 variables: 9 | \describe{ 10 | \item{income}{income in \$1000s} 11 | \item{age}{age in years} 12 | \item{party}{political party affiliation} 13 | \item{trump_approval}{approval level of Donald Trump's job performance} 14 | \item{education}{maximum education level completed} 15 | \item{robots}{opinion of how likely their job is to be replaced by robots within 10 years} 16 | \item{climate_change}{belief in climate change} 17 | \item{transformers}{the number of Transformers film the respondent has seen} 18 | \item{science_is_honest}{opinion of whether scientists are generally honest and serve the public good} 19 | \item{vaccines_are_safe}{opinion of whether vaccines are safe and protect children from disease} 20 | \item{books}{number of books read in the past year} 21 | \item{ghosts}{whether or not they believe in ghosts} 22 | \item{fed_sci_budget}{respondent's estimate of the percentage of the federal budget that is spent on scientific research} 23 | \item{earth_sun}{belief about whether the earth is always farther away from the sun in winter than in summer (TRUE or FALSE)} 24 | \item{wise_unwise}{whether the respondent would rather be wise but unhappy, 
or unwise but happy} 25 | } 26 | } 27 | \source{ 28 | \url{https://thepulseofthenation.com/downloads/201709-CAH_PulseOfTheNation_Raw.csv} 29 | } 30 | \usage{ 31 | pulse_of_the_nation 32 | } 33 | \description{ 34 | Cards Against Humanity's "Pulse of the Nation" project (\url{https://thepulseofthenation.com/}) conducted monthly polls into people's social and political views, as well as some silly things. This data includes responses to a subset of questions included in the poll conducted in September 2017. 35 | } 36 | \keyword{datasets} 37 | -------------------------------------------------------------------------------- /man/sample_mode.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample_mode.R 3 | \name{sample_mode} 4 | \alias{sample_mode} 5 | \title{Sample Mode} 6 | \usage{ 7 | sample_mode(x) 8 | } 9 | \arguments{ 10 | \item{x}{vector of sample data} 11 | } 12 | \value{ 13 | sample mode 14 | } 15 | \description{ 16 | Calculate the sample mode of vector x. 
17 | } 18 | \examples{ 19 | sample_mode(rbeta(100, 2, 7)) 20 | } 21 | -------------------------------------------------------------------------------- /man/spotify.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_spotify.R 3 | \docType{data} 4 | \name{spotify} 5 | \alias{spotify} 6 | \title{Spotify Song Data} 7 | \format{ 8 | A data frame with 350 songs (or tracks) and 23 variables: 9 | \describe{ 10 | \item{track_id}{unique song identifier} 11 | \item{title}{song name} 12 | \item{artist}{song artist} 13 | \item{popularity}{song popularity from 0 (low) to 100 (high)} 14 | \item{album_id}{id of the album on which the song appears} 15 | \item{album_name}{name of the album on which the song appears} 16 | \item{album_release_date}{when the album was released} 17 | \item{playlist_name}{Spotify playlist on which the song appears} 18 | \item{playlist_id}{unique playlist identifier} 19 | \item{genre}{genre of the playlist} 20 | \item{subgenre}{subgenre of the playlist} 21 | \item{danceability}{a score from 0 (not danceable) to 100 (danceable) based on features such as tempo, rhythm, etc.} 22 | \item{energy}{a score from 0 (low energy) to 100 (high energy) based on features such as loudness, timbre, entropy, etc.} 23 | \item{key}{song key} 24 | \item{loudness}{song loudness (dB)} 25 | \item{mode}{0 (minor key) or 1 (major key)} 26 | \item{speechiness}{a score from 0 (non-speechy tracks) to 100 (speechy tracks)} 27 | \item{acousticness}{a score from 0 (not acoustic) to 100 (very acoustic)} 28 | \item{instrumentalness}{a score from 0 (not instrumental) to 100 (very instrumental)} 29 | \item{liveness}{a score from 0 (no live audience presence on the song) to 100 (strong live audience presence on the song)} 30 | \item{valence}{a score from 0 (the song is more negative, sad, angry) to 100 (the song is more positive, happy, euphoric)} 31 | 
\item{tempo}{song tempo (beats per minute)} 32 | \item{duration_ms}{song duration (ms)} 33 | } 34 | } 35 | \source{ 36 | \url{https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-01-21/spotify_songs.csv/}. 37 | } 38 | \usage{ 39 | spotify 40 | } 41 | \description{ 42 | A sub-sample of the Spotify song data originally collected by Kaylin Pavlik (kaylinquest) and distributed through the R for Data Science TidyTuesday project. 43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /man/summarize_beta.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_beta.R 3 | \name{summarize_beta} 4 | \alias{summarize_beta} 5 | \title{Summarize a Beta Model for \eqn{\pi}} 6 | \usage{ 7 | summarize_beta(alpha, beta) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the Beta model} 11 | } 12 | \value{ 13 | a summary table 14 | } 15 | \description{ 16 | Summarizes the expected value, variance, and mode of 17 | a Beta(alpha, beta) model for variable \eqn{\pi}. 
18 | } 19 | \examples{ 20 | summarize_beta(alpha = 1, beta = 15) 21 | } 22 | -------------------------------------------------------------------------------- /man/summarize_beta_binomial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_beta_binomial.R 3 | \name{summarize_beta_binomial} 4 | \alias{summarize_beta_binomial} 5 | \title{Summarize a Beta-Binomial Bayesian model} 6 | \usage{ 7 | summarize_beta_binomial(alpha, beta, y = NULL, n = NULL) 8 | } 9 | \arguments{ 10 | \item{alpha, beta}{positive shape parameters of the prior Beta model} 11 | 12 | \item{y}{number of successes} 13 | 14 | \item{n}{number of trials} 15 | } 16 | \value{ 17 | a summary table 18 | } 19 | \description{ 20 | Consider a Beta-Binomial Bayesian model for parameter \eqn{\pi} with 21 | a Beta(alpha, beta) prior on \eqn{\pi} and Binomial likelihood with n trials 22 | and y successes. Given information on the prior (alpha and beta) and data (y and n), 23 | this function summarizes the mean, mode, and variance of the 24 | prior and posterior Beta models of \eqn{\pi}. 
25 | } 26 | \examples{ 27 | summarize_beta_binomial(alpha = 1, beta = 15, y = 25, n = 50) 28 | } 29 | -------------------------------------------------------------------------------- /man/summarize_gamma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_gamma.R 3 | \name{summarize_gamma} 4 | \alias{summarize_gamma} 5 | \title{Summarize a Gamma Model for \eqn{\lambda}} 6 | \usage{ 7 | summarize_gamma(shape, rate) 8 | } 9 | \arguments{ 10 | \item{shape}{positive shape parameter of the Gamma model} 11 | 12 | \item{rate}{positive rate parameter of the Gamma model} 13 | } 14 | \value{ 15 | a summary table 16 | } 17 | \description{ 18 | Summarizes the expected value, variance, and mode of 19 | a Gamma(shape, rate) model for variable \eqn{\lambda}. 20 | } 21 | \examples{ 22 | summarize_gamma(shape = 1, rate = 15) 23 | 24 | } 25 | -------------------------------------------------------------------------------- /man/summarize_gamma_poisson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_gamma_poisson.R 3 | \name{summarize_gamma_poisson} 4 | \alias{summarize_gamma_poisson} 5 | \title{Summarize the Gamma-Poisson Model} 6 | \usage{ 7 | summarize_gamma_poisson(shape, rate, sum_y = NULL, n = NULL) 8 | } 9 | \arguments{ 10 | \item{shape}{positive shape parameter of the Gamma prior} 11 | 12 | \item{rate}{positive rate parameter of the Gamma prior} 13 | 14 | \item{sum_y}{sum of observed data values for the Poisson likelihood} 15 | 16 | \item{n}{number of observations for the Poisson likelihood} 17 | } 18 | \value{ 19 | data frame 20 | } 21 | \description{ 22 | Consider a Gamma-Poisson Bayesian model for rate parameter \eqn{\lambda} with 23 | a Gamma(shape, rate) prior on \eqn{\lambda} and a Poisson likelihood for the data. 
24 | Given information on the prior (shape and rate) 25 | and data (the sample size n and sum_y), 26 | this function summarizes the mean, mode, and variance of the 27 | prior and posterior Gamma models of \eqn{\lambda}. 28 | } 29 | \examples{ 30 | summarize_gamma_poisson(shape = 3, rate = 4, sum_y = 7, n = 12) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /man/summarize_normal_normal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/summarize_normal_normal.R 3 | \name{summarize_normal_normal} 4 | \alias{summarize_normal_normal} 5 | \title{Summarize a Normal-Normal Bayesian model} 6 | \usage{ 7 | summarize_normal_normal(mean, sd, sigma = NULL, y_bar = NULL, n = NULL) 8 | } 9 | \arguments{ 10 | \item{mean}{mean of the Normal prior} 11 | 12 | \item{sd}{standard deviation of the Normal prior} 13 | 14 | \item{sigma}{standard deviation of the data, or likelihood standard deviation} 15 | 16 | \item{y_bar}{sample mean of the data} 17 | 18 | \item{n}{sample size of the data} 19 | } 20 | \value{ 21 | data frame 22 | } 23 | \description{ 24 | Consider a Normal-Normal Bayesian model for mean parameter \eqn{\mu} with 25 | a N(mean, sd^2) prior on \eqn{\mu} and a Normal likelihood for the data. 26 | Given information on the prior (mean and sd) 27 | and data (the sample size n, mean y_bar, and standard deviation sigma), 28 | this function summarizes the mean, mode, and variance of the 29 | prior and posterior Normal models of \eqn{\mu}. 
30 | } 31 | \examples{ 32 | summarize_normal_normal(mean = 2.3, sd = 0.3, sigma = 5.1, y_bar = 128.5, n = 20) 33 | } 34 | -------------------------------------------------------------------------------- /man/voices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_voices.R 3 | \docType{data} 4 | \name{voices} 5 | \alias{voices} 6 | \title{Voice Pitch Data} 7 | \format{ 8 | A data frame with 84 rows and 4 variables. Each row represents a single observation for the given subject. 9 | \describe{ 10 | \item{subject}{subject identifier} 11 | \item{scenario}{context of the dialog (encoded as A, B, ..., G)} 12 | \item{attitude}{whether the attitude to use in dialog was polite or informal} 13 | \item{pitch}{average voice pitch (Hz)} 14 | } 15 | } 16 | \source{ 17 | Winter, B., & Grawunder, S. (2012). The Phonetic Profile of Korean Formal and Informal Speech Registers. Journal of Phonetics, 40, 808-815. 18 | \url{https://bodo-winter.net/data_and_scripts/POP.csv}. 19 | \url{https://bodo-winter.net/tutorial/bw_LME_tutorial2.pdf}. 20 | } 21 | \usage{ 22 | voices 23 | } 24 | \description{ 25 | Voice pitch data collected by Winter and Grawunder (2012). 26 | In an experiment, subjects participated in role-playing dialog under various conditions, 27 | while researchers monitored voice pitch (Hz). 28 | The conditions spanned different scenarios (e.g., making an appointment, asking for a favor) 29 | and different attitudes to use in the scenario (polite or informal).
30 | } 31 | \keyword{datasets} 32 | -------------------------------------------------------------------------------- /man/weather_WU.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_weather_WU.R 3 | \docType{data} 4 | \name{weather_WU} 5 | \alias{weather_WU} 6 | \title{Weather Data for 2 Australian Cities} 7 | \format{ 8 | A data frame with 200 daily observations and 22 variables from 2 Australian weather stations: 9 | \describe{ 10 | \item{location}{one of two weather stations} 11 | \item{mintemp}{minimum temperature (degrees Celsius)} 12 | \item{maxtemp}{maximum temperature (degrees Celsius)} 13 | \item{rainfall}{rainfall (mm)} 14 | \item{windgustdir}{direction of strongest wind gust} 15 | \item{windgustspeed}{speed of strongest wind gust (km/h)} 16 | \item{winddir9am}{direction of wind gust at 9am} 17 | \item{winddir3pm}{direction of wind gust at 3pm} 18 | \item{windspeed9am}{wind speed at 9am (km/h)} 19 | \item{windspeed3pm}{wind speed at 3pm (km/h)} 20 | \item{humidity9am}{humidity level at 9am (percent)} 21 | \item{humidity3pm}{humidity level at 3pm (percent)} 22 | \item{pressure9am}{atmospheric pressure at 9am (hpa)} 23 | \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 24 | \item{temp9am}{temperature at 9am (degrees Celsius)} 25 | \item{temp3pm}{temperature at 3pm (degrees Celsius)} 26 | \item{raintoday}{whether or not it rained today (Yes or No)} 27 | \item{risk_mm}{the amount of rain today (mm)} 28 | \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 29 | \item{year}{the year of the date} 30 | \item{month}{the month of the date} 31 | \item{day_of_year}{the day of the year} 32 | } 33 | } 34 | \source{ 35 | Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 
36 | } 37 | \usage{ 38 | weather_WU 39 | } 40 | \description{ 41 | A sub-sample of daily weather information from the weatherAUS data in the rattle package for two Australian cities, Wollongong and Uluru. 42 | The weather_australia data in the bayesrules package combines this data with a third city, Hobart. 43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /man/weather_australia.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_weather_australia.R 3 | \docType{data} 4 | \name{weather_australia} 5 | \alias{weather_australia} 6 | \title{Weather Data for 3 Australian Cities} 7 | \format{ 8 | A data frame with 300 daily observations and 22 variables from 3 Australian weather stations: 9 | \describe{ 10 | \item{location}{one of three weather stations} 11 | \item{mintemp}{minimum temperature (degrees Celsius)} 12 | \item{maxtemp}{maximum temperature (degrees Celsius)} 13 | \item{rainfall}{rainfall (mm)} 14 | \item{windgustdir}{direction of strongest wind gust} 15 | \item{windgustspeed}{speed of strongest wind gust (km/h)} 16 | \item{winddir9am}{direction of wind gust at 9am} 17 | \item{winddir3pm}{direction of wind gust at 3pm} 18 | \item{windspeed9am}{wind speed at 9am (km/h)} 19 | \item{windspeed3pm}{wind speed at 3pm (km/h)} 20 | \item{humidity9am}{humidity level at 9am (percent)} 21 | \item{humidity3pm}{humidity level at 3pm (percent)} 22 | \item{pressure9am}{atmospheric pressure at 9am (hpa)} 23 | \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 24 | \item{temp9am}{temperature at 9am (degrees Celsius)} 25 | \item{temp3pm}{temperature at 3pm (degrees Celsius)} 26 | \item{raintoday}{whether or not it rained today (Yes or No)} 27 | \item{risk_mm}{the amount of rain today (mm)} 28 | \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 29 | \item{year}{the year of the
date} 30 | \item{month}{the month of the date} 31 | \item{day_of_year}{the day of the year} 32 | } 33 | } 34 | \source{ 35 | Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data/}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 36 | } 37 | \usage{ 38 | weather_australia 39 | } 40 | \description{ 41 | A sub-sample of daily weather information from the weatherAUS data in the rattle package for three Australian cities: Wollongong, Hobart, and Uluru. 42 | } 43 | \keyword{datasets} 44 | -------------------------------------------------------------------------------- /man/weather_perth.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data_weather_perth.R 3 | \docType{data} 4 | \name{weather_perth} 5 | \alias{weather_perth} 6 | \title{Weather Data for Perth, Australia} 7 | \format{ 8 | A data frame with 1000 daily observations and 21 variables: 9 | \describe{ 10 | \item{mintemp}{minimum temperature (degrees Celsius)} 11 | \item{maxtemp}{maximum temperature (degrees Celsius)} 12 | \item{rainfall}{rainfall (mm)} 13 | \item{windgustdir}{direction of strongest wind gust} 14 | \item{windgustspeed}{speed of strongest wind gust (km/h)} 15 | \item{winddir9am}{direction of wind gust at 9am} 16 | \item{winddir3pm}{direction of wind gust at 3pm} 17 | \item{windspeed9am}{wind speed at 9am (km/h)} 18 | \item{windspeed3pm}{wind speed at 3pm (km/h)} 19 | \item{humidity9am}{humidity level at 9am (percent)} 20 | \item{humidity3pm}{humidity level at 3pm (percent)} 21 | \item{pressure9am}{atmospheric pressure at 9am (hpa)} 22 | \item{pressure3pm}{atmospheric pressure at 3pm (hpa)} 23 | \item{temp9am}{temperature at 9am (degrees Celsius)} 24 | \item{temp3pm}{temperature at 3pm (degrees Celsius)} 25 | \item{raintoday}{whether or not it rained today (Yes or No)} 26 | \item{risk_mm}{the amount of rain today 
(mm)} 27 | \item{raintomorrow}{whether or not it rained the next day (Yes or No)} 28 | \item{year}{the year of the date} 29 | \item{month}{the month of the date} 30 | \item{day_of_year}{the day of the year} 31 | } 32 | } 33 | \source{ 34 | Data in the original weatherAUS data set were obtained from \url{https://www.bom.gov.au/climate/data/}. Copyright Commonwealth of Australia 2010, Bureau of Meteorology. 35 | } 36 | \usage{ 37 | weather_perth 38 | } 39 | \description{ 40 | A sub-sample of daily weather information on Perth, Australia from the weatherAUS data in the rattle package. 41 | } 42 | \keyword{datasets} 43 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.R 2 | *.html 3 | -------------------------------------------------------------------------------- /vignettes/conjugate-families.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Conjugate Families" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteIndexEntry{Conjugate Families} 6 | %\VignetteEngine{knitr::rmarkdown} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | ```{r, include = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>" 14 | ) 15 | ``` 16 | 17 | ```{r setup} 18 | library(bayesrules) 19 | ``` 20 | 21 | The **bayesrules** package has a set of functions that support exploring Bayesian models from three conjugate families: **Beta-Binomial**, **Gamma-Poisson**, and **Normal-Normal**. 22 | The functions either help with plotting (prior, likelihood, and/or posterior) or summarizing the descriptives (mean, mode, variance, and sd) of the prior and/or posterior. 23 | 24 | ## The Beta-Binomial Model 25 | 26 | We use the Beta-Binomial model to show the different set of functions and the arguments. 
27 | 28 | ### Prior 29 | 30 | ```{r fig.align='center', fig.height=4, fig.width=5} 31 | plot_beta(alpha = 3, beta = 13, mean = TRUE, mode = TRUE) 32 | ``` 33 | 34 | ```{r} 35 | summarize_beta(alpha = 3, beta = 13) 36 | ``` 37 | 38 | 39 | ### Likelihood 40 | 41 | In addition, `plot_binomial_likelihood()` helps users visualize the Binomial likelihood function and shows the maximum likelihood estimate. 42 | 43 | ```{r fig.align='center', fig.height=4, fig.width=5, message = FALSE} 44 | plot_binomial_likelihood(y = 3, n = 15, mle = TRUE) 45 | ``` 46 | 47 | 48 | 49 | ### Prior-Likelihood-Posterior 50 | 51 | The two other functions `plot_beta_binomial()` and `summarize_beta_binomial()` require both the prior parameters and the data for the likelihood. 52 | 53 | ```{r fig.align='center', warning = FALSE, fig.height=4, fig.width=5} 54 | plot_beta_binomial(alpha = 3, beta = 13, y = 5, n = 10, 55 | prior = TRUE, #the default 56 | likelihood = TRUE, #the default 57 | posterior = TRUE #the default 58 | ) 59 | ``` 60 | 61 | ```{r fig.align='center', warning = FALSE, fig.height=4, fig.width=5} 62 | summarize_beta_binomial(alpha = 3, beta = 13, y = 5, n = 10) 63 | ``` 64 | 65 | ## Other Conjugate Families 66 | 67 | For Gamma-Poisson and Normal-Normal models, the set of functions is similar but the arguments are different for each model. Arguments of the Gamma-Poisson functions include the `shape` and `rate` of the Gamma prior and `sum_y` and `n` arguments related to observed data, which represent the sum of observed data values and the number of observations, respectively. 68 | 69 | ```{r fig.align='center', warning = FALSE, fig.height=4, fig.width=5} 70 | plot_gamma_poisson( 71 | shape = 3, 72 | rate = 4, 73 | sum_y = 3, 74 | n = 9, 75 | prior = TRUE, 76 | likelihood = TRUE, 77 | posterior = TRUE 78 | ) 79 | ``` 80 | 81 | For the Normal-Normal model functions, the prior Normal model has the `mean` and `sd` arguments.
The observed data has `sigma`, `y_bar`, and `n`, which indicate the standard deviation, mean, and sample size of the data, respectively. 82 | 83 | ```{r} 84 | summarize_normal_normal(mean = 3.8, sd = 1.12, sigma = 5.8, y_bar = 3.35, n = 8) 85 | ``` 86 | 87 | --------------------------------------------------------------------------------