├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
    ├── BF_app.R
    ├── ames.R
    ├── ames_sampling_dist.R
    ├── arbuthnot.R
    ├── atheism.R
    ├── bandit_posterior.R
    ├── bandit_sim.R
    ├── bayes_inference.R
    ├── bayes_single_mean_JZS.R
    ├── bayes_single_mean_sim.R
    ├── bayes_single_mean_theo.R
    ├── bayes_single_prop.R
    ├── bayes_two_mean.R
    ├── bayes_two_prop.R
    ├── bayes_util.R
    ├── behren-fisher.R
    ├── brfss.R
    ├── calc_streak.R
    ├── ci_single_mean_sim.R
    ├── ci_single_mean_theo.R
    ├── ci_single_median_sim.R
    ├── ci_single_prop_sim.R
    ├── ci_single_prop_theo.R
    ├── ci_two_mean_sim.R
    ├── ci_two_mean_theo.R
    ├── ci_two_median_sim.R
    ├── ci_two_prop_sim.R
    ├── ci_two_prop_theo.R
    ├── credible_interval.R
    ├── evals.R
    ├── globals.R
    ├── ht_many_mean_theo.R
    ├── ht_many_prop_sim.R
    ├── ht_many_prop_theo.R
    ├── ht_single_mean_sim.R
    ├── ht_single_mean_theo.R
    ├── ht_single_median_sim.R
    ├── ht_single_prop_sim.R
    ├── ht_single_prop_theo.R
    ├── ht_two_mean_sim.R
    ├── ht_two_mean_theo.R
    ├── ht_two_median_sim.R
    ├── ht_two_prop_sim.R
    ├── ht_two_prop_theo.R
    ├── inference.R
    ├── kobe_basket.R
    ├── mlb11.R
    ├── nc.R
    ├── nycflights.R
    ├── plot_ss.R
    ├── present.R
    ├── rep_sample_n.R
    ├── rstudio.R
    ├── statsr.R
    ├── statswithr_lab.R
    ├── tapwater.R
    ├── wage.R
    └── zinc.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── cran-comments.md
├── data
    ├── ames.rda
    ├── arbuthnot.rda
    ├── atheism.rda
    ├── brfss.rda
    ├── evals.rda
    ├── kobe_basket.rda
    ├── mlb11.rda
    ├── nc.rda
    ├── nycflights.rda
    ├── present.rda
    ├── tapwater.rda
    ├── wage.rda
    └── zinc.rda
├── docs
    ├── 404.html
    ├── LICENSE-text.html
    ├── authors.html
    ├── bootstrap-toc.css
    ├── bootstrap-toc.js
    ├── docsearch.css
    ├── docsearch.js
    ├── index.html
    ├── jquery.sticky-kit.min.js
    ├── link.svg
    ├── news
    │   └── index.html
    ├── pkgdown.css
    ├── pkgdown.js
    ├── pkgdown.yml
    ├── reference
    │   ├── BF_app.html
    │   ├── Rplot001.png
    │   ├── Rplot002.png
    │   ├── Rplot003.png
    │   ├── Rplot004.png
    │   ├── Rplot005.png
    │   ├── allow_shiny.html
    │   ├── ames.html
    │   ├── ames_sampling_dist.html
    │   ├── arbuthnot.html
    │   ├── atheism.html
    │   ├── bandit_posterior-1.png
    │   ├── bandit_posterior.html
    │   ├── bandit_sim-1.png
    │   ├── bandit_sim.html
    │   ├── bayes_inference-1.png
    │   ├── bayes_inference-2.png
    │   ├── bayes_inference-3.png
    │   ├── bayes_inference-4.png
    │   ├── bayes_inference-5.png
    │   ├── bayes_inference.html
    │   ├── brfss.html
    │   ├── calc_streak.html
    │   ├── credible_interval_app.html
    │   ├── evals.html
    │   ├── figures
    │   │   └── unnamed-chunk-3-1.png
    │   ├── index.html
    │   ├── inference-1.png
    │   ├── inference-2.png
    │   ├── inference-3.png
    │   ├── inference.html
    │   ├── kobe_basket.html
    │   ├── mlb11.html
    │   ├── nc.html
    │   ├── nycflights.html
    │   ├── plot_bandit_posterior-1.png
    │   ├── plot_bandit_posterior.html
    │   ├── plot_ss.html
    │   ├── present.html
    │   ├── rep_sample_n.html
    │   ├── statsr.html
    │   ├── tapwater.html
    │   ├── wage.html
    │   ├── zinc-1.png
    │   └── zinc.html
    └── sitemap.xml
├── inst
    ├── WORDLIST
    └── lab.css
├── man
    ├── BF_app.Rd
    ├── allow_shiny.Rd
    ├── ames.Rd
    ├── ames_sampling_dist.Rd
    ├── arbuthnot.Rd
    ├── atheism.Rd
    ├── bandit_posterior.Rd
    ├── bandit_sim.Rd
    ├── bayes_inference.Rd
    ├── brfss.Rd
    ├── calc_streak.Rd
    ├── credible_interval_app.Rd
    ├── evals.Rd
    ├── figures
    │   └── unnamed-chunk-3-1.png
    ├── inference.Rd
    ├── kobe_basket.Rd
    ├── mlb11.Rd
    ├── nc.Rd
    ├── nycflights.Rd
    ├── plot_bandit_posterior.Rd
    ├── plot_ss.Rd
    ├── present.Rd
    ├── rep_sample_n.Rd
    ├── statsr.Rd
    ├── tapwater.Rd
    ├── wage.Rd
    └── zinc.Rd
├── statsr.Rproj
└── tests
    ├── spelling.R
    ├── testthat.R
    └── testthat
        └── test-bayes_inference.R


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | .travis.yml
 4 | orig_data
 5 | .Rhistory
 6 | ^README\.Rmd$
 7 | ^README-.*\.png$
 8 | cran-comments.md
 9 | ^CRAN-RELEASE$
10 | ^_pkgdown\.yml$
11 | ^docs$
12 | ^pkgdown$
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | # Session Data files
 5 | .RData
 6 | # Example code in package build process
 7 | *-Ex.R
 8 | # RStudio files
 9 | .Rproj.user/
10 | # produced vignettes
11 | vignettes/*.html
12 | vignettes/*.pdf
13 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
14 | .httr-oauth
15 | .Rproj.user
16 | orig_data/
17 | .DS_Store
18 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: r
 2 | cache: packages
 3 | r_check_args: '--as-cran'
 4 | 
 5 | warnings_are_errors: false
 6 | 
 7 | r:
 8 |   - release
 9 |   - devel
10 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: statsr
 2 | Type: Package
 3 | Title: Companion Software for the Coursera Statistics with R Specialization
 4 | Version: 0.3.0
 5 | Date: 2021-01-21
 6 | Authors@R: c(person("Colin", "Rundel", role=c("aut"), email="rundel@gmail.com"),
 7 |              person("Mine", "Cetinkaya-Rundel", role=c("aut"), email="mine@stat.duke.edu"),
 8 |              person("Merlise", "Clyde", role=c("aut", "cre"), email="clyde@duke.edu"),
 9 |              person("David", "Banks", role=c("aut"), email="banks@stat.duke.edu"))
10 | Maintainer: Merlise Clyde <clyde@duke.edu>
11 | Description: Data and functions to support Bayesian and frequentist inference and decision making 
12 |             for the Coursera Specialization "Statistics with R".
13 |             See <https://github.com/StatsWithR/statsr> for more information.
14 | LazyData: true
15 | License: MIT + file LICENSE
16 | RoxygenNote: 7.1.1
17 | Encoding: UTF-8
18 | Depends: 
19 |     R (>= 3.3.0),
20 |     BayesFactor
21 | Imports: 
22 |     dplyr,
23 |     rmarkdown,
24 |     knitr,
25 |     ggplot2,
26 |     broom,
27 |     gridExtra,
28 |     shiny,
29 |     cubature,
30 |     tidyr,
31 |     tibble,
32 |     utils
33 | Suggests:
34 |     spelling,
35 |     HistData,
36 |     testthat (>= 3.0.0)
37 | URL: https://github.com/StatsWithR/statsr    
38 | BugReports: https://github.com/StatsWithR/statsr/issues
39 | Language: en-US
40 | Config/testthat/edition: 3
41 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2016
2 | COPYRIGHT HOLDER:  StatsWithR
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(BF_app)
 4 | export(ames_sampling_dist)
 5 | export(bandit_posterior)
 6 | export(bandit_sim)
 7 | export(bayes_inference)
 8 | export(calc_streak)
 9 | export(credible_interval_app)
10 | export(inference)
11 | export(plot_bandit_posterior)
12 | export(plot_ss)
13 | export(rep_sample_n)
14 | import(ggplot2)
15 | import(graphics)
16 | import(shiny)
17 | import(stats)
18 | importFrom(BayesFactor,ttestBF)
19 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # News for statsr
 2 | 
 3 | # statsr 0.3.0
 4 | 
 5 | * merged the 'BayesFactor' branch with main branch so that the `bayes_inference` function on CRAN is consistent with book and other supplemental materials online.  Provides a more unified function and additional options.  Addresses [issue #15](https://github.com/StatsWithR/statsr/issues/15)
 6 | 
 7 | * Restore the tapwater and zinc data
 8 | 
 9 | # statsr 0.2.0
10 |  
11 | * updates so that functions are compatible with tibble package version 3.0.0
12 | 
13 | # statsr 0.1.0
14 | 
15 | * First release of package on CRAN to accompany version 2 of the Statistics With R course on Coursera and release of the online book [Introduction to Bayesian Thinking](https://statswithr.github.io/book/)


--------------------------------------------------------------------------------
/R/BF_app.R:
--------------------------------------------------------------------------------
  1 | #' Run the interactive Bayes Factor shiny app
  2 | #' 
  3 | #' This app illustrates how changing the Z score and prior precision
  4 | #' affects the Bayes Factor for testing H1 that the mean is zero 
  5 | #' versus H2 that the mean is not zero for data arising from a normal
  6 | #' population.  Lindley's paradox occurs for large sample sizes
  7 | #' when the Bayes factor favors H1 even though the Z score is large or the
  8 | #' p-value is small enough to reach statistical significance and the values of 
  9 | #' the sample mean do not reflect practical significance based on the prior 
 10 | #' distribution.
 11 | #' Bartlett's paradox may occur when the prior precision goes to zero, leading 
 12 | #' to Bayes factors that favor H1 regardless of the data.
 13 | #' A prior precision of one corresponds to the unit information prior.
 14 | #' @examples
 15 | #' if (interactive()) { 
 16 | #'   BF_app()
 17 | #' }
 18 | #' @export
 19 | #' 
 20 | BF_app = function()
 21 | {
 22 |   shinyApp(
 23 |     ui = pageWithSidebar(
 24 |       headerPanel(""),
 25 |       sidebarPanel(
 26 |         selectInput(inputId = "dist",
 27 |                     label = "Prior Distribution Family:",
 28 |                     choices = c("Normal" = "norm"),
 29 |                     selected = "norm"),
 30 |         br(),
 31 |         #   sliderInput("n0", "Prior Sample Size n0", min=0, max=4, step=0.01, value=1.0),
 32 |         sliderInput("Z", "Z score", 
 33 |                     min=-3, max=3, step=.05, value=0.0),
 34 | #        sliderInput("mu_2", "mu_2 (in units of  standard deviations)", 
 35 | #                     min=-3, max=3, step=.01, value=0.0),
 36 |         sliderInput("phi0", "Prior Precision", min=.000001, max=2, step=.01, 
 37 |                     value=1.0)
 38 |       ),
 39 |       mainPanel(
 40 |         plotOutput("BF_plot_mu")
 41 |       )
 42 |     ),
 43 |     server = function(input, output, session) 
 44 |     {
 45 |       # Bayes factor of H2 against H1 across sample sizes 1..1000 for the
 46 |       # Z score and prior precision currently selected in the sidebar.
 47 |       output$BF_plot_mu = renderPlot(
 48 |         {   
 49 |           d = data.frame(n = 1:1000)
 50 |           d$y = BF10.normal(input$Z, n=d$n, n0=input$phi0, 
 51 |                             logBF=FALSE, recip=FALSE)
 52 | 
 53 |           # Rows with BF >= 1 and rows with BF < 1 are shaded as separate polygons;
 54 |           # the extra vertices at y = 1 close each polygon along the BF = 1 level.
 55 |           BF.fav = subset(d, d$y >= 1)
 56 |           BF.against = subset(d, d$y < 1)
 57 |           if (nrow(BF.against) == nrow(d)) {  # none in favor
 58 |             BF.against = rbind(c(1,1),BF.against, 
 59 |                                c(max(d$n), 1)) }
 60 |           else {
 61 |             if (nrow(BF.fav) == nrow(d)) {  # all in favor
 62 |               BF.fav =  rbind(c(1,1), BF.fav,c(max(d$n), 1))}
 63 |             else {# nrow(BF.fav) > 1 ) { # mix
 64 |               BF.fav = rbind(c(1,1), BF.fav, c(max(BF.fav$n),1))
 65 |               BF.against = rbind(c(min(BF.against$n),1), BF.against,
 66 |                                  c(max(BF.against$n),1))
 67 |               }
 68 |           }
 69 | 
 70 |           ggplot(d, aes_string(x='n', y='y')) + 
 71 |             ylab("BF[H2:H1]") +
 72 |             xlab("Sample Size n") +
 73 |             geom_line() +
 74 |             geom_abline(slope=0, intercept=0) +
 75 |             geom_polygon(data=BF.fav,aes_string(x='n',y='y'),alpha=0.5) +
 76 |             geom_polygon(data=BF.against,aes_string(x='n',y='y'),alpha=0.5) +
 77 |             scale_y_log10() +
 78 |             ggtitle("Bayes Factor H2:H1   H1: mu = 0.0 versus H2: mu = mu_2 ")
 79 |         })
 80 | 
 81 |       # NOTE(review): the ui above has no plotOutput("BF_plot_sd"), and no control
 82 |       # defines input$ybar or input$n, so this output looks unused -- confirm
 83 |       # whether it should be wired into the ui or removed.
 84 |       output$BF_plot_sd = renderPlot(
 85 |         {   
 86 |           x=10^seq(-2, 6, length=1000)
 87 |           n0 =  1/(x^2)   # precision
 88 |           Z = input$ybar/sqrt(1/input$n)
 89 |           y = BF10.normal(Z, n=input$n, n0=n0, logBF=FALSE, recip=TRUE) 
 90 |           d = data.frame(x, y)
 91 | 
 92 |           ggplot(d, aes_string(x='x', y='y')) + 
 93 |             ylab("BF[H1:H2]") +
 94 |             xlab("Prior Standard Deviation (in units of sigma)") +
 95 |             geom_line() +
 96 |             scale_y_log10() +
 97 |             scale_x_log10() +
 98 |             geom_abline(slope=0, intercept=0) +
 99 |             ggtitle(paste0("Bayes Factor H1:H2   H1: mu = 0.0, Z = ", round(Z,2)))
100 |         })
101 |     },
102 |     # Passed to shinyApp() itself (as in ames_sampling_dist) so the height hint takes effect.
103 |     options = list(height = 500)
104 |   )
105 | }
106 | 
107 | # Bayes factor used by BF_app(), computed from the Z score `z`, the sample size `n`
108 | # and `n0` (BF_app() passes the prior precision here).
109 | # The value is returned on the log scale unless logBF is FALSE; recip = TRUE negates
110 | # the log value, i.e. yields the reciprocal Bayes factor.
111 | BF10.normal = function(z, n, n0, logBF=TRUE, recip=FALSE) {
112 |   BF10 = .5*(z^2)*n/(n+n0) - .5*log(n + n0) + .5*log(n0)
113 |   if (recip)   BF10 = -BF10
114 |   if (logBF == FALSE)  BF10 = exp(BF10)
115 |   return(BF10)
116 | }
117 | 


--------------------------------------------------------------------------------
/R/ames.R:
--------------------------------------------------------------------------------
 1 | #' Housing prices in Ames, Iowa
 2 | #'
 3 | #' Data set contains information from the Ames Assessor's Office used in computing 
 4 | #' assessed values for individual residential properties sold in Ames, IA from 2006 
 5 | #' to 2010. See http://www.amstat.org/publications/jse/v19n3/decock/datadocumentation.txt 
 6 | #' for detailed variable descriptions.
 7 | #'
 8 | #' @format A tbl_df with 2930 rows and 82 variables:
 9 | #' \describe{
10 | #'   \item{Order}{Observation number.}
11 | #'   \item{PID}{Parcel identification number  - can be used with city web site for parcel review.}
12 | #'   \item{area}{Above grade (ground) living area square feet.}
13 | #'   \item{price}{Sale price in USD.}
14 | #'   \item{MS.SubClass}{Identifies the type of dwelling involved in the sale.}
15 | #'   \item{MS.Zoning}{Identifies the general zoning classification of the sale.}
16 | #'   \item{Lot.Frontage}{Linear feet of street connected to property.}
17 | #'   \item{Lot.Area}{Lot size in square feet.}
18 | #'   \item{Street}{Type of road access to property.}
19 | #'   \item{Alley}{Type of alley access to property.}
20 | #'   \item{Lot.Shape}{General shape of property.}
21 | #'   \item{Land.Contour}{Flatness of the property.}
22 | #'   \item{Utilities}{Type of utilities available.}
23 | #'   \item{Lot.Config}{Lot configuration.}
24 | #'   \item{Land.Slope}{Slope of property.}
25 | #'   \item{Neighborhood}{Physical locations within Ames city limits (map available).}
26 | #'   \item{Condition.1}{Proximity to various conditions.}
27 | #'   \item{Condition.2}{Proximity to various conditions (if more than one is present).}
28 | #'   \item{Bldg.Type}{Type of dwelling.}
29 | #'   \item{House.Style}{Style of dwelling.}
30 | #'   \item{Overall.Qual}{Rates the overall material and finish of the house.}
31 | #'   \item{Overall.Cond}{Rates the overall condition of the house.}
32 | #'   \item{Year.Built}{Original construction date.}
33 | #'   \item{Year.Remod.Add}{Remodel date (same as construction date if no remodeling or additions).}
34 | #'   \item{Roof.Style}{Type of roof.}
35 | #'   \item{Roof.Matl}{Roof material.}
36 | #'   \item{Exterior.1st}{Exterior covering on house.}
37 | #'   \item{Exterior.2nd}{Exterior covering on house (if more than one material).}
38 | #'   \item{Mas.Vnr.Type}{Masonry veneer type.}
39 | #'   \item{Mas.Vnr.Area}{Masonry veneer area in square feet.}
40 | #'   \item{Exter.Qual}{Evaluates the quality of the material on the exterior.}
41 | #'   \item{Exter.Cond}{Evaluates the present condition of the material on the exterior.}
42 | #'   \item{Foundation}{Type of foundation.}
43 | #'   \item{Bsmt.Qual}{Evaluates the height of the basement.}
44 | #'   \item{Bsmt.Cond}{Evaluates the general condition of the basement.}
45 | #'   \item{Bsmt.Exposure}{Refers to walkout or garden level walls.}
46 | #'   \item{BsmtFin.Type.1}{Rating of basement finished area.}
47 | #'   \item{BsmtFin.SF.1}{Type 1 finished square feet.}
48 | #'   \item{BsmtFin.Type.2}{Rating of basement finished area (if multiple types).}
49 | #'   \item{BsmtFin.SF.2}{Type 2 finished square feet.}
50 | #'   \item{Bsmt.Unf.SF}{Unfinished square feet of basement area.}
51 | #'   \item{Total.Bsmt.SF}{Total square feet of basement area.}
52 | #'   \item{Heating}{Type of heating.}
53 | #'   \item{Heating.QC}{Heating quality and condition.}
54 | #'   \item{Central.Air}{Central air conditioning.}
55 | #'   \item{Electrical}{Electrical system.}
56 | #'   \item{X1st.Flr.SF}{First Floor square feet.}
57 | #'   \item{X2nd.Flr.SF}{Second floor square feet.}
58 | #'   \item{Low.Qual.Fin.SF}{Low quality finished square feet (all floors).}
59 | #'   \item{Bsmt.Full.Bath}{Basement full bathrooms.}
60 | #'   \item{Bsmt.Half.Bath}{Basement half bathrooms.}
61 | #'   \item{Full.Bath}{Full bathrooms above grade.}
62 | #'   \item{Half.Bath}{Half baths above grade.}
63 | #'   \item{Bedroom.AbvGr}{Bedrooms above grade (does NOT include basement bedrooms).}
64 | #'   \item{Kitchen.AbvGr}{Kitchens above grade.}
65 | #'   \item{Kitchen.Qual}{Kitchen quality.}
66 | #'   \item{TotRms.AbvGrd}{Total rooms above grade (does not include bathrooms).}
67 | #'   \item{Functional}{Home functionality (Assume typical unless deductions are warranted).}
68 | #'   \item{Fireplaces}{Number of fireplaces.}
69 | #'   \item{Fireplace.Qu}{Fireplace quality.}
70 | #'   \item{Garage.Type}{Garage location.}
71 | #'   \item{Garage.Yr.Blt}{Year garage was built.}
72 | #'   \item{Garage.Finish}{Interior finish of the garage.}
73 | #'   \item{Garage.Cars}{Size of garage in car capacity.}
74 | #'   \item{Garage.Area}{Size of garage in square feet.}
75 | #'   \item{Garage.Qual}{Garage quality.}
76 | #'   \item{Garage.Cond}{Garage condition.}
77 | #'   \item{Paved.Drive}{Paved driveway.}
78 | #'   \item{Wood.Deck.SF}{Wood deck area in square feet.}
79 | #'   \item{Open.Porch.SF}{Open porch area in square feet.}
80 | #'   \item{Enclosed.Porch}{Enclosed porch area in square feet.}
81 | #'   \item{X3Ssn.Porch}{Three season porch area in square feet.}
82 | #'   \item{Screen.Porch}{Screen porch area in square feet.}
83 | #'   \item{Pool.Area}{Pool area in square feet.}
84 | #'   \item{Pool.QC}{Pool quality.}
85 | #'   \item{Fence}{Fence quality.}
86 | #'   \item{Misc.Feature}{Miscellaneous feature not covered in other categories.}
87 | #'   \item{Misc.Val}{Dollar value of miscellaneous feature.}
88 | #'   \item{Mo.Sold}{Month Sold (MM).}
89 | #'   \item{Yr.Sold}{Year Sold (YYYY).}
90 | #'   \item{Sale.Type}{Type of sale.}
91 | #'   \item{Sale.Condition}{Condition of sale.}
92 | #' }
93 | #' @source De Cock, Dean. "Ames, Iowa: Alternative to the Boston housing data as 
94 | #' an end of semester regression project." Journal of Statistics Education 19.3 (2011).
95 | "ames"


--------------------------------------------------------------------------------
/R/ames_sampling_dist.R:
--------------------------------------------------------------------------------
 1 | #' Simulate Sampling Distribution
 2 | #' 
 3 | #' Run the interactive ames sampling distribution shiny app to 
 4 | #' illustrate sampling distributions using variables from the `ames`
 5 | #' dataset.
 6 | #' 
 7 | #'
 8 | #' @examples
 9 | #' if (interactive()) { 
10 | #'   ames_sampling_dist()
11 | #' }
12 | 
13 | #' @export
14 | 
15 | ames_sampling_dist = function()
16 | {
17 |   if (!allow_shiny())
18 |     stop("Shiny app will only run when built within RStudio.")
19 |   
20 |   ames = statsr::ames
21 |   
22 |   shinyApp(
23 |     ui = fluidPage(
24 |       # Sidebar with the variable, sample size and number-of-samples controls
25 |       sidebarLayout(
26 |         sidebarPanel(
27 |           selectInput("selected_var", "Variable:",  choices = list("area", "price"), selected = "area"),         
28 |           numericInput("n_samp", "Sample size:", min = 1, max = nrow(ames), value = 30),
29 |           numericInput("n_sim", "Number of samples:", min = 1, max = 30000, value = 15000) 
30 |         ),
31 |         # Show a plot of the generated distribution and its summary statistics
32 |         mainPanel(
33 |           plotOutput("sampling_plot"),
34 |           verbatimTextOutput("sampling_mean"),
35 |           verbatimTextOutput("sampling_se")
36 |         )
37 |       )
38 |     ),
39 |     
40 |     # Define server logic required to draw a histogram
41 |     server = function(input, output) {
42 |       
43 |       # create sampling distribution: one row of the matrix per simulated sample
44 |       sampling_dist <- reactive({
45 |         # req() halts quietly until both numeric inputs hold a value of at least 1
46 |         # (e.g. while a field is empty), instead of failing inside sample()/matrix().
47 |         req(input$n_samp >= 1, input$n_sim >= 1)
48 |         s = sample(ames[[input$selected_var]], size = input$n_samp * input$n_sim, replace = TRUE)
49 |         m = matrix(s, ncol = input$n_samp)
50 |         data.frame(x_bar = rowMeans(m))
51 |       })
52 |       
53 |       # plot sampling distribution
54 |       output$sampling_plot <- renderPlot({
55 |         # x axis is limited to the 10th-90th percentile range of the raw variable
56 |         x_min <- quantile(ames[[input$selected_var]], 0.1)
57 |         x_max <- quantile(ames[[input$selected_var]], 0.9)
58 |         
59 |         ggplot(sampling_dist(), aes_string(x = "x_bar")) +
60 |           geom_histogram(na.rm=TRUE, bins=50) +
61 |           xlim(x_min, x_max) +
62 |           ylim(0, input$n_sim * 0.35) +
63 |           ggtitle(paste0("Sampling distribution of mean ", 
64 |                          input$selected_var, " (n = ", input$n_samp, ")")) +
65 |           xlab(paste("mean", input$selected_var)) +
66 |           theme(plot.title = element_text(face = "bold", size = 16))
67 |       })
68 |       
69 |       # mean of sampling distribution
70 |       output$sampling_mean <- renderText({
71 |         paste0("mean of sampling distribution = ", round(mean(sampling_dist()$x_bar), 2))
72 |       })
73 |       
74 |       # standard error (standard deviation) of sampling distribution
75 |       output$sampling_se <- renderText({
76 |         paste0("SE of sampling distribution = ", round(sd(sampling_dist()$x_bar), 2))
77 |       })
78 |     },
79 |     
80 |     options = list(height = 500) 
81 |   )
82 | }


--------------------------------------------------------------------------------
/R/arbuthnot.R:
--------------------------------------------------------------------------------
 1 | #' Male and female births in London
 2 | #'
 3 | #' Arbuthnot's data describes male and female christenings (births) for
 4 | #' London from 1629-1710.
 5 | #'
 6 | #' John Arbuthnot (1710) used these time series data to carry out the first
 7 | #' known significance test. During every one of the 82 years, there were more
 8 | #' male christenings than female christenings. As Arbuthnot wondered,
 9 | #' we might also wonder if this could be due to chance, or whether it meant
10 | #' the birth ratio was not actually 1:1.
11 | #'
12 | #' @format A tbl_df with 82 rows and 3 variables:
13 | #' \describe{
14 | #'   \item{year}{year, ranging from 1629 to 1710}
15 | #'   \item{boys}{number of male christenings (births)}
16 | #'   \item{girls}{number of female christenings (births)}
17 | #' }
18 | #' @source These data are excerpted from the \code{\link[HistData]{Arbuthnot}}
19 | #' data set in the HistData package.
20 | "arbuthnot"


--------------------------------------------------------------------------------
/R/atheism.R:
--------------------------------------------------------------------------------
 1 | #' Atheism in the world data
 2 | #'
 3 | #' Survey results on atheism across several countries and years. Each row
 4 | #' represents a single respondent.
 5 | #'
 6 | #' @format A tbl_df with 88032 rows and 3 variables:
 7 | #' \describe{
 8 | #'   \item{nationality}{Country of the individual surveyed.}
 9 | #'   \item{response}{A categorical variable with two levels: atheist and non-atheist.}
10 | #'   \item{year}{Year in which the person was surveyed.}
11 | #'   }
12 | #' @source \href{https://github.com/OpenIntroStat/oilabs/blob/master/data-raw/atheism/Global_INDEX_of_Religiosity_and_Atheism_PR__6.pdf}{WIN-Gallup International Press Release}
13 | "atheism"


--------------------------------------------------------------------------------
/R/bandit_posterior.R:
--------------------------------------------------------------------------------
 1 | #' bandit posterior
 2 | #'
 3 | #' Utility function for calculating the posterior probability of each machine being "good" in 
 4 | #' two armed bandit problem. Calculated result is based on observed win loss data, prior belief about 
 5 | #' which machine is good and the probability of the good and bad machine paying out.
 6 | #'
 7 | #' @param data data frame containing win loss data
 8 | #' @param prior prior vector containing the probabilities of Machine 1 and Machine 2 being good, defaults to 0.5 and 0.5 respectively.
 9 | #' @param win_probs vector containing the probabilities of winning on the good and bad machine respectively.
10 | #' @return A vector containing the posterior probability of Machine 1 and Machine 2 being the good machine.
11 | #' @seealso \code{\link{bandit_sim}} to generate data and
12 | #'          \code{\link{plot_bandit_posterior}} to visualize.
13 | #' @examples
14 | #' data = data.frame(machine = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), 
15 | #'                   outcome = c("W", "L", "W", "L", "L", "W", "L", "L", "L", "W"))
16 | #' bandit_posterior(data)
17 | #' plot_bandit_posterior(data)
18 | #' 
19 | #' @export
20 | 
21 | 
22 | bandit_posterior <- function(data, prior = c(m1_good = 0.5, m2_good = 0.5), win_probs = c(good = 1 / 2, bad = 1 / 3)) {
23 |   if (length(names(prior)) == 0) {
24 |     names(prior) <- c("m1_good", "m2_good")
25 |   }
26 |   if (length(names(win_probs)) == 0) {
27 |     names(win_probs) <- c("good", "bad")
28 |   }
29 |   # Tally wins and losses per machine once; both hypotheses reuse the same counts.
30 |   m1_w <- sum(data$machine == 1L & data$outcome == "W")
31 |   m1_l <- sum(data$machine == 1L & data$outcome == "L")
32 |   m2_w <- sum(data$machine == 2L & data$outcome == "W")
33 |   m2_l <- sum(data$machine == 2L & data$outcome == "L")
34 |   p_good <- win_probs["good"]
35 |   p_bad <- win_probs["bad"]
36 | 
37 |   # Prior times likelihood of the data under "machine k is the good one".
38 |   m1_good_and_data <- prior["m1_good"] * p_good^m1_w * (1 - p_good)^m1_l * p_bad^m2_w * (1 - p_bad)^m2_l
39 |   m2_good_and_data <- prior["m2_good"] * p_bad^m1_w * (1 - p_bad)^m1_l * p_good^m2_w * (1 - p_good)^m2_l
40 | 
41 |   # Normalize so the two posterior probabilities sum to one; the element names
42 |   # m1_good / m2_good carry over from `prior`.
43 |   evidence <- m1_good_and_data + m2_good_and_data
44 |   return(c(m1_good_and_data / evidence, m2_good_and_data / evidence))
45 | }
46 | 
47 | #' plot_bandit_posterior
48 | #'
49 | #' Generates a plot that shows the bandit posterior values as they are sequentially updated 
50 | #' by the provided win / loss data.
51 | #'
52 | #' @param data data frame containing win loss data
53 | #' @param prior prior vector containing the probabilities of Machine 1 and Machine 2 being good, defaults to 50-50.
54 | #' @param win_probs vector containing the probabilities of winning on the good and bad machine respectively.
55 | #' @seealso \code{\link{bandit_sim}} to generate data to use below
56 | #'
57 | #' @examples
58 | #' # capture data from the `shiny` app `bandit_sim`.
59 | #' data = data.frame(machine = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), 
60 | #'                   outcome = c("W", "L", "W", "L", "L", "W", "L", "L", "L", "W"))
61 | #' plot_bandit_posterior(data)
62 | #'
63 | #' @export
64 | 
65 | plot_bandit_posterior <- function(data,
66 |                                   prior = c(m1_good = 0.5, m2_good = 0.5),
67 |                                   win_probs = c(good = 1 / 2, bad = 1 / 3)) {
68 |   # Accept an unnamed prior the same way bandit_posterior() does.
69 |   if (length(names(prior)) == 0) names(prior) <- c("m1_good", "m2_good")
70 |   # Row 1 holds the prior; row i + 1 holds the posterior after the first i plays.
71 |   r <- tibble::tibble(
72 |     "P(M1 is good | Data)" = c(prior[["m1_good"]], rep(NA, nrow(data))),
73 |     "P(M2 is good | Data)" = c(prior[["m2_good"]], rep(NA, nrow(data)))
74 |   )
75 |   for (i in seq_len(nrow(data))) {  # seq_len() skips the loop when there are no plays
76 |     bp <- bandit_posterior(data[1:i, ], prior, win_probs)
77 |     r[i+1, 1] <- bp[["m1_good"]]
78 |     r[i+1, 2] <- bp[["m2_good"]]
79 |   }
80 |   # Reshape to long form (one row per play and machine) for plotting.
81 |   r <- dplyr::mutate(r, play = dplyr::row_number())
82 |   r <- tidyr::gather(r, outcome, prob, -play)
83 |   ggplot(r, aes_string(x = "play", y = "prob", color = "outcome")) +
84 |     geom_line(size = 1.5) +
85 |     labs(x = "Play #", y = "Posterior Prob.") +
86 |     scale_color_manual(values = c("#428bca", "#5cb85c"))
87 | }
88 | 


--------------------------------------------------------------------------------
/R/bayes_util.R:
--------------------------------------------------------------------------------
  1 | # Plot the posterior under H1 (a spike at res$null) and H2 (the density den_H2),
  2 | # plus a bracket marking res$ci_Marg below the curves. Prints the ggplot object.
  3 | BF_plot = function(den_H2, res, parameter)
  4 | {
  5 |   # Rescale the H2 density so its peak equals res$post_H2.
  6 |   d_H2 = data.frame(x = den_H2$x, 
  7 |                     y = den_H2$y * res$post_H2 / max(den_H2$y),
  8 |                     Hypothesis = "H2") 
  9 |   # Grid indices bounding the interval res$ci_H2; the polygon shades that part of the curve.
 10 |   li = min(which(d_H2$x >= res$ci_H2[1]))  
 11 |   ui = max(which(d_H2$x <  res$ci_H2[2]))
 12 |   d_H2_poly = data.frame(x = c(d_H2$x[c(li,li:ui,ui)]), 
 13 |                          y = c(0, d_H2$y[li:ui], 0),
 14 |                          Hypothesis = "H2")
 15 |   d_H1 = data.frame(x = c(res$null, res$null), 
 16 |                     y = c(0, res$post_H1),
 17 |                     Hypothesis = "H1")
 18 |   # NA rows presumably exist so all three Hypothesis levels reach the legend -- confirm.
 19 |   d = rbind(data.frame(x=NA, y=NA, Hypothesis="H1"),
 20 |             d_H2,
 21 |             data.frame(x=NA, y=NA, Hypothesis="Overall"))
 22 | 
 23 |   # H2 Features
 24 |   p = ggplot(d, aes_string(x="x", y="y", color="Hypothesis", fill="Hypothesis")) + 
 25 |       geom_line(alpha=0.8) +
 26 |       geom_polygon(data = d_H2_poly, linetype="blank",alpha=0.8) + 
 27 |       ylab("Density") +  
 28 |       xlab(parameter)
 29 |   # H1 Features
 30 |   p = p + geom_line(data = d_H1, size=1.5, alpha=0.8)
 31 |   # Marginal plot features: read the lower y limit of the built panel. Current ggplot2
 32 |   # keeps it in layout$panel_params; panel$ranges is the pre-2.2 location.
 33 |   built = ggplot_build(p)
 34 |   y_range = built$layout$panel_params[[1]]$y.range
 35 |   if (is.null(y_range)) y_range = built$panel$ranges[[1]]$y.range
 36 |   y_min = y_range[1]
 37 | 
 38 |   d_Marg = data.frame(x = rep(res$ci_Marg, c(2,2)),
 39 |                       y = c(y_min*1/2, y_min, y_min, y_min*1/2),
 40 |                       Hypothesis = "Overall")
 41 |   p = p + geom_line(data = d_Marg, size=0.75, alpha=0.8)
 42 |   print(p)
 43 | }
 44 | 
 45 | # Kernel density estimate of x evaluated on [from, to], with the bandwidth
 46 | # fixed at 1.06 * min(sd(x), IQR(x) / 1.34) * n^(-0.2).
 47 | coda_density = function(x, from, to)
 48 | {
 49 |     spread = min(sd(x), IQR(x) / 1.34)
 50 |     bandwidth = 1.06 * spread * length(x)^-0.2
 51 |     density(x, from = from, to = to, bw = bandwidth)
 52 | }
 53 | # Validate a Beta prior: NULL becomes the uniform Beta(1,1) with a warning; otherwise a
 54 | # length-2 vector, unnamed (read as a, b) or named "a"/"b". Returns it ordered c(a, b).
 55 | check_beta_prior = function(beta_prior, group="")
 56 | {
 57 |     # caller's variable name for the warning; non-symbols (e.g. a literal NULL) use the default
 58 |     arg_name = "beta_prior"
 59 |     if (is.symbol(substitute(beta_prior)) && nzchar(as.character(substitute(beta_prior))))
 60 |         arg_name = as.character(substitute(beta_prior))
 61 |     param = ifelse(group == "", "p", paste0("p_",group))    
 62 | 
 63 |     if (is.null(beta_prior))
 64 |     {
 65 |         warning("No beta prior for ",param," was specified, assuming a uniform prior (p ~ Beta(a=1,b=1)).\n",
 66 |                 "  This beta prior is specified using the argument ",arg_name,"=c(a,b),\n",
 67 |                 "  where a and b are your desired hyperparameters.")
 68 |         beta_prior = c(a=1,b=1)
 69 |     }
 70 | 
 71 |     stopifnot(length(beta_prior) == 2)
 72 |     if (is.null(names(beta_prior)))
 73 |         names(beta_prior) = c("a","b")
 74 |     stopifnot(all(sort(names(beta_prior)) == c("a","b")))
 75 |     return(beta_prior[c("a","b")])
 76 | }
 77 | 
 78 | # Validate the prior over hypotheses: NULL becomes P(H1) = P(H2) = 0.5 with a warning and a
 79 | # single named probability is completed by its complement. Returns it ordered c(H1, H2).
 80 | check_hypothesis_prior = function(prior)
 81 | {
 82 |     if (is.null(prior))
 83 |     {
 84 |         warning("No prior set for H1 and H2, assuming a uniform prior of P(H1) = 0.5 and P(H2) = 0.5. The hypothesis prior is assigned using the argument  prior=c(H1=a,H2=b). ")
 85 |         prior = c(H1=0.5,H2=0.5)
 86 |     }
 87 | 
 88 |     # An unnamed single value has names(prior) == NULL, which cannot be tested in if();
 89 |     # it is left untouched here and rejected by the length check below.
 90 |     if (length(prior) == 1 && !is.null(names(prior)) && names(prior) %in% c("H1","H2"))
 91 |         prior[ setdiff(c("H1","H2"), names(prior)) ] = 1 - prior
 92 | 
 93 |     stopifnot(length(prior) == 2)
 94 |     stopifnot(all(prior >= 0))
 95 |     # compare with a tolerance: an exact == 1 rejects priors that miss 1 only by rounding error
 96 |     stopifnot(isTRUE(all.equal(sum(prior), 1)))
 97 | 
 98 |     if (is.null(names(prior)))
 99 |         names(prior) = c("H1","H2")
100 |     stopifnot(all(sort(names(prior)) == c("H1","H2")))
101 |     return(prior[c("H1","H2")])
102 | }


--------------------------------------------------------------------------------
/R/brfss.R:
--------------------------------------------------------------------------------
 1 | #' Behavioral Risk Factor Surveillance System 2013 (Subset)
 2 | #'
 3 | #' This data set is a small subset of BRFSS results from the 2013 survey, each row represents an individual respondent.
 4 | #'
 5 | #' @format A tbl_df with 5000 rows and 6 variables:
 6 | #' \describe{
 7 | #'   \item{weight}{Weight in pounds.}
 8 | #'   \item{height}{Height in inches.}
 9 | #'   \item{sex}{Sex}
10 | #'   \item{exercise}{Any exercise in the last 30 days}
11 | #'   \item{fruit_per_day}{Number of servings of fruit consumed per day.}
12 | #'   \item{vege_per_day}{Number of servings of dark green vegetables consumed per day.}
13 | #' }
14 | #' @source Centers for Disease Control and Prevention (CDC). Behavioral Risk Factor Surveillance System
15 | #' Survey Data. Atlanta, Georgia: U.S. Department of Health and Human Services, Centers for
16 | #' Disease Control and Prevention, 2013.
17 | "brfss"
18 | 


--------------------------------------------------------------------------------
/R/calc_streak.R:
--------------------------------------------------------------------------------
 1 | #' Calculate hitting streaks
 2 | #' 
 3 | #' Splits a sequence of shots at every miss and reports the number of
 4 | #' consecutive hits in each piece, so back-to-back misses (and a miss at the
 5 | #' very start or end) contribute streaks of length 0.
 6 | #' 
 7 | #' @param x A data frame or character vector of hits (\code{"H"}) and misses (\code{"M"}).
 8 | #'   For a data frame (or tibble) the first column is used.
 9 | #' @return A data frame with one column, \code{length}, containing the length of each hit streak.
10 | #' @examples
11 | #' data(kobe_basket)
12 | #' calc_streak(kobe_basket$shot)
13 | #' 
14 | #' @export
15 | 
16 | calc_streak = function(x)
17 | {
18 |     # `[[1]]` extracts the first column as a plain vector for data frames and
19 |     # tibbles alike; `x[,1]` leaves a tibble as a one-column tibble, which then
20 |     # fails the H/M check below.
21 |     if (!is.atomic(x))
22 |         x = x[[1]]
23 | 
24 |     if (any(!x %in% c("H","M")))
25 |         stop('Input should only contain hits ("H") and misses ("M")')
26 |     
27 |     # Code hits as 1 and misses as 0, padded with a 0 on both ends so that
28 |     # every run of hits is enclosed by two zeros.
29 |     y = rep(0,length(x))
30 |     y[x == "H"] = 1
31 |     y = c(0, y, 0)
32 | 
33 |     # The gap between consecutive zeros, minus one, is the streak length.
34 |     wz = which(y == 0)
35 |     streak = diff(wz) - 1
36 |     
37 |     return(data.frame(length = streak))
38 | }


--------------------------------------------------------------------------------
/R/ci_single_mean_sim.R:
--------------------------------------------------------------------------------
  1 | # Bootstrap confidence interval for the mean of a single numerical variable.
  2 | #
  3 | # Arguments:
  4 | #   y           sample values
  5 | #   conf_level  confidence level as a proportion (multiplied by 100 when printed)
  6 | #   y_name      x-axis label for the sample distribution plot
  7 | #   boot_method "perc" for the percentile interval, "se" for
  8 | #               y-bar +/- t* x (SD of the bootstrap means), df = n - 1
  9 | #   nsim        number of bootstrap resamples
 10 | #   seed        passed to set.seed() unless NULL
 11 | #   show_var_types, show_summ_stats, show_res  print the corresponding text
 12 | #   show_eda_plot, show_inf_plot               draw the sample / bootstrap plot
 13 | #
 14 | # Returns list(sim_dist, CI) for "perc" and list(sim_dist, SE, ME, CI) for "se".
 15 | ci_single_mean_sim <- function(y, conf_level, y_name,
 16 |                                boot_method, nsim, seed,
 17 |                                show_var_types, show_summ_stats, show_res,
 18 |                                show_eda_plot, show_inf_plot){
 19 | 
 20 |   # reject unknown methods up front; otherwise no interval is ever computed and
 21 |   # the function only fails later with "object 'ci' not found"
 22 |   if(!(is.character(boot_method) && length(boot_method) == 1 &&
 23 |        boot_method %in% c("perc", "se"))){
 24 |     stop('boot_method must be either "perc" or "se"', call. = FALSE)
 25 |   }
 26 | 
 27 |   # set seed
 28 |   if(!is.null(seed)){ set.seed(seed) }
 29 | 
 30 |   # calculate sample size
 31 |   n <- length(y)
 32 | 
 33 |   # calculate y-bar
 34 |   y_bar <- mean(y)
 35 | 
 36 |   # create bootstrap distribution: mean of each resample of size n
 37 |   # (seq_len() runs zero iterations for nsim = 0, unlike 1:nsim)
 38 |   sim_dist <- rep(NA_real_, nsim)
 39 |   for(i in seq_len(nsim)){
 40 |     boot_samp <- sample(y, size = n, replace = TRUE)
 41 |     sim_dist[i] <- mean(boot_samp)
 42 |   }
 43 | 
 44 |   if(boot_method == "perc"){
 45 |     # percentile method: cut (1 - conf_level) / 2 off each tail of the
 46 |     # bootstrap distribution
 47 |     lower_quantile <- (1-conf_level) / 2
 48 |     upper_quantile <- conf_level + lower_quantile
 49 |     ci <- as.numeric(quantile(sim_dist, c(lower_quantile, upper_quantile)))
 50 |   } else {
 51 |     # standard error method: y-bar +/- t* x SE, where SE is the SD of the
 52 |     # bootstrap means and t* has n - 1 degrees of freedom
 53 |     df <- n - 1
 54 |     perc_crit_value <- conf_level + ((1 - conf_level) / 2)
 55 |     t_star <- qt(perc_crit_value, df)
 56 |     se <- sd(sim_dist)
 57 |     me <- t_star * se
 58 |     ci <- y_bar + c(-1, 1) * me
 59 |   }
 60 | 
 61 |   # print variable types
 62 |   if(show_var_types == TRUE){
 63 |     cat("Single numerical variable\n")
 64 |   }
 65 | 
 66 |   # print summary statistics
 67 |   if(show_summ_stats == TRUE){
 68 |     s <- sd(y)
 69 |     cat(paste0("n = ", n, ", y-bar = ", round(y_bar, 4), ", s = ", round(s, 4), "\n"))
 70 |   }
 71 | 
 72 |   # print results
 73 |   if(show_res == TRUE){
 74 |     conf_level_perc = conf_level * 100
 75 |     cat(paste0(conf_level_perc, "% CI: (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
 76 |   }
 77 | 
 78 |   # eda_plot: histogram of the sample with y-bar marked
 79 |   d_eda <- data.frame(y = y)
 80 | 
 81 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 82 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
 83 |     ggplot2::xlab(y_name) +
 84 |     ggplot2::ylab("") +
 85 |     ggplot2::ggtitle("Sample Distribution") +
 86 |     ggplot2::geom_vline(xintercept = y_bar, col = "#1FBEC3", lwd = 1.5)
 87 | 
 88 |   # inf_plot: histogram of the bootstrap means with the interval shaded
 89 |   d_inf <- data.frame(sim_dist = sim_dist)
 90 | 
 91 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
 92 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
 93 |     ggplot2::annotate("rect", xmin = ci[1], xmax = ci[2], ymin = 0, ymax = Inf,alpha = 0.3, fill = "#FABAB8") +
 94 |     ggplot2::xlab("bootstrap means") +
 95 |     ggplot2::ylab("") +
 96 |     ggplot2::ggtitle("Bootstrap Distribution") +
 97 |     ggplot2::geom_vline(xintercept = ci, color = "#F57670", lwd = 1.5)
 98 | 
 99 |   # print plots: one alone, or both side by side
100 |   if(show_eda_plot & !show_inf_plot){
101 |     print(eda_plot)
102 |   }
103 |   if(!show_eda_plot & show_inf_plot){
104 |     print(inf_plot)
105 |   }
106 |   if(show_eda_plot & show_inf_plot){
107 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
108 |   }
109 | 
110 |   # return
111 |   if(boot_method == "perc"){
112 |     return(list(sim_dist = sim_dist, CI = ci))
113 |   } else {
114 |     return(list(sim_dist = sim_dist, SE = se, ME = me, CI = ci))
115 |   }
116 | 
117 | }


--------------------------------------------------------------------------------
/R/ci_single_mean_theo.R:
--------------------------------------------------------------------------------
 1 | # Theoretical (t-based) confidence interval for a single mean.
 2 | #
 3 | # Computes y-bar +/- t* x s / sqrt(n) with n - 1 degrees of freedom, prints the
 4 | # requested text output, optionally draws the sample histogram, and returns
 5 | # list(df, SE, ME, CI). There is no inference plot for this method, so
 6 | # show_inf_plot = TRUE only raises a warning.
 7 | ci_single_mean_theo <- function(y, conf_level, y_name, 
 8 |                                 show_var_types, show_summ_stats, show_res,
 9 |                                 show_eda_plot, show_inf_plot){
10 | 
11 |   # sample size and sample mean
12 |   n <- length(y)
13 |   y_bar <- mean(y)
14 | 
15 |   # t critical value: the quantile that leaves (1 - conf_level) / 2 above it
16 |   df <- n - 1
17 |   crit_percentile <- conf_level + ((1 - conf_level) / 2)
18 |   t_star <- qt(crit_percentile, df)
19 | 
20 |   # sample SD, standard error of the mean and margin of error
21 |   s <- sd(y)
22 |   se <- s / sqrt(n)
23 |   me <- t_star * se
24 | 
25 |   # interval limits, lower first
26 |   ci <- c(y_bar - me, y_bar + me)
27 | 
28 |   # text output, each part behind its own flag
29 |   if(show_var_types == TRUE){
30 |     cat("Single numerical variable\n")
31 |   }
32 |   if(show_summ_stats == TRUE){
33 |     summ_line <- paste0("n = ", n, ", y-bar = ", round(y_bar, 4), ", s = ", round(s, 4), "\n")
34 |     cat(summ_line)
35 |   }
36 |   if(show_res == TRUE){
37 |     conf_level_perc = conf_level * 100
38 |     res_line <- paste0(conf_level_perc, "% CI: (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n")
39 |     cat(res_line)
40 |   }
41 | 
42 |   # sample histogram (20 bins across the range) with y-bar marked
43 |   d_eda <- data.frame(y = y)
44 |   eda_base <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment())
45 |   eda_plot <- eda_base +
46 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
47 |     ggplot2::xlab(y_name) +
48 |     ggplot2::ylab("") +
49 |     ggplot2::ggtitle("Sample Distribution") +
50 |     ggplot2::geom_vline(xintercept = y_bar, col = "#1FBEC3", lwd = 1.5)
51 | 
52 |   # plots
53 |   if(show_eda_plot){
54 |     print(eda_plot)
55 |   }
56 |   if(show_inf_plot){
57 |     warning("No inference plot available.", call. = FALSE)
58 |   }
59 | 
60 |   list(df = df, SE = se, ME = me, CI = ci)
61 | }


--------------------------------------------------------------------------------
/R/ci_single_median_sim.R:
--------------------------------------------------------------------------------
  1 | # Bootstrap confidence interval for the median of a single numerical variable.
  2 | #
  3 | # Arguments:
  4 | #   y           sample values
  5 | #   conf_level  confidence level as a proportion (multiplied by 100 when printed)
  6 | #   y_name      x-axis label for the sample distribution plot
  7 | #   boot_method "perc" for the percentile interval, "se" for
  8 | #               median +/- t* x (SD of the bootstrap medians), df = n - 1
  9 | #   nsim        number of bootstrap resamples
 10 | #   seed        passed to set.seed() unless NULL
 11 | #   show_var_types, show_summ_stats, show_res  print the corresponding text
 12 | #   show_eda_plot, show_inf_plot               draw the sample / bootstrap plot
 13 | #
 14 | # Returns list(sim_dist, CI) for "perc" and list(sim_dist, SE, ME, CI) for "se".
 15 | ci_single_median_sim <- function(y, conf_level, y_name,
 16 |                                  boot_method, nsim, seed, 
 17 |                                  show_var_types, show_summ_stats, show_res,
 18 |                                  show_eda_plot, show_inf_plot){
 19 | 
 20 |   # reject unknown methods up front; otherwise no interval is ever computed and
 21 |   # the function only fails later with "object 'ci' not found"
 22 |   if(!(is.character(boot_method) && length(boot_method) == 1 &&
 23 |        boot_method %in% c("perc", "se"))){
 24 |     stop('boot_method must be either "perc" or "se"', call. = FALSE)
 25 |   }
 26 | 
 27 |   # set seed
 28 |   if(!is.null(seed)){ set.seed(seed) }
 29 | 
 30 |   # calculate sample size
 31 |   n <- length(y)
 32 | 
 33 |   # calculate sample median
 34 |   med <- median(y)
 35 | 
 36 |   # create bootstrap distribution: median of each resample of size n
 37 |   # (seq_len() runs zero iterations for nsim = 0, unlike 1:nsim)
 38 |   sim_dist <- rep(NA_real_, nsim)
 39 |   for(i in seq_len(nsim)){
 40 |     boot_samp <- sample(y, size = n, replace = TRUE)
 41 |     sim_dist[i] <- median(boot_samp)
 42 |   }
 43 | 
 44 |   if(boot_method == "perc"){
 45 |     # percentile method: cut (1 - conf_level) / 2 off each tail of the
 46 |     # bootstrap distribution
 47 |     lower_quantile <- (1-conf_level) / 2
 48 |     upper_quantile <- conf_level + lower_quantile
 49 |     ci <- as.numeric(quantile(sim_dist, c(lower_quantile, upper_quantile)))
 50 |   } else {
 51 |     # standard error method: median +/- t* x SE, where SE is the SD of the
 52 |     # bootstrap medians and t* has n - 1 degrees of freedom
 53 |     df <- n - 1
 54 |     perc_crit_value <- conf_level + ((1 - conf_level) / 2)
 55 |     t_star <- qt(perc_crit_value, df)
 56 |     se <- sd(sim_dist)
 57 |     me <- t_star * se
 58 |     ci <- med + c(-1, 1) * me
 59 |   }
 60 | 
 61 |   # print variable types
 62 |   if(show_var_types == TRUE){
 63 |     cat("Single numerical variable\n")
 64 |   }
 65 | 
 66 |   # print summary statistics
 67 |   if(show_summ_stats == TRUE){
 68 |     q_25 <- quantile(y, 0.25)
 69 |     q_75 <- quantile(y, 0.75)
 70 |     cat(paste0("n = ", n, ", y_med = ", round(med, 4), 
 71 |                ", Q1 = ", round(q_25, 4), ", Q3 = ", round(q_75, 4), "\n"))
 72 |   }
 73 | 
 74 |   # print results
 75 |   if(show_res == TRUE){
 76 |     conf_level_perc = conf_level * 100
 77 |     cat(paste0(conf_level_perc, "% CI: (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
 78 |   }
 79 | 
 80 |   # eda_plot: histogram of the sample with the median marked
 81 |   d_eda <- data.frame(y = y)
 82 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 83 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
 84 |     ggplot2::xlab(y_name) +
 85 |     ggplot2::ylab("") +
 86 |     ggplot2::ggtitle("Sample Distribution") +
 87 |     ggplot2::geom_vline(xintercept = med, col = "#1FBEC3", lwd = 1.5)
 88 | 
 89 |   # inf_plot: histogram of the bootstrap medians with the interval shaded;
 90 |   # the bin width is never allowed below 1
 91 |   d_inf <- data.frame(sim_dist = sim_dist)
 92 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
 93 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = max(diff(range(sim_dist)) / 20, 1)) +
 94 |     ggplot2::annotate("rect", xmin = ci[1], xmax = ci[2], ymin = 0, ymax = Inf, 
 95 |              alpha = 0.3, fill = "#FABAB8") +
 96 |     ggplot2::xlab("bootstrap medians") +
 97 |     ggplot2::ylab("") +
 98 |     ggplot2::ggtitle("Bootstrap Distribution") +
 99 |     ggplot2::geom_vline(xintercept = ci, color = "#F57670", lwd = 1.5)
100 | 
101 |   # print plots: one alone, or both side by side
102 |   if(show_eda_plot & !show_inf_plot){ 
103 |     print(eda_plot)
104 |   }
105 |   if(!show_eda_plot & show_inf_plot){ 
106 |     print(inf_plot)
107 |   }
108 |   if(show_eda_plot & show_inf_plot){
109 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
110 |   }
111 | 
112 |   # return
113 |   if(boot_method == "perc"){
114 |     return(list(sim_dist = sim_dist, CI = ci))
115 |   } else {
116 |     return(list(sim_dist = sim_dist, SE = se, ME = me, CI = ci))
117 |   }
118 | 
119 | }


--------------------------------------------------------------------------------
/R/ci_single_prop_sim.R:
--------------------------------------------------------------------------------
  1 | # Bootstrap confidence interval for a single proportion.
  2 | #
  3 | # Arguments:
  4 | #   y           sample values (categorical)
  5 | #   success     the value of y counted as a success
  6 | #   conf_level  confidence level as a proportion (multiplied by 100 when printed)
  7 | #   y_name      x-axis label for the sample distribution plot
  8 | #   boot_method "perc" for the percentile interval, "se" for
  9 | #               p-hat +/- z* x (SD of the bootstrap proportions)
 10 | #   nsim        number of bootstrap resamples
 11 | #   seed        passed to set.seed() unless NULL
 12 | #   show_var_types, show_summ_stats, show_res  print the corresponding text
 13 | #   show_eda_plot, show_inf_plot               draw the sample / bootstrap plot
 14 | #
 15 | # Returns list(sim_dist, CI) for "perc" and list(sim_dist, SE, ME, CI) for "se";
 16 | # SE, ME and CI are rounded to 4 decimals.
 17 | ci_single_prop_sim <- function(y, success, conf_level, y_name,
 18 |                                boot_method, nsim, seed, 
 19 |                                show_var_types, show_summ_stats, show_res,
 20 |                                show_eda_plot, show_inf_plot){
 21 | 
 22 |   # reject unknown methods up front; otherwise no interval is ever computed and
 23 |   # the function only fails later with "object 'ci' not found"
 24 |   if(!(is.character(boot_method) && length(boot_method) == 1 &&
 25 |        boot_method %in% c("perc", "se"))){
 26 |     stop('boot_method must be either "perc" or "se"', call. = FALSE)
 27 |   }
 28 | 
 29 |   # set seed
 30 |   if(!is.null(seed)){set.seed(seed)}
 31 | 
 32 |   # calculate sample size
 33 |   n <- length(y)
 34 | 
 35 |   # calculate p_hat
 36 |   p_hat <- sum(y == success) / n
 37 | 
 38 |   # create bootstrap distribution: proportion of successes in each resample
 39 |   # (seq_len() runs zero iterations for nsim = 0, unlike 1:nsim)
 40 |   sim_dist <- rep(NA_real_, nsim)
 41 |   for(i in seq_len(nsim)){
 42 |     boot_samp <- sample(y, size = n, replace = TRUE)
 43 |     sim_dist[i] <- sum(boot_samp == success) / n
 44 |   }
 45 | 
 46 |   if(boot_method == "perc"){
 47 |     # percentile method: cut (1 - conf_level) / 2 off each tail of the
 48 |     # bootstrap distribution
 49 |     lower_quantile <- (1-conf_level) / 2
 50 |     upper_quantile <- conf_level + lower_quantile
 51 |     ci <- as.numeric(quantile(sim_dist, c(lower_quantile, upper_quantile)))
 52 |   } else {
 53 |     # standard error method: p-hat +/- z* x SE, where SE is the SD of the
 54 |     # bootstrap proportions
 55 |     perc_crit_value <- conf_level + ((1 - conf_level) / 2)
 56 |     z_star <- qnorm(perc_crit_value)
 57 |     se <- sd(sim_dist)
 58 |     me <- z_star * se
 59 |     ci <- p_hat + c(-1, 1) * me
 60 |   }
 61 | 
 62 |   # print variable types
 63 |   if(show_var_types == TRUE){
 64 |     cat(paste0("Single categorical variable, success: ", success,"\n"))
 65 |   }
 66 | 
 67 |   # print summary statistics
 68 |   if(show_summ_stats == TRUE){
 69 |     cat(paste0("n = ", n, ", p-hat = ", round(p_hat, 4), "\n"))
 70 |   }
 71 | 
 72 |   # print results
 73 |   if(show_res == TRUE){
 74 |     conf_level_perc = conf_level * 100
 75 |     cat(paste0(conf_level_perc, "% CI: (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
 76 |   }
 77 | 
 78 |   # eda_plot: bar chart of the sample
 79 |   d_eda <- data.frame(y = y)
 80 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 81 |     ggplot2::geom_bar(fill = "#8FDEE1") +
 82 |     ggplot2::xlab(y_name) +
 83 |     ggplot2::ylab("") +
 84 |     ggplot2::ggtitle("Sample Distribution")
 85 | 
 86 |   # inf_plot: histogram of the bootstrap proportions with the interval shaded
 87 |   # (the axis previously said "bootstrap means", copied from the mean version)
 88 |   d_inf <- data.frame(sim_dist = sim_dist)
 89 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
 90 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
 91 |     ggplot2::annotate("rect", xmin = ci[1], xmax = ci[2], ymin = 0, ymax = Inf, 
 92 |              alpha = 0.3, fill = "#FABAB8") +
 93 |     ggplot2::xlab("bootstrap proportions") +
 94 |     ggplot2::ylab("") +
 95 |     ggplot2::ggtitle("Bootstrap Distribution") +
 96 |     ggplot2::geom_vline(xintercept = ci, color = "#F57670", lwd = 1.5)
 97 | 
 98 |   # print plots: one alone, or both side by side
 99 |   if(show_eda_plot & !show_inf_plot){ 
100 |     print(eda_plot)
101 |   }
102 |   if(!show_eda_plot & show_inf_plot){ 
103 |     print(inf_plot)
104 |   }
105 |   if(show_eda_plot & show_inf_plot){
106 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
107 |   }
108 | 
109 |   # return
110 |   if(boot_method == "perc"){
111 |     return(list(sim_dist = sim_dist, CI = round(ci, 4)))
112 |   } else {
113 |     return(list(sim_dist = sim_dist, SE = round(se, 4), ME = round(me, 4), CI = round(ci, 4)))
114 |   }
115 | 
116 | }


--------------------------------------------------------------------------------
/R/ci_single_prop_theo.R:
--------------------------------------------------------------------------------
 1 | # Theoretical (normal approximation) confidence interval for a single proportion.
 2 | #
 3 | # Computes p-hat +/- z* x sqrt(p-hat (1 - p-hat) / n), where p-hat is the share
 4 | # of y equal to `success`, prints the requested text output, optionally draws
 5 | # the sample bar chart, and returns list(SE, ME, CI) rounded to 4 decimals.
 6 | # There is no inference plot for this method, so show_inf_plot = TRUE only
 7 | # raises a warning.
 8 | ci_single_prop_theo <- function(y, success, conf_level, y_name, 
 9 |                                 show_var_types, show_summ_stats, show_res,
10 |                                 show_eda_plot, show_inf_plot){
11 |   
12 |   # calculate sample size
13 |   n <- length(y) 
14 |   
15 |   # calculate p-hat
16 |   p_hat <- sum(y == success) / n
17 |   
18 |   # find percentile associated with critical value
19 |   perc_crit_value <- conf_level + ((1 - conf_level) / 2)
20 |   
21 |   # find critical value
22 |   z_star <- qnorm(perc_crit_value)
23 |   
24 |   # calculate SE
25 |   se <- sqrt(p_hat * (1 - p_hat) / n)
26 |   
27 |   # calculate ME
28 |   me <- z_star * se
29 |   
30 |   # calculate CI
31 |   ci <- p_hat + c(-1, 1) * me
32 |   
33 |   # print variable types
34 |   if(show_var_types == TRUE){
35 |     cat(paste0("Single categorical variable, success: ", success,"\n"))
36 |   }
37 | 
38 |   # print summary statistics
39 |   if(show_summ_stats == TRUE){
40 |     cat(paste0("n = ", n, ", p-hat = ", round(p_hat, 4), "\n"))
41 |   }
42 | 
43 |   # print results
44 |   if(show_res == TRUE){
45 |     conf_level_perc = conf_level * 100
46 |     cat(paste0(conf_level_perc, "% CI: (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
47 |   }
48 | 
49 |   # eda_plot: bar chart of the sample
50 |   d_eda <- data.frame(y = y)
51 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
52 |     ggplot2::geom_bar(fill = "#8FDEE1") +
53 |     ggplot2::xlab(y_name) +
54 |     ggplot2::ylab("") +
55 |     ggplot2::ggtitle("Sample Distribution")
56 |   
57 |   # print plots
58 |   if(show_eda_plot){ print(eda_plot) }
59 |   # call. = FALSE keeps this internal call out of the warning shown to the
60 |   # user, matching ci_single_mean_theo
61 |   if(show_inf_plot){ warning("No inference plot available.", call. = FALSE) }
62 |   
63 |   # return
64 |   return(list(SE = round(se, 4), ME = round(me, 4), CI = round(ci, 4)))
65 | }


--------------------------------------------------------------------------------
/R/ci_two_mean_sim.R:
--------------------------------------------------------------------------------
  1 | # Bootstrap confidence interval for a difference of two means.
  2 | #
  3 | # Arguments:
  4 | #   y           numerical response
  5 | #   x           grouping factor; the groups are levels(x)[1] and levels(x)[2] and
  6 | #               the difference is taken as first level minus second level
  7 | #   conf_level  confidence level as a proportion (multiplied by 100 when printed)
  8 | #   y_name, x_name  axis labels for the sample distribution plot
  9 | #   boot_method "perc" for the percentile interval, "se" for
 10 | #               (y-bar1 - y-bar2) +/- t* x SE, df = min(n1 - 1, n2 - 1)
 11 | #   nsim        number of bootstrap resamples
 12 | #   seed        passed to set.seed() unless NULL
 13 | #   show_var_types, show_summ_stats, show_res  print the corresponding text
 14 | #   show_eda_plot, show_inf_plot               draw the sample / bootstrap plot
 15 | #
 16 | # Returns list(sim_dist, CI) for "perc" and list(sim_dist, SE, ME, CI) for "se".
 17 | ci_two_mean_sim <- function(y, x, conf_level, y_name, x_name,
 18 |                             boot_method, nsim, seed, 
 19 |                             show_var_types, show_summ_stats, show_res,
 20 |                             show_eda_plot, show_inf_plot){
 21 | 
 22 |   # reject unknown methods up front; otherwise no interval is ever computed and
 23 |   # the function only fails later with "object 'ci' not found"
 24 |   if(!(is.character(boot_method) && length(boot_method) == 1 &&
 25 |        boot_method %in% c("perc", "se"))){
 26 |     stop('boot_method must be either "perc" or "se"', call. = FALSE)
 27 |   }
 28 | 
 29 |   # set seed
 30 |   if(!is.null(seed)){ set.seed(seed) }
 31 | 
 32 |   # calculate n1 and n2
 33 |   ns <- by(y, x, length)
 34 |   n1 <- as.numeric(ns[1])
 35 |   n2 <- as.numeric(ns[2])
 36 | 
 37 |   # calculate y-bar1 and y-bar2
 38 |   y_bars <- by(y, x, mean)
 39 |   y_bar1 <- as.numeric(y_bars[1])
 40 |   y_bar2 <- as.numeric(y_bars[2])
 41 | 
 42 |   # calculate difference in y-bars
 43 |   y_bar_diff <- y_bar1 - y_bar2
 44 | 
 45 |   # create bootstrap distribution: each group is resampled separately at its
 46 |   # own size and the difference of the resample means is recorded
 47 |   # (seq_len() runs zero iterations for nsim = 0, unlike 1:nsim)
 48 |   y1 <- y[x == levels(x)[1]]
 49 |   y2 <- y[x == levels(x)[2]]
 50 | 
 51 |   sim_dist <- rep(NA_real_, nsim)
 52 |   for(i in seq_len(nsim)){
 53 |     boot_samp1 <- sample(y1, size = n1, replace = TRUE)
 54 |     boot_samp2 <- sample(y2, size = n2, replace = TRUE)
 55 |     sim_dist[i] <- mean(boot_samp1) - mean(boot_samp2)
 56 |   }
 57 | 
 58 |   if(boot_method == "perc"){
 59 |     # percentile method: cut (1 - conf_level) / 2 off each tail of the
 60 |     # bootstrap distribution
 61 |     lower_quantile <- (1-conf_level) / 2
 62 |     upper_quantile <- conf_level + lower_quantile
 63 |     ci <- as.numeric(quantile(sim_dist, c(lower_quantile, upper_quantile)))
 64 |   } else {
 65 |     # standard error method: difference +/- t* x SE, where SE is the SD of the
 66 |     # bootstrap differences and t* has min(n1 - 1, n2 - 1) degrees of freedom
 67 |     df <- min(n1 - 1, n2 - 1)
 68 |     perc_crit_value <- conf_level + ((1 - conf_level) / 2)
 69 |     t_star <- qt(perc_crit_value, df)
 70 |     se <- sd(sim_dist)
 71 |     me <- t_star * se
 72 |     ci <- y_bar_diff + c(-1, 1) * me
 73 |   }
 74 | 
 75 |   # print variable types
 76 |   if(show_var_types == TRUE){
 77 |     n_x_levels <- length(levels(x))
 78 |     cat(paste0("Response variable: numerical, Explanatory variable: categorical (", n_x_levels," levels)\n"))
 79 |   }
 80 | 
 81 |   # print summary statistics
 82 |   gr1 <- levels(x)[1]
 83 |   gr2 <- levels(x)[2]
 84 | 
 85 |   if(show_summ_stats == TRUE){
 86 |     sds <- by(y, x, sd)
 87 |     s1 <- as.numeric(sds[1])
 88 |     s2 <- as.numeric(sds[2])
 89 |     cat(paste0("n_", gr1, " = ", n1, ", y_bar_", gr1, " = ", round(y_bar1, 4), ", s_", gr1, " = ", round(s1, 4), "\n"))
 90 |     cat(paste0("n_", gr2, " = ", n2, ", y_bar_", gr2, " = ", round(y_bar2, 4), ", s_", gr2, " = ", round(s2, 4), "\n"))
 91 |   }
 92 | 
 93 |   # print results
 94 |   if(show_res == TRUE){
 95 |     conf_level_perc = conf_level * 100
 96 |     cat(paste0(conf_level_perc, "% CI (", gr1 ," - ", gr2,"): (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
 97 |   }
 98 | 
 99 |   # eda_plot: one histogram per group, each with its group mean marked
100 |   d_eda <- data.frame(y = y, x = x)
101 |   d_means <- data.frame(y_bars = as.numeric(y_bars), x = levels(x))
102 | 
103 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
104 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
105 |     ggplot2::xlab(y_name) +
106 |     ggplot2::ylab(x_name) +
107 |     ggplot2::ggtitle("Sample Distribution") +
108 |     ggplot2::geom_vline(data = d_means, ggplot2::aes(xintercept = y_bars), col = "#1FBEC3", lwd = 1.5) +
109 |     ggplot2::facet_grid(x ~ .)
110 | 
111 |   # inf_plot: histogram of the bootstrap differences with the interval shaded
112 |   d_inf <- data.frame(sim_dist = sim_dist)
113 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
114 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
115 |     ggplot2::annotate("rect", xmin = ci[1], xmax = ci[2], ymin = 0, ymax = Inf, 
116 |              alpha = 0.3, fill = "#FABAB8") +
117 |     ggplot2::xlab("bootstrap differences in means") +
118 |     ggplot2::ylab("") +
119 |     ggplot2::ggtitle("Bootstrap Distribution") +
120 |     ggplot2::geom_vline(xintercept = ci, color = "#F57670", lwd = 1.5)
121 | 
122 |   # print plots: one alone, or both side by side
123 |   if(show_eda_plot & !show_inf_plot){ 
124 |     print(eda_plot)
125 |   }
126 |   if(!show_eda_plot & show_inf_plot){ 
127 |     print(inf_plot)
128 |   }
129 |   if(show_eda_plot & show_inf_plot){
130 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
131 |   }
132 | 
133 |   # return
134 |   if(boot_method == "perc"){
135 |     return(list(sim_dist = sim_dist, CI = ci))
136 |   } else {
137 |     return(list(sim_dist = sim_dist, SE = se, ME = me, CI = ci))
138 |   }
139 | }


--------------------------------------------------------------------------------
/R/ci_two_mean_theo.R:
--------------------------------------------------------------------------------
 1 | # Theoretical (t-based) confidence interval for a difference of two means.
 2 | #
 3 | # The groups are levels(x)[1] and levels(x)[2]; the interval is
 4 | # (y-bar1 - y-bar2) +/- t* x sqrt(s1^2 / n1 + s2^2 / n2) with
 5 | # min(n1 - 1, n2 - 1) degrees of freedom. Prints the requested text output,
 6 | # optionally draws the per-group histograms, and returns list(df, SE, ME, CI).
 7 | # There is no inference plot for this method, so show_inf_plot = TRUE only
 8 | # raises a warning.
 9 | ci_two_mean_theo <- function(y, x, conf_level, y_name, x_name,
10 |                              show_var_types, show_summ_stats, show_res,
11 |                              show_eda_plot, show_inf_plot){
12 |   
13 |   # calculate n1 and n2
14 |   ns <- by(y, x, length)
15 |   n1 <- as.numeric(ns[1])
16 |   n2 <- as.numeric(ns[2])
17 |   
18 |   # calculate y-bar1 and y-bar2
19 |   y_bars <- by(y, x, mean)
20 |   y_bar1 <- as.numeric(y_bars[1])
21 |   y_bar2 <- as.numeric(y_bars[2])
22 |   
23 |   # calculate difference in y-bars
24 |   y_bar_diff <- y_bar1 - y_bar2
25 |   
26 |   # calculate s1 and s2
27 |   sds <- by(y, x, sd)
28 |   s1 <- as.numeric(sds[1])
29 |   s2 <- as.numeric(sds[2])
30 | 
31 |   # define degrees of freedom
32 |   df <- min(n1 - 1, n2 - 1)
33 |   
34 |   # find percentile associated with critical value
35 |   perc_crit_value <- conf_level + ((1 - conf_level) / 2)
36 |   
37 |   # find critical value
38 |   t_star <- qt(perc_crit_value, df)
39 |   
40 |   # calculate SE
41 |   se <- sqrt((s1^2 / n1) + (s2^2 / n2))
42 |   
43 |   # calculate ME
44 |   me <- t_star * se
45 |   
46 |   # calculate CI
47 |   ci <- y_bar_diff + c(-1, 1) * me
48 |   
49 |   # print variable types
50 |   if(show_var_types == TRUE){
51 |     n_x_levels <- length(levels(x))
52 |     cat(paste0("Response variable: numerical, Explanatory variable: categorical (", n_x_levels," levels)\n"))
53 |   }
54 |   
55 |   # print summary statistics
56 |   gr1 <- levels(x)[1]
57 |   gr2 <- levels(x)[2]
58 |   
59 |   if(show_summ_stats == TRUE){
60 |     cat(paste0("n_", gr1, " = ", n1, ", y_bar_", gr1, " = ", round(y_bar1, 4), ", s_", gr1, " = ", round(s1, 4), "\n"))
61 |     cat(paste0("n_", gr2, " = ", n2, ", y_bar_", gr2, " = ", round(y_bar2, 4), ", s_", gr2, " = ", round(s2, 4), "\n"))
62 |   }
63 |   
64 |   # print results
65 |   if(show_res == TRUE){
66 |     conf_level_perc = conf_level * 100
67 |     cat(paste0(conf_level_perc, "% CI (", gr1 ," - ", gr2,"): (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
68 |   }
69 |   
70 |   # eda_plot: one histogram per group, each with its group mean marked
71 |   d_eda <- data.frame(y = y, x = x)
72 |   d_means <- data.frame(y_bars = as.numeric(y_bars), x = levels(x))
73 |   
74 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
75 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
76 |     ggplot2::xlab(y_name) +
77 |     ggplot2::ylab(x_name) +
78 |     ggplot2::ggtitle("Sample Distribution") +
79 |     ggplot2::geom_vline(data = d_means, ggplot2::aes(xintercept = y_bars), col = "#1FBEC3", lwd = 1.5) +
80 |     ggplot2::facet_grid(x ~ .)
81 |   
82 |   # print plots
83 |   if(show_eda_plot){ print(eda_plot) }
84 |   # call. = FALSE keeps this internal call out of the warning shown to the
85 |   # user, matching ci_single_mean_theo
86 |   if(show_inf_plot){ warning("No inference plot available.", call. = FALSE) }
87 |   
88 |   # return
89 |   return(list(df = df, SE = se, ME = me, CI = ci))
90 | }


--------------------------------------------------------------------------------
/R/ci_two_median_sim.R:
--------------------------------------------------------------------------------
  1 | ci_two_median_sim <- function(y, x, conf_level, y_name, x_name,
  2 |                               boot_method, nsim, seed, 
  3 |                               show_var_types, show_summ_stats, show_res,
  4 |                               show_eda_plot, show_inf_plot){
  5 |   
  6 |   # set seed
  7 |   if(!is.null(seed)){ set.seed(seed) }
  8 |   
  9 |   # calculate n1 and n2
 10 |   ns <- by(y, x, length)
 11 |   n1 <- as.numeric(ns[1])
 12 |   n2 <- as.numeric(ns[2])
 13 |   
 14 |   # calculate y-bar1 and y-bar2
 15 |   y_meds <- by(y, x, median)
 16 |   y_med1 <- as.numeric(y_meds[1])
 17 |   y_med2 <- as.numeric(y_meds[2])
 18 |   
 19 |   # calculate difference in y-bars
 20 |   y_med_diff <- y_med1 - y_med2
 21 |   
 22 |   # create bootstrap distribution
 23 |   y1 <- y[x == levels(x)[1]]
 24 |   y2 <- y[x == levels(x)[2]]
 25 |   
 26 |   sim_dist <- rep(NA, nsim)
 27 |   for(i in 1:nsim){
 28 |     boot_samp1 <- sample(y1, size = n1, replace = TRUE)
 29 |     boot_samp2 <- sample(y2, size = n2, replace = TRUE)
 30 |     sim_dist[i] <- median(boot_samp1) - median(boot_samp2)
 31 |   }
 32 |   
 33 |   # for percentile method
 34 |   if(boot_method == "perc"){
 35 |     # calculate quantile cutoffs based on confidence level
 36 |     lower_quantile <- (1-conf_level) / 2
 37 |     upper_quantile <- conf_level + lower_quantile
 38 |     
 39 |     # calculate quantiles of the bootstrap distribution
 40 |     ci_lower <- as.numeric(quantile(sim_dist, lower_quantile))
 41 |     ci_upper <- as.numeric(quantile(sim_dist, upper_quantile))
 42 |     
 43 |     # put CI together
 44 |     ci <- c(ci_lower, ci_upper)
 45 |   }
 46 |   
 47 |   # for standard error method
 48 |   if(boot_method == "se"){
 49 |     # define degrees of freedom
 50 |     df <- min(n1 - 1, n2 - 1)
 51 |     
 52 |     # find percentile associated with critical value
 53 |     perc_crit_value <- conf_level + ((1 - conf_level) / 2)
 54 |     
 55 |     # find critical value
 56 |     t_star <- qt(perc_crit_value, df)
 57 |     
 58 |     # calculate SE
 59 |     se <- sd(sim_dist)
 60 |     
 61 |     # calculate ME
 62 |     me <- t_star * se
 63 |     
 64 |     # calculate CI
 65 |     ci <- y_med_diff + c(-1, 1) * me
 66 |   }
 67 |   
 68 |   # print variable types
 69 |   if(show_var_types == TRUE){
 70 |     n_x_levels <- length(levels(x))
 71 |     cat(paste0("Response variable: numerical, Explanatory variable: categorical (", n_x_levels," levels)\n"))
 72 |   }
 73 |   
 74 |   # print summary statistics
 75 |   gr1 <- levels(x)[1]
 76 |   gr2 <- levels(x)[2]
 77 |   
 78 |   if(show_summ_stats == TRUE){
 79 |     iqrs <- by(y, x, IQR)
 80 |     iqr1 <- as.numeric(iqrs[1])
 81 |     iqr2 <- as.numeric(iqrs[2])
 82 |     cat(paste0("n_", gr1, " = ", n1, ", y_med_", gr1, " = ", round(y_med1, 4), ", IQR_", gr1, " = ", round(iqr1, 4), "\n"))
 83 |     cat(paste0("n_", gr2, " = ", n2, ", y_med_", gr2, " = ", round(y_med2, 4), ", IQR_", gr2, " = ", round(iqr2, 4), "\n"))
 84 |   }
 85 |   
 86 |   # print results
 87 |   if(show_res == TRUE){
 88 |     conf_level_perc = conf_level * 100
 89 |     cat(paste0(conf_level_perc, "% CI (", gr1 ," - ", gr2,"): (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
 90 |   }
 91 |   
 92 |   # eda_plot
 93 |   d_eda <- data.frame(y = y, x = x)
 94 | 
 95 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = x, y = y), environment = environment()) +
 96 |     ggplot2::geom_boxplot(color = "#1FBEC3", fill = "#8FDEE1", outlier.colour = "#1FBEC3") +
 97 |     ggplot2::xlab(x_name) +
 98 |     ggplot2::ylab(y_name) +
 99 |     ggplot2::ggtitle("Sample Distribution")
100 | 
101 |   # inf_plot
102 |   d_inf <- data.frame(sim_dist = sim_dist)
103 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
104 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
105 |     ggplot2::annotate("rect", xmin = ci[1], xmax = ci[2], ymin = 0, ymax = Inf, 
106 |              alpha = 0.3, fill = "#FABAB8") +
107 |     ggplot2::xlab("bootstrap differences in medians") +
108 |     ggplot2::ylab("") +
109 |     ggplot2::ggtitle("Bootstrap Distribution") +
110 |     ggplot2::geom_vline(xintercept = ci, color = "#F57670", lwd = 1.5)
111 |   
112 |   # print plots
113 |   if(show_eda_plot & !show_inf_plot){ 
114 |     print(eda_plot)
115 |   }
116 |   if(!show_eda_plot & show_inf_plot){ 
117 |     print(inf_plot)
118 |   }
119 |   if(show_eda_plot & show_inf_plot){
120 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
121 |   }
122 |   
123 |   # return
124 |   if(boot_method == "perc"){
125 |     return(list(sim_dist = sim_dist, CI = ci))
126 |   } else {
127 |     return(list(sim_dist = sim_dist, SE = se, ME = me, CI = ci))
128 |   }
129 | }


--------------------------------------------------------------------------------
/R/ci_two_prop_sim.R:
--------------------------------------------------------------------------------
  1 | # Simulation-based (bootstrap) confidence interval for the difference between
  2 | # two proportions: group levels(x)[1] minus group levels(x)[2].
  3 | #
  4 | # y is the response, compared element-wise against `success`; x is the
  5 | # grouping factor. boot_method is "perc" (percentile interval) or "se"
  6 | # (observed difference +/- z* times the bootstrap standard error). nsim
  7 | # bootstrap replicates are drawn, and a non-NULL seed is passed to set.seed().
  8 | # x_name / y_name label the plots; the show_* flags toggle printed output and
  9 | # plots.
 10 | #
 11 | # Returns list(sim_dist, CI) when boot_method is "perc", otherwise
 12 | # list(sim_dist, SE, ME, CI).
 13 | ci_two_prop_sim <- function(y, x, success, conf_level, 
 14 |                             x_name, y_name,
 15 |                             boot_method, nsim, seed,
 16 |                             show_var_types, show_summ_stats, show_res,
 17 |                             show_eda_plot, show_inf_plot){
 18 |   
 19 |   # set seed
 20 |   if(!is.null(seed)){ set.seed(seed) }
 21 |   
 22 |   # group labels: defined up front because both the summary statistics and
 23 |   # the results printout use them
 24 |   gr1 <- levels(x)[1]
 25 |   gr2 <- levels(x)[2]
 26 |   
 27 |   # calculate n1 and n2
 28 |   ns <- by(y, x, length)
 29 |   n1 <- as.numeric(ns[1])
 30 |   n2 <- as.numeric(ns[2])
 31 |   
 32 |   # split the response by group
 33 |   y1 <- y[x == gr1]
 34 |   y2 <- y[x == gr2]
 35 |   
 36 |   # calculate p-hat1 and p-hat2
 37 |   p_hat1 <- sum(y1 == success) / n1
 38 |   p_hat2 <- sum(y2 == success) / n2
 39 |   
 40 |   # calculate difference in p-hats
 41 |   p_hat_diff <- p_hat1 - p_hat2
 42 |   
 43 |   # create bootstrap distribution: resample each group with replacement and
 44 |   # record the difference in proportions (seq_len() also handles nsim = 0)
 45 |   sim_dist <- vapply(seq_len(nsim), function(i){
 46 |     boot_samp1 <- sample(y1, size = n1, replace = TRUE)
 47 |     boot_samp2 <- sample(y2, size = n2, replace = TRUE)
 48 |     sum(boot_samp1 == success) / n1 - sum(boot_samp2 == success) / n2
 49 |   }, numeric(1))
 50 |   
 51 |   # for percentile method
 52 |   if(boot_method == "perc"){
 53 |     # calculate quantile cutoffs based on confidence level
 54 |     lower_quantile <- (1 - conf_level) / 2
 55 |     upper_quantile <- conf_level + lower_quantile
 56 |     
 57 |     # calculate quantiles of the bootstrap distribution
 58 |     ci_lower <- as.numeric(quantile(sim_dist, lower_quantile))
 59 |     ci_upper <- as.numeric(quantile(sim_dist, upper_quantile))
 60 |     
 61 |     # put CI together
 62 |     ci <- c(ci_lower, ci_upper)
 63 |   }
 64 |   
 65 |   # for standard error method
 66 |   if(boot_method == "se"){
 67 |     
 68 |     # find percentile associated with critical value
 69 |     perc_crit_value <- conf_level + ((1 - conf_level) / 2)
 70 |     
 71 |     # find critical value
 72 |     z_star <- qnorm(perc_crit_value)
 73 |     
 74 |     # calculate SE
 75 |     se <- sd(sim_dist)
 76 |     
 77 |     # calculate ME
 78 |     me <- z_star * se
 79 |     
 80 |     # calculate CI
 81 |     ci <- p_hat_diff + c(-1, 1) * me
 82 |   }
 83 |   
 84 |   # print variable types (y is the response, x is the explanatory variable)
 85 |   if(show_var_types == TRUE){
 86 |     n_x_levels <- length(levels(x))
 87 |     n_y_levels <- length(levels(y))
 88 |     cat(paste0("Response variable: categorical (", n_y_levels, " levels, success: ", success, ")\n"))
 89 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
 90 |   }
 91 |   
 92 |   # print summary statistics
 93 |   if(show_summ_stats == TRUE){
 94 |     cat(paste0("n_", gr1, " = ", n1, ", p_hat_", gr1, " = ", round(p_hat1, 4), "\n"))
 95 |     cat(paste0("n_", gr2, " = ", n2, ", p_hat_", gr2, " = ", round(p_hat2, 4), "\n"))
 96 |   }
 97 |   
 98 |   # print results
 99 |   if(show_res == TRUE){
100 |     conf_level_perc <- conf_level * 100
101 |     cat(paste0(conf_level_perc, "% CI (", gr1 ," - ", gr2,"): (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
102 |   }
103 |   
104 |   # eda_plot
105 |   d_eda <- data.frame(y = y, x = x)
106 |   
107 |   # order the two fill colours so that "#1FBEC3" lands on the success level
108 |   # (assumes y is a factor with two levels -- TODO confirm against caller)
109 |   if(which(levels(y) == success) == 1){ 
110 |     fill_values <- c("#1FBEC3", "#8FDEE1") 
111 |   } else {
112 |     fill_values <- c("#8FDEE1", "#1FBEC3") 
113 |   }
114 |   
115 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = x, fill = y), environment = environment()) +
116 |     ggplot2::geom_bar() +
117 |     ggplot2::scale_fill_manual(values = fill_values) +
118 |     ggplot2::xlab(x_name) +
119 |     ggplot2::ylab("") +
120 |     ggplot2::ggtitle("Sample Distribution") +
121 |     ggplot2::guides(fill = ggplot2::guide_legend(title = y_name))
122 |   
123 |   # inf_plot: bootstrap histogram with the interval shaded and its bounds marked
124 |   d_inf <- data.frame(sim_dist = sim_dist)
125 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
126 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
127 |     ggplot2::annotate("rect", xmin = ci[1], xmax = ci[2], ymin = 0, ymax = Inf, 
128 |              alpha = 0.3, fill = "#FABAB8") +
129 |     ggplot2::xlab("bootstrap differences in proportions") +
130 |     ggplot2::ylab("") +
131 |     ggplot2::ggtitle("Bootstrap Distribution") +
132 |     ggplot2::geom_vline(xintercept = ci, color = "#F57670", lwd = 1.5)
133 |   
134 |   # print plots: side by side when both are requested, otherwise whichever
135 |   # single plot was asked for
136 |   if(show_eda_plot && show_inf_plot){
137 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
138 |   } else if(show_eda_plot){
139 |     print(eda_plot)
140 |   } else if(show_inf_plot){
141 |     print(inf_plot)
142 |   }
143 |   
144 |   # return
145 |   if(boot_method == "perc"){
146 |     return(list(sim_dist = sim_dist, CI = ci))
147 |   } else {
148 |     return(list(sim_dist = sim_dist, SE = se, ME = me, CI = ci))
149 |   }
150 |   
151 | }


--------------------------------------------------------------------------------
/R/ci_two_prop_theo.R:
--------------------------------------------------------------------------------
 1 | # Theoretical (normal-approximation) confidence interval for the difference
 2 | # between two proportions: group levels(x)[1] minus group levels(x)[2].
 3 | #
 4 | # y is the response, compared element-wise against `success`; x is the
 5 | # grouping factor. conf_level is the confidence level as a proportion.
 6 | # x_name / y_name label the plot; the show_* flags toggle printed output and
 7 | # the plot (this method has no inference plot and warns if one is requested).
 8 | #
 9 | # Returns list(SE, ME, CI).
10 | ci_two_prop_theo <- function(y, x, success, conf_level, 
11 |                              x_name, y_name,
12 |                              show_var_types, show_summ_stats, show_res,
13 |                              show_eda_plot, show_inf_plot){
14 |   
15 |   # group labels: defined up front because both the summary statistics and
16 |   # the results printout use them
17 |   gr1 <- levels(x)[1]
18 |   gr2 <- levels(x)[2]
19 |   
20 |   # calculate n1 and n2
21 |   ns <- by(y, x, length)
22 |   n1 <- as.numeric(ns[1])
23 |   n2 <- as.numeric(ns[2])
24 |   
25 |   # calculate p-hat1 and p-hat2
26 |   p_hat1 <- sum(y[x == gr1] == success) / n1
27 |   p_hat2 <- sum(y[x == gr2] == success) / n2
28 |   
29 |   # calculate difference in p-hats
30 |   p_hat_diff <- p_hat1 - p_hat2
31 |   
32 |   # find percentile associated with critical value
33 |   perc_crit_value <- conf_level + ((1 - conf_level) / 2)
34 |   
35 |   # find critical value
36 |   z_star <- qnorm(perc_crit_value)
37 |   
38 |   # calculate SE (each group contributes its own p-hat, i.e. unpooled)
39 |   se <- sqrt((p_hat1 * (1 - p_hat1) / n1) + (p_hat2 * (1 - p_hat2) / n2))
40 |   
41 |   # calculate ME
42 |   me <- z_star * se
43 |   
44 |   # calculate CI
45 |   ci <- p_hat_diff + c(-1, 1) * me
46 |   
47 |   # print variable types (y is the response, x is the explanatory variable)
48 |   if(show_var_types == TRUE){
49 |     n_x_levels <- length(levels(x))
50 |     n_y_levels <- length(levels(y))
51 |     cat(paste0("Response variable: categorical (", n_y_levels, " levels, success: ", success, ")\n"))
52 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
53 |   }
54 |   
55 |   # print summary statistics
56 |   if(show_summ_stats == TRUE){
57 |     cat(paste0("n_", gr1, " = ", n1, ", p_hat_", gr1, " = ", round(p_hat1, 4), "\n"))
58 |     cat(paste0("n_", gr2, " = ", n2, ", p_hat_", gr2, " = ", round(p_hat2, 4), "\n"))
59 |   }
60 |   
61 |   # print results
62 |   if(show_res == TRUE){
63 |     conf_level_perc <- conf_level * 100
64 |     cat(paste0(conf_level_perc, "% CI (", gr1 ," - ", gr2,"): (", round(ci[1], 4), " , ", round(ci[2], 4), ")\n"))
65 |   }
66 |   
67 |   # eda_plot
68 |   d_eda <- data.frame(y = y, x = x)
69 |   
70 |   # order the two fill colours so that "#1FBEC3" lands on the success level
71 |   # (assumes y is a factor with two levels -- TODO confirm against caller)
72 |   if(which(levels(y) == success) == 1){ 
73 |     fill_values <- c("#1FBEC3", "#8FDEE1") 
74 |   } else {
75 |     fill_values <- c("#8FDEE1", "#1FBEC3") 
76 |   }
77 |   
78 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = x, fill = y), environment = environment()) +
79 |     ggplot2::geom_bar(position = "fill") +
80 |     ggplot2::scale_fill_manual(values = fill_values) +
81 |     ggplot2::xlab(x_name) +
82 |     ggplot2::ylab("") +
83 |     ggplot2::ggtitle("Sample Distribution") +
84 |     ggplot2::guides(fill = ggplot2::guide_legend(title = y_name))
85 |   
86 |   # print plots
87 |   if(show_eda_plot){ print(eda_plot) }
88 |   if(show_inf_plot){ warning("No inference plot available.") }
89 |   
90 |   # return
91 |   return(list(SE = se, ME = me, CI = ci))
92 | }


--------------------------------------------------------------------------------
/R/evals.R:
--------------------------------------------------------------------------------
 1 | #' Teacher evaluations at the University of Texas at Austin
 2 | #'
 3 | #' The data were gathered from end of semester student evaluations for a large
 4 | #' sample of professors from the University of Texas at Austin (variables beginning
 5 | #' with \code{cls}). In addition, six students rated the professors' physical
 6 | #' appearance (variables beginning with \code{bty}). This is a slightly modified
 7 | #' version of the original data set that was released as part of the replication
 8 | #' data for Data Analysis Using Regression and Multilevel/Hierarchical Models
 9 | #' (Gelman and Hill, 2007).
10 | #'
11 | #' @format A data frame with 463 rows and 21 variables:
12 | #' \describe{
13 | #'   \item{score}{Average professor evaluation score: (1) very unsatisfactory - (5) excellent}
14 | #'   \item{rank}{Rank of professor: teaching, tenure track, tenure}
15 | #'   \item{ethnicity}{Ethnicity of professor: not minority, minority}
16 | #'   \item{gender}{Gender of professor: female, male}
17 | #'   \item{language}{Language of school where professor received education: english or non-english}
18 | #'   \item{age}{Age of professor}
19 | #'   \item{cls_perc_eval}{Percent of students in class who completed evaluation}
20 | #'   \item{cls_did_eval}{Number of students in class who completed evaluation}
21 | #'   \item{cls_students}{Total number of students in class}
22 | #'   \item{cls_level}{Class level: lower, upper}
23 | #'   \item{cls_profs}{Number of professors teaching sections in course in sample: single, multiple}
24 | #'   \item{cls_credits}{Number of credits of class: one credit (lab, PE, etc.), multi credit}
25 | #'   \item{bty_f1lower}{Beauty rating of professor from lower level female: (1) lowest - (10) highest}
26 | #'   \item{bty_f1upper}{Beauty rating of professor from upper level female: (1) lowest - (10) highest}
27 | #'   \item{bty_f2upper}{Beauty rating of professor from second upper level female: (1) lowest - (10) highest}
28 | #'   \item{bty_m1lower}{Beauty rating of professor from lower level male: (1) lowest - (10) highest}
29 | #'   \item{bty_m1upper}{Beauty rating of professor from upper level male: (1) lowest - (10) highest}
30 | #'   \item{bty_m2upper}{Beauty rating of professor from second upper level male: (1) lowest - (10) highest}
31 | #'   \item{bty_avg}{Average beauty rating of professor}
32 | #'   \item{pic_outfit}{Outfit of professor in picture: not formal, formal}
33 | #'   \item{pic_color}{Color of professor's picture: color, black & white}
34 | #' }
35 | #' @source These data appear in Hamermesh DS, and Parker A. 2005. Beauty in the
36 | #' classroom: instructors' pulchritude and putative pedagogical productivity. Economics of Education Review
37 | #'  24(4):369-376.
38 | "evals"


--------------------------------------------------------------------------------
/R/globals.R:
--------------------------------------------------------------------------------
1 | utils::globalVariables(c("outcome", "play", "prob", "x_bar")) # registers these names as global variables for R CMD check; presumably they are column names used via non-standard evaluation elsewhere in the package -- confirm


--------------------------------------------------------------------------------
/R/ht_many_mean_theo.R:
--------------------------------------------------------------------------------
  1 | # One-way ANOVA (theoretical F test) comparing the mean of y across the
  2 | # levels of x. When results are shown and the ANOVA p-value is below
  3 | # sig_level, unadjusted pooled-SD pairwise t tests are printed as well.
  4 | #
  5 | # y is the numerical response and x the grouping factor; x_name / y_name label
  6 | # the output. null and alternative are accepted but not used in this function.
  7 | # The show_* flags toggle printed output and plots.
  8 | #
  9 | # Returns list(F, df1, df2, p_value).
 10 | ht_many_mean_theo <- function(y, x, null, alternative, sig_level,
 11 |                               y_name, x_name, 
 12 |                               show_var_types, show_summ_stats, show_res,
 13 |                               show_eda_plot, show_inf_plot){
 14 |   # group labels and count: computed up front because the summary printout
 15 |   # needs them even when show_var_types is FALSE
 16 |   grs <- levels(x)
 17 |   n_x_levels <- length(grs)
 18 |   
 19 |   # summary stats (per group)
 20 |   ns <- by(y, x, length)
 21 |   y_bars <- by(y, x, mean)
 22 |   sds <- by(y, x, sd)
 23 |   
 24 |   # anova
 25 |   res <- anova(lm(y ~ x))
 26 |   
 27 |   # anova pieces
 28 |   terms <- c(x_name, "Residuals", "Total")
 29 |   deg_frs <- res$Df
 30 |   ss <- res$`Sum Sq`
 31 |   ms <- res$`Mean Sq`
 32 |   stat <- res$`F value`[1]
 33 |   p_value <- res$`Pr(>F)`[1]
 34 |   
 35 |   # calculate totals and append them as a third ("Total") entry
 36 |   ss <- c(ss, sum(ss))
 37 |   deg_frs <- c(deg_frs, sum(deg_frs))
 38 |   
 39 |   # format the table columns as text; NA cells are printed as blanks below
 40 |   ss_format <- as.character(round(ss, 4))
 41 |   ms_format <- as.character(c(round(ms, 4), NA))
 42 |   stat_format <- as.character(c(round(stat, 4), NA, NA))
 43 |   p_value_format <- as.character(c(ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4)), NA, NA))
 44 |   
 45 |   # format output
 46 |   anova_output <- data.frame(
 47 |     df = deg_frs,
 48 |     Sum_Sq = ss_format,
 49 |     Mean_Sq = ms_format,
 50 |     F = stat_format,
 51 |     p_value = p_value_format, 
 52 |     row.names = terms
 53 |   )
 54 |   
 55 |   # print variable types
 56 |   if(show_var_types == TRUE){
 57 |     cat(paste0("Response variable: numerical\n"))
 58 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
 59 |   }
 60 |   
 61 |   # print summary statistics, one line per group
 62 |   if(show_summ_stats == TRUE){
 63 |     for(i in seq_len(n_x_levels)){
 64 |       cat(paste0("n_", grs[i], " = ", ns[i], ", y_bar_", grs[i], " = ", round(y_bars[i], 4),
 65 |                  ", s_", grs[i], " = ", round(sds[i], 4), "\n"))
 66 |     }
 67 |     cat("\n")
 68 |   }
 69 | 
 70 |   # print results
 71 |   if(show_res == TRUE){
 72 |     cat("ANOVA:\n")
 73 |     print(anova_output, na.print = "", digits = 4)
 74 |     
 75 |     # post-hoc tests (if ANOVA is significant)
 76 |     if(p_value < sig_level){
 77 |       cat("\nPairwise tests - ")
 78 |       pairwise <- pairwise.t.test(y, x, p.adjust.method = "none", pool.sd = TRUE)
 79 |       cat(paste0(pairwise$method, ":\n"))
 80 |       print(broom::tidy(pairwise), digits = 4)
 81 |     }
 82 |   }
 83 |   
 84 |   # eda_plot: one histogram per group with the group mean marked
 85 |   d_eda <- data.frame(y = y, x = x)
 86 |   d_means <- data.frame(y_bars = as.numeric(y_bars), x = grs)
 87 |   
 88 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 89 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
 90 |     ggplot2::xlab(y_name) +
 91 |     ggplot2::ylab(x_name) +
 92 |     ggplot2::ggtitle("Sample Distribution") +
 93 |     ggplot2::geom_vline(data = d_means, ggplot2::aes(xintercept = y_bars), col = "#1FBEC3", lwd = 1.5) +
 94 |     ggplot2::facet_grid(x ~ .)
 95 |   
 96 |   # inf_plot: F density with the area beyond the observed statistic shaded;
 97 |   # the x range is widened when the statistic lies past the 99th percentile
 98 |   x_max <- max(qf(0.99, df1 = deg_frs[1], df2 = deg_frs[2]), stat*1.1)
 99 |   inf_plot <- ggplot2::ggplot(data.frame(x = c(0, x_max)), ggplot2::aes(x)) +
100 |     ggplot2::stat_function(fun = df, args = list(df1 = deg_frs[1], df2 = deg_frs[2]), color = "#999999") +
101 |     ggplot2::annotate("rect", xmin = stat, xmax = stat+Inf, ymin = 0, ymax = Inf, 
102 |              alpha = 0.3, fill = "#FABAB8") +
103 |     ggplot2::ggtitle(paste0("F Distribution\n(df_G = ", deg_frs[1], ", df_E = ", deg_frs[2], ")")) +
104 |     ggplot2::xlab("") +
105 |     ggplot2::ylab("") +
106 |     ggplot2::geom_vline(xintercept = stat, color = "#F57670", lwd = 1.5)
107 |   
108 |   # print plots: side by side when both are requested, otherwise whichever
109 |   # single plot was asked for
110 |   if(show_eda_plot && show_inf_plot){
111 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
112 |   } else if(show_eda_plot){
113 |     print(eda_plot)
114 |   } else if(show_inf_plot){
115 |     print(inf_plot)
116 |   }
117 |   
118 |   # return
119 |   return(list(F = stat, df1 = deg_frs[1], df2 = deg_frs[2], p_value = p_value))
120 | }


--------------------------------------------------------------------------------
/R/ht_many_prop_sim.R:
--------------------------------------------------------------------------------
 1 | # Simulation-based chi-square test of independence between x and y.
 2 | # Optionally prints variable types, observed/expected tables and the test
 3 | # result, draws the sample-distribution bar plot, and returns
 4 | # list(chi_sq, p_value). This method has no inference plot; requesting one
 5 | # only raises a warning.
 6 | ht_many_prop_sim <- function(y, x, x_name, y_name, seed, nsim,
 7 |                               show_var_types, show_summ_stats, show_res,
 8 |                               show_eda_plot, show_inf_plot){
 9 | 
10 |   # evaluate both arguments before anything else happens
11 |   length(x)
12 |   length(y)
13 | 
14 |   # seed the RNG only when a seed was supplied
15 |   if(!is.null(seed)){
16 |     set.seed(seed)
17 |   }
18 | 
19 |   # Monte Carlo chi-square test; the replicate count is min(2000, nsim)
20 |   n_rep <- min(2000, nsim)
21 |   chisq_res <- chisq.test(x, y, correct = FALSE, simulate.p.value = TRUE, B = n_rep)
22 |   chi_sq <- as.numeric(chisq_res$statistic)
23 |   p_val <- chisq_res$p.value
24 | 
25 |   # describe the variables
26 |   if(show_var_types == TRUE){
27 |     resp_msg <- paste0("Response variable: categorical (", length(levels(y)), " levels) \n")
28 |     expl_msg <- paste0("Explanatory variable: categorical (", length(levels(x)), " levels) \n")
29 |     cat(resp_msg)
30 |     cat(expl_msg)
31 |   }
32 | 
33 |   # observed and expected contingency tables
34 |   if(show_summ_stats == TRUE){
35 |     cat("Observed:\n")
36 |     print(chisq_res$observed)
37 |     cat("\nExpected:\n")
38 |     print(chisq_res$expected)
39 |     cat("\n")
40 |   }
41 | 
42 |   # hypotheses and test result
43 |   if(show_res == TRUE){
44 |     pair_label <- paste0(x_name, " and ", y_name)
45 |     cat(paste0("H0: ", pair_label, " are independent\n"))
46 |     cat(paste0("HA: ", pair_label, " are dependent\n"))
47 |     cat(paste0("chi_sq = ", round(chi_sq, 4),
48 |                ", p_value = ", round(p_val, 4), "\n"))
49 |   }
50 | 
51 |   # stacked proportion bars of y within each level of x, with one colour per
52 |   # level of y interpolated between the two endpoint colours
53 |   sample_df <- data.frame(y = y, x = x)
54 |   make_palette <- grDevices::colorRampPalette(c("#1FBEC3", "#C7EEF0"))
55 |   fill_values <- make_palette(length(levels(y)))
56 | 
57 |   eda_plot <- ggplot2::ggplot(data = sample_df, ggplot2::aes(x = x, fill = y), environment = environment()) +
58 |     ggplot2::geom_bar(position = "fill") +
59 |     ggplot2::scale_fill_manual(values = fill_values) +
60 |     ggplot2::xlab(x_name) +
61 |     ggplot2::ylab("") +
62 |     ggplot2::ggtitle("Sample Distribution") +
63 |     ggplot2::guides(fill = ggplot2::guide_legend(title = y_name))
64 | 
65 |   # show the plot, and warn if an inference plot was requested
66 |   if(show_eda_plot){
67 |     print(eda_plot)
68 |   }
69 |   if(show_inf_plot){
70 |     warning("No inference plot available.")
71 |   }
72 | 
73 |   # test statistic and simulated p-value
74 |   return(list(chi_sq = chi_sq, p_value = p_val))
75 | }


--------------------------------------------------------------------------------
/R/ht_many_prop_theo.R:
--------------------------------------------------------------------------------
 1 | # Theoretical chi-square test of independence between x and y.
 2 | #
 3 | # y is the response and x the explanatory variable (both categorical);
 4 | # x_name / y_name label the output. The show_* flags toggle printed output
 5 | # and plots.
 6 | #
 7 | # Returns list(chi_sq, df, p_value).
 8 | ht_many_prop_theo <- function(y, x, x_name, y_name, 
 9 |                               show_var_types, show_summ_stats, show_res,
10 |                               show_eda_plot, show_inf_plot){
11 |   
12 |   # chi-sq test of independence (no continuity correction)
13 |   res <- chisq.test(x, y, correct = FALSE)
14 |   stat <- res$statistic
15 |   deg_fr <- res$parameter
16 |   p_value <- res$p.value
17 | 
18 |   # print variable types
19 |   if(show_var_types == TRUE){
20 |     n_x_levels <- length(levels(x))
21 |     n_y_levels <- length(levels(y))
22 |     cat(paste0("Response variable: categorical (", n_y_levels, " levels) \n"))
23 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
24 |   }
25 |   
26 |   # print summary statistics
27 |   if(show_summ_stats == TRUE){
28 |     cat("Observed:\n")
29 |     print(res$observed) 
30 |     cat("\n")
31 |     cat("Expected:\n")
32 |     print(res$expected)
33 |     cat("\n")
34 |   }
35 |   
36 |   # print results
37 |   if(show_res == TRUE){
38 |     # a p-value that rounds to zero is reported as "< 0.0001" rather than "0"
39 |     if(isTRUE(round(p_value, 4) == 0)){
40 |       p_val_to_print <- "< 0.0001"
41 |     } else {
42 |       p_val_to_print <- round(p_value, 4)
43 |     }
44 |     cat(paste0("H0: ", x_name, " and ", y_name, " are independent\n"))
45 |     cat(paste0("HA: ", x_name, " and ", y_name, " are dependent\n"))
46 |     cat(paste0("chi_sq = ", round(as.numeric(stat), 4), ", df = ", as.numeric(deg_fr),
47 |               ", p_value = ", p_val_to_print, "\n"))
48 |   }
49 |   
50 |   # eda_plot
51 |   d_eda <- data.frame(y = y, x = x)
52 |   
53 |   # one fill colour per level of y, interpolated between the two endpoints
54 |   n_fill_values <- length(levels(y))
55 |   fill_values <- grDevices::colorRampPalette(c("#1FBEC3", "#C7EEF0"))( n_fill_values )
56 | 
57 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = x, fill = y), environment = environment()) +
58 |     ggplot2::geom_bar(position = "fill") +
59 |     ggplot2::scale_fill_manual(values = fill_values) +
60 |     ggplot2::xlab(x_name) +
61 |     ggplot2::ylab("") +
62 |     ggplot2::ggtitle("Sample Distribution") +
63 |     ggplot2::guides(fill = ggplot2::guide_legend(title = y_name))
64 |   
65 |   # inf_plot: chi-square density with the area beyond the observed statistic
66 |   # shaded; the x range is widened when the statistic lies past the 99th percentile
67 |   x_max <- max(qchisq(0.99, df = deg_fr), stat*1.1)
68 |   inf_plot <- ggplot2::ggplot(data.frame(x = c(0, x_max)), ggplot2::aes(x)) +
69 |     ggplot2::stat_function(fun = dchisq, args = list(df = deg_fr), color = "#999999") +
70 |     ggplot2::annotate("rect", xmin = stat, xmax = stat+Inf, ymin = 0, ymax = Inf, 
71 |              alpha = 0.3, fill = "#FABAB8") +
72 |     ggplot2::ggtitle(paste0("Chi-sq Distribution\n(df = ", deg_fr, ")")) +
73 |     ggplot2::xlab("") +
74 |     ggplot2::ylab("") +
75 |     ggplot2::geom_vline(xintercept = stat, color = "#F57670", lwd = 1.5)
76 |   
77 |   # print plots: side by side when both are requested, otherwise whichever
78 |   # single plot was asked for
79 |   if(show_eda_plot && show_inf_plot){
80 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
81 |   } else if(show_eda_plot){
82 |     print(eda_plot)
83 |   } else if(show_inf_plot){
84 |     print(inf_plot)
85 |   }
86 | 
87 |   # return
88 |   return(list(chi_sq = as.numeric(stat), df = as.numeric(deg_fr), p_value = p_value))
89 | }


--------------------------------------------------------------------------------
/R/ht_single_mean_sim.R:
--------------------------------------------------------------------------------
  1 | # Simulation-based (bootstrap) hypothesis test for a single mean.
  2 | #
  3 | # y is the numerical sample, null the hypothesized mean, and alternative one
  4 | # of "greater", "less" or "twosided". nsim bootstrap means are drawn (a
  5 | # non-NULL seed is passed to set.seed()) and shifted so that their average
  6 | # equals null. The p-value is the share of shifted means at least as extreme
  7 | # as the observed mean, doubled and capped at 1 for "twosided".
  8 | # y_name labels the plot; the show_* flags toggle printed output and plots.
  9 | #
 10 | # Returns list(sim_dist, p_value), where sim_dist is the shifted distribution.
 11 | ht_single_mean_sim <- function(y, null, alternative, y_name,
 12 |                                nsim, seed, 
 13 |                                show_var_types, show_summ_stats, show_res,
 14 |                                show_eda_plot, show_inf_plot){
 15 | 
 16 |   # set seed
 17 |   if(!is.null(seed)){ set.seed(seed) }
 18 |   
 19 |   # calculate sample size
 20 |   n <- length(y) 
 21 |   
 22 |   # calculate y-bar
 23 |   y_bar <- mean(y)
 24 |   
 25 |   # create bootstrap distribution (seq_len() also handles nsim = 0)
 26 |   boot_means <- vapply(seq_len(nsim), function(i){
 27 |     mean(sample(y, size = n, replace = TRUE))
 28 |   }, numeric(1))
 29 |   
 30 |   # center bootstrap distribution at null
 31 |   sim_dist <- boot_means - (mean(boot_means) - null)
 32 |   
 33 |   # shading cutoffs
 34 |   if(alternative == "greater"){ x_min <- y_bar; x_max <- Inf }
 35 |   if(alternative == "less"){ x_min <- -Inf; x_max <- y_bar }
 36 |   if(alternative == "twosided"){
 37 |     # two rectangles: from the observed mean and from its mirror image
 38 |     # around null, each extending outward to +/- Inf
 39 |     if(y_bar >= null){
 40 |       x_min <- c(null - (y_bar - null), y_bar)
 41 |     } else {
 42 |       x_min <- c(y_bar, null + (null - y_bar))
 43 |     }
 44 |     x_max <- c(-Inf, Inf)
 45 |   }
 46 |   
 47 |   # calculate p-value
 48 |   if(alternative == "greater"){ p_value <- sum(sim_dist >= y_bar) / nsim }
 49 |   if(alternative == "less"){ p_value <- sum(sim_dist <= y_bar) / nsim }
 50 |   if(alternative == "twosided"){
 51 |     if(y_bar > null){
 52 |       p_value <- min(2 * (sum(sim_dist >= y_bar) / nsim), 1)
 53 |     } else if(y_bar < null){
 54 |       p_value <- min(2 * (sum(sim_dist <= y_bar) / nsim), 1)
 55 |     } else {
 56 |       p_value <- 1
 57 |     }
 58 |   }
 59 |   
 60 |   # print variable types
 61 |   if(show_var_types == TRUE){
 62 |     cat("Single numerical variable\n")
 63 |   }
 64 |   
 65 |   # print summary statistics
 66 |   if(show_summ_stats == TRUE){
 67 |     s <- sd(y)
 68 |     cat(paste0("n = ", n, ", y-bar = ", round(y_bar, 4), ", s = ", round(s, 4), "\n"))
 69 |   }
 70 |   
 71 |   # print results
 72 |   if(show_res == TRUE){
 73 |     if(alternative == "greater"){
 74 |       alt_sign <- ">"
 75 |     } else if(alternative == "less"){
 76 |       alt_sign <- "<"
 77 |     } else {
 78 |       alt_sign <- "!="
 79 |     }
 80 |     cat(paste0("H0: mu = ", null, "\n"))
 81 |     cat(paste0("HA: mu ", alt_sign, " ", null, "\n"))
 82 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 83 |     # end the line so that later console output starts on a fresh line
 84 |     cat(paste0("p_value = ", p_val_to_print, "\n"))
 85 |   }
 86 | 
 87 |   # eda_plot
 88 |   d_eda <- data.frame(y = y)
 89 |   
 90 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 91 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
 92 |     ggplot2::xlab(y_name) +
 93 |     ggplot2::ylab("") +
 94 |     ggplot2::ggtitle("Sample Distribution") +
 95 |     ggplot2::geom_vline(xintercept = y_bar, col = "#1FBEC3", lwd = 1.5)
 96 |   
 97 |   # inf_plot
 98 |   d_inf <- data.frame(sim_dist = sim_dist)
 99 |   
100 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
101 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
102 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
103 |              alpha = 0.3, fill = "#FABAB8") +
104 |     ggplot2::xlab("simulated means") +
105 |     ggplot2::ylab("") +
106 |     ggplot2::ggtitle("Null Distribution") +
107 |     ggplot2::geom_vline(xintercept = y_bar, color = "#F57670", lwd = 1.5)
108 |   
109 |   # print plots: side by side when both are requested, otherwise whichever
110 |   # single plot was asked for (plotting warnings are suppressed)
111 |   if(show_eda_plot && show_inf_plot){
112 |     suppressWarnings(gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2))
113 |   } else if(show_eda_plot){
114 |     suppressWarnings(print(eda_plot))
115 |   } else if(show_inf_plot){
116 |     suppressWarnings(print(inf_plot))
117 |   }
118 |   
119 |   # return
120 |   return(list(sim_dist = sim_dist, p_value = p_value))
121 |   
122 | }


--------------------------------------------------------------------------------
/R/ht_single_mean_theo.R:
--------------------------------------------------------------------------------
  1 | # Theoretical (t-based) hypothesis test for a single mean.
  2 | #
  3 | # y is the numerical sample, null the hypothesized mean, and alternative one
  4 | # of "greater", "less" or "twosided". y_name labels the plot; the show_*
  5 | # flags toggle printed output and plots.
  6 | #
  7 | # Returns list(SE, t, df, p_value).
  8 | ht_single_mean_theo <- function(y, null, alternative, y_name,
  9 |                                 show_var_types, show_summ_stats, show_res,
 10 |                                 show_eda_plot, show_inf_plot){
 11 | 
 12 |   # calculate sample size
 13 |   n <- length(y) 
 14 | 
 15 |   # calculate y-bar
 16 |   y_bar <- mean(y)
 17 |   
 18 |   # calculate s
 19 |   s <- sd(y)
 20 |   
 21 |   # calculate SE
 22 |   se <- s / sqrt(n)
 23 |   
 24 |   # calculate test statistic (named t_stat so it does not mask base::t)
 25 |   t_stat <- (y_bar - null) / se
 26 |   
 27 |   # define degrees of freedom
 28 |   deg_fr <- n - 1
 29 | 
 30 |   # shading cutoffs
 31 |   if(alternative == "greater"){ x_min <- y_bar; x_max <- Inf }
 32 |   if(alternative == "less"){ x_min <- -Inf; x_max <- y_bar }
 33 |   if(alternative == "twosided"){
 34 |     # two rectangles: from the observed mean and from its mirror image
 35 |     # around null, each extending outward to +/- Inf
 36 |     if(y_bar >= null){
 37 |       x_min <- c(null - (y_bar - null), y_bar)
 38 |     } else {
 39 |       x_min <- c(y_bar, null + (null - y_bar))
 40 |     }
 41 |     x_max <- c(-Inf, Inf)
 42 |   }
 43 |   
 44 |   # calculate p-value
 45 |   if(alternative == "greater"){ p_value <- pt(t_stat, deg_fr, lower.tail = FALSE) }
 46 |   if(alternative == "less"){ p_value <- pt(t_stat, deg_fr, lower.tail = TRUE) }
 47 |   if(alternative == "twosided"){
 48 |     p_value <- pt(abs(t_stat), deg_fr, lower.tail = FALSE) * 2
 49 |   }
 50 | 
 51 |   # print variable types
 52 |   if(show_var_types == TRUE){
 53 |     cat("Single numerical variable\n")
 54 |   }
 55 |   
 56 |   # print summary statistics
 57 |   if(show_summ_stats == TRUE){
 58 |     cat(paste0("n = ", n, ", y-bar = ", round(y_bar, 4), ", s = ", round(s, 4), "\n"))
 59 |   }
 60 |   
 61 |   # print results
 62 |   if(show_res == TRUE){
 63 |     if(alternative == "greater"){
 64 |       alt_sign <- ">"
 65 |     } else if(alternative == "less"){
 66 |       alt_sign <- "<"
 67 |     } else {
 68 |       alt_sign <- "!="
 69 |     }
 70 |     cat(paste0("H0: mu = ", null, "\n"))
 71 |     cat(paste0("HA: mu ", alt_sign, " ", null, "\n"))
 72 |     cat(paste0("t = ", round(t_stat, 4), ", df = ", deg_fr, "\n"))
 73 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 74 |     # end the line so that later console output starts on a fresh line
 75 |     cat(paste0("p_value = ", p_val_to_print, "\n"))
 76 |   }
 77 |   
 78 |   # eda_plot
 79 |   d_eda <- data.frame(y = y)
 80 |   
 81 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes_string(x = 'y'), environment = environment()) +
 82 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
 83 |     ggplot2::xlab(y_name) +
 84 |     ggplot2::ylab("") +
 85 |     ggplot2::ggtitle("Sample Distribution") +
 86 |     ggplot2::geom_vline(xintercept = y_bar, col = "#1FBEC3", lwd = 1.5)
 87 |   
 88 |   # inf_plot ### TO DO: remove y axis ticks
 89 |   # NOTE(review): the curve drawn is a normal density with mean null and
 90 |   # sd se, while the p-value above comes from a t distribution with deg_fr
 91 |   # degrees of freedom -- confirm whether a scaled t density was intended
 92 |   d_inf <- data.frame(x = c(null - 4*se, null + 4*se))
 93 |   inf_plot <- ggplot2::ggplot(d_inf, ggplot2::aes_string(x = 'x')) + 
 94 |     ggplot2::stat_function(fun = dnorm, args = list(mean = null, sd = se), color = "#999999") +
 95 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
 96 |              alpha = 0.3, fill = "#FABAB8") +
 97 |     ggplot2::ggtitle("Null Distribution") +
 98 |     ggplot2::xlab("") +
 99 |     ggplot2::ylab("") +
100 |     ggplot2::geom_vline(xintercept = y_bar, color = "#F57670", lwd = 1.5)
101 |   
102 |   # print plots: side by side when both are requested, otherwise whichever
103 |   # single plot was asked for
104 |   if(show_eda_plot && show_inf_plot){
105 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
106 |   } else if(show_eda_plot){
107 |     print(eda_plot)
108 |   } else if(show_inf_plot){
109 |     print(inf_plot)
110 |   }
111 | 
112 |   # return
113 |   return(list(SE = se, t = t_stat, df = deg_fr, p_value = p_value))  
114 | }


--------------------------------------------------------------------------------
/R/ht_single_median_sim.R:
--------------------------------------------------------------------------------
  1 | ht_single_median_sim <- function(y, null, alternative, y_name,
  2 |                                  nsim, seed, 
  3 |                                  show_var_types, show_summ_stats, show_res,
  4 |                                  show_eda_plot, show_inf_plot){
  5 | 
  6 |   # set seed
  7 |   if(!is.null(seed)){ set.seed(seed) }
  8 |   
  9 |   # calculate sample size
 10 |   n <- length(y) 
 11 |   
 12 |   # calculate y-bar
 13 |   y_med <- median(y)
 14 |   
 15 |   # create bootstrap distribution
 16 |   sim_dist <- rep(NA, nsim)
 17 |   for(i in 1:nsim){
 18 |     boot_samp <- sample(y, size = n, replace = TRUE)
 19 |     sim_dist[i] <- median(boot_samp)
 20 |   }
 21 |   
 22 |   # center bootstrap distribution at null
 23 |   sim_dist_temp <- sim_dist
 24 |   sim_dist <- sim_dist_temp - (mean(sim_dist_temp) - null)
 25 |   
 26 |   # shading cutoffs
 27 |   if(alternative == "greater"){ x_min = y_med; x_max = Inf }
 28 |   if(alternative == "less"){ x_min = -Inf; x_max = y_med }
 29 |   if(alternative == "twosided"){
 30 |     if(y_med >= null){
 31 |       x_min = c(null - (y_med - null), y_med)
 32 |       x_max = c(-Inf, Inf)
 33 |     }
 34 |     if(y_med <= null){
 35 |       x_min = c(y_med, null + (null - y_med))
 36 |       x_max = c(-Inf, Inf)
 37 |     }    
 38 |   }
 39 |   
 40 |   # calculate p-value
 41 |   if(alternative == "greater"){ p_value <- sum(sim_dist >= y_med) / nsim }
 42 |   if(alternative == "less"){ p_value <- sum(sim_dist <= y_med) / nsim }
 43 |   if(alternative == "twosided"){
 44 |     if(y_med > null){
 45 |       p_value <- min(2 * (sum(sim_dist >= y_med) / nsim), 1)
 46 |     }
 47 |     if(y_med < null){
 48 |       p_value <- min(2 * (sum(sim_dist <= y_med) / nsim), 1)
 49 |     }
 50 |     if(y_med == null){ p_value <- 1 }
 51 |   }
 52 | 
 53 |   # print variable types
 54 |   if(show_var_types == TRUE){
 55 |     cat("Single numerical variable\n")
 56 |   }
 57 |   
 58 |   # print summary statistics
 59 |   if(show_summ_stats == TRUE){
 60 |     q_25 <- quantile(y, 0.25)
 61 |     q_75 <- quantile(y, 0.75)
 62 |     cat(paste0("n = ", n, ", y_med = ", round(y_med, 4), 
 63 |                ", Q1 = ", round(q_25, 4), ", Q3 = ", round(q_75, 4), "\n"))
 64 |   }
 65 |   
 66 |   # print results
 67 |   if(show_res == TRUE){
 68 |     if(alternative == "greater"){
 69 |       alt_sign <- ">"
 70 |     } else if(alternative == "less"){
 71 |       alt_sign <- "<"
 72 |     } else {
 73 |       alt_sign <- "!="
 74 |     }
 75 |     cat(paste0("H0: pop_med = ", null, "\n"))
 76 |     cat(paste0("HA: pop_med ", alt_sign, " ", null, "\n"))
 77 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 78 |     cat(paste0("p_value = ", p_val_to_print))
 79 |   }
 80 |   
 81 |   # eda_plot
 82 |   d_eda <- data.frame(y = y)
 83 |   
 84 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 85 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
 86 |     ggplot2::xlab(y_name) +
 87 |     ggplot2::ylab("") +
 88 |     ggplot2::ggtitle("Sample Distribution") +
 89 |     ggplot2::geom_vline(xintercept = y_med, col = "#1FBEC3", lwd = 1.5)
 90 |   
 91 |   # inf_plot
 92 |   d_inf <- data.frame(sim_dist = sim_dist)
 93 |   
 94 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
 95 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = max(diff(range(sim_dist)) / 20, 1)) +
 96 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
 97 |              alpha = 0.3, fill = "#FABAB8") +
 98 |     ggplot2::xlab("simulated medians") +
 99 |     ggplot2::ylab("") +
100 |     ggplot2::ggtitle("Null Distribution") +
101 |     ggplot2::geom_vline(xintercept = y_med, color = "#F57670", lwd = 1.5)
102 |   
103 |   # print plots
104 |   if(show_eda_plot & !show_inf_plot){ 
105 |     suppressWarnings(print(eda_plot))
106 |   }
107 |   if(!show_eda_plot & show_inf_plot){ 
108 |     print(inf_plot)
109 |   }
110 |   if(show_eda_plot & show_inf_plot){
111 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
112 |   }
113 |   
114 |   # return
115 |   return(list(sim_dist = sim_dist, p_value = p_value))
116 |   
117 | }


--------------------------------------------------------------------------------
/R/ht_single_prop_sim.R:
--------------------------------------------------------------------------------
  1 | ht_single_prop_sim <- function(y, success, null, alternative,
  2 |                                nsim, seed, y_name,
  3 |                                show_var_types, show_summ_stats,
  4 |                                show_eda_plot, show_inf_plot, show_res){
  5 |   
  6 |   # set seed
  7 |   if(!is.null(seed)){ set.seed(seed) }
  8 | 
  9 |   # calculate sample size
 10 |   n <- length(y) 
 11 |   
 12 |   # calculate p-hat
 13 |   p_hat <- sum(y == success) / n
 14 | 
 15 |   # create null distribution
 16 |   sim_dist <- rep(NA, nsim)
 17 |   for(i in 1:nsim){
 18 |     sim_samp <- sample(c(TRUE, FALSE), size = n, replace = TRUE, prob = c(null, 1 - null))
 19 |     sim_dist[i] <- sum(sim_samp) / n
 20 |   }
 21 |   
 22 |   # shading cutoffs
 23 |   if(alternative == "greater"){ x_min = p_hat; x_max = Inf }
 24 |   if(alternative == "less"){ x_min = -Inf; x_max = p_hat }
 25 |   if(alternative == "twosided"){
 26 |     if(p_hat >= null){
 27 |       x_min = c(null - (p_hat - null), p_hat)
 28 |       x_max = c(-Inf, Inf)
 29 |     }
 30 |     if(p_hat <= null){
 31 |       x_min = c(p_hat, null + (null - p_hat))
 32 |       x_max = c(-Inf, Inf)
 33 |     }    
 34 |   }
 35 | 
 36 |   # calculate p-value
 37 |   if(alternative == "greater"){ p_value <- sum(sim_dist >= p_hat) / nsim }
 38 |   if(alternative == "less"){ p_value <- sum(sim_dist <= p_hat) / nsim }
 39 |   if(alternative == "twosided"){
 40 |     if(p_hat > null){
 41 |       p_value <- min(2 * (sum(sim_dist >= p_hat) / nsim), 1)
 42 |     }
 43 |     if(p_hat < null){
 44 |       p_value <- min(2 * (sum(sim_dist <= p_hat) / nsim), 1)
 45 |     }
 46 |     if(p_hat == null){ p_value <- 1 }
 47 |   }
 48 | 
 49 |   # print variable types
 50 |   if(show_var_types == TRUE){
 51 |     cat(paste0("Single categorical variable, success: ", success,"\n"))
 52 |   }
 53 |   
 54 |   # print summary statistics
 55 |   if(show_summ_stats == TRUE){
 56 |     cat(paste0("n = ", n, ", p-hat = ", round(p_hat, 4), "\n"))
 57 |   }
 58 |   
 59 |   # print results
 60 |   if(show_res == TRUE){
 61 |     if(alternative == "greater"){
 62 |       alt_sign <- ">"
 63 |     } else if(alternative == "less"){
 64 |       alt_sign <- "<"
 65 |     } else {
 66 |       alt_sign <- "!="
 67 |     }
 68 |     cat(paste0("H0: p = ", null, "\n"))
 69 |     cat(paste0("HA: p ", alt_sign, " ", null, "\n"))
 70 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 71 |     cat(paste0("p_value = ", p_val_to_print))
 72 |   }
 73 |   
 74 |   # eda_plot
 75 |   d_eda <- data.frame(y = y)
 76 | 
 77 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 78 |     ggplot2::geom_bar(fill = "#8FDEE1") +
 79 |     ggplot2::xlab(y_name) +
 80 |     ggplot2::ylab("") +
 81 |     ggplot2::ggtitle("Sample Distribution")
 82 | 
 83 |   # inf_plot
 84 |   d_inf <- data.frame(sim_dist = sim_dist)
 85 |   
 86 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
 87 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
 88 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
 89 |              alpha = 0.3, fill = "#FABAB8") +
 90 |     ggplot2::xlab("simulated proportions") +
 91 |     ggplot2::ylab("") +
 92 |     ggplot2::ggtitle("Null Distribution") +
 93 |     ggplot2::geom_vline(xintercept = p_hat, color = "#F57670", lwd = 1.5)
 94 |   
 95 |   # print plots
 96 |   if(show_eda_plot & !show_inf_plot){ 
 97 |     print(eda_plot)
 98 |   }
 99 |   if(!show_eda_plot & show_inf_plot){ 
100 |     print(inf_plot)
101 |   }
102 |   if(show_eda_plot & show_inf_plot){
103 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
104 |   }
105 |   
106 |   # return
107 |   return(list(sim_dist = sim_dist, p_value = p_value)) 
108 | }


--------------------------------------------------------------------------------
/R/ht_single_prop_theo.R:
--------------------------------------------------------------------------------
 1 | ht_single_prop_theo <- function(y, success, null, alternative, y_name,
 2 |                                 show_var_types, show_summ_stats, show_res,
 3 |                                 show_eda_plot, show_inf_plot){
 4 |   
 5 |   # calculate sample size
 6 |   n <- length(y) 
 7 |   
 8 |   # calculate p-hat
 9 |   p_hat <- sum(y == success) / n
10 |   
11 |   # calculate SE
12 |   se <- sqrt(p_hat * (1 - p_hat) / n)
13 |   
14 |   # calculate test statistic
15 |   z <- (p_hat - null) / se
16 |   
17 |   # shading cutoffs
18 |   if(alternative == "greater"){ x_min = p_hat; x_max = Inf }
19 |   if(alternative == "less"){ x_min = -Inf; x_max = p_hat }
20 |   if(alternative == "twosided"){
21 |     if(p_hat >= null){
22 |       x_min = c(null - (p_hat - null), p_hat)
23 |       x_max = c(-Inf, Inf)
24 |     }
25 |     if(p_hat <= null){
26 |       x_min = c(p_hat, null + (null - p_hat))
27 |       x_max = c(-Inf, Inf)
28 |     }    
29 |   }
30 | 
31 |   # calculate p-value
32 |   if(alternative == "greater"){ p_value <- pnorm(z, lower.tail = FALSE) }
33 |   if(alternative == "less"){ p_value <- pnorm(z, lower.tail = TRUE) }
34 |   if(alternative == "twosided"){
35 |     p_value <- 2 * pnorm(abs(z), lower.tail = FALSE)
36 |   }
37 |   
38 |   # print variable types
39 |   if(show_var_types == TRUE){
40 |     cat(paste0("Single categorical variable, success: ", success,"\n"))
41 |   }
42 |   
43 |   # print summary statistics
44 |   if(show_summ_stats == TRUE){
45 |     cat(paste0("n = ", n, ", p-hat = ", round(p_hat, 4), "\n"))
46 |   }
47 |   
48 |   # print results
49 |   if(show_res == TRUE){
50 |     if(alternative == "greater"){
51 |       alt_sign <- ">"
52 |     } else if(alternative == "less"){
53 |       alt_sign <- "<"
54 |     } else {
55 |       alt_sign <- "!="
56 |     }
57 |     cat(paste0("H0: p = ", null, "\n"))
58 |     cat(paste0("HA: p ", alt_sign, " ", null, "\n"))
59 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
60 |     cat(paste0("z = ", round(z, 4), "\n"))
61 |     cat(paste0("p_value = ", p_val_to_print))
62 |   }
63 |   
64 |   # eda_plot
65 |   d_eda <- data.frame(y = y)
66 | 
67 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
68 |     ggplot2::geom_bar(fill = "#8FDEE1") +
69 |     ggplot2::xlab(y_name) +
70 |     ggplot2::ylab("") +
71 |     ggplot2::ggtitle("Sample Distribution")
72 | 
73 |   # inf_plot
74 |   d_for_plot <- data.frame(x = c(null - 4*se, null + 4*se))
75 |   inf_plot <- ggplot2::ggplot(d_for_plot, ggplot2::aes_string(x = 'x')) + 
76 |     ggplot2::stat_function(fun = dnorm, args = list(mean = null, sd = se), color = "#999999") +
77 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
78 |              alpha = 0.3, fill = "#FABAB8") +
79 |     ggplot2::ggtitle("Null Distribution") +
80 |     ggplot2::xlab("") +
81 |     ggplot2::ylab("") +
82 |     ggplot2::geom_vline(xintercept = p_hat, color = "#F57670", lwd = 1.5)
83 |   
84 |   # print plots
85 |   if(show_eda_plot & !show_inf_plot){ 
86 |     print(eda_plot)
87 |   }
88 |   if(!show_eda_plot & show_inf_plot){ 
89 |     print(inf_plot)
90 |   }
91 |   if(show_eda_plot & show_inf_plot){
92 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
93 |   }
94 |   
95 |   # return
96 |   return(list(SE = se, z = z, p_value = p_value)) 
97 | }


--------------------------------------------------------------------------------
/R/ht_two_mean_sim.R:
--------------------------------------------------------------------------------
  1 | ht_two_mean_sim <- function(y, x, null, alternative, nsim, seed,
  2 |                             y_name, x_name, 
  3 |                             show_var_types, show_summ_stats, show_res,
  4 |                             show_eda_plot, show_inf_plot){
  5 | 
  6 |   # set seed
  7 |   if(!is.null(seed)){ set.seed(seed) }
  8 |     
  9 |   # calculate n1 and n2
 10 |   ns <- by(y, x, length)
 11 |   n1 <- as.numeric(ns[1])
 12 |   n2 <- as.numeric(ns[2])
 13 |   n <- n1 + n2
 14 |   
 15 |   # calculate y-bar1 and y-bar2
 16 |   y_bars <- by(y, x, mean)
 17 |   y_bar1 <- as.numeric(y_bars[1])
 18 |   y_bar2 <- as.numeric(y_bars[2])
 19 |   
 20 |   # calculate difference in y-bars
 21 |   y_bar_diff <- y_bar1 - y_bar2
 22 | 
 23 |   # create null distribution
 24 |   sim_dist <- rep(NA, nsim)
 25 |   for(i in 1:nsim){
 26 |     y_sim <- sample(y, size = n, replace = FALSE)
 27 |     y_sim_bars <- by(y_sim, x, mean)
 28 |     y_sim_bar1 <- as.numeric(y_sim_bars[1])
 29 |     y_sim_bar2 <- as.numeric(y_sim_bars[2])
 30 |     sim_dist[i] <- y_sim_bar1 - y_sim_bar2
 31 |   }
 32 |   
 33 |   # shading cutoffs
 34 |   if(alternative == "greater"){ 
 35 |     x_min <- y_bar_diff
 36 |     x_max <- Inf 
 37 |     }
 38 |   if(alternative == "less"){ 
 39 |     x_min <- -Inf
 40 |     x_max <- y_bar_diff
 41 |     }
 42 |   if(alternative == "twosided"){
 43 |     if(y_bar_diff >= null){
 44 |       x_min <- c(null - (y_bar_diff - null), y_bar_diff)
 45 |       x_max <- c(-Inf, Inf)
 46 |     }
 47 |     if(y_bar_diff <= null){
 48 |       x_min <- c(y_bar_diff, null + (null - y_bar_diff))
 49 |       x_max <- c(-Inf, Inf)
 50 |     }    
 51 |   }
 52 |   
 53 |   # calculate p-value
 54 |   if(alternative == "greater"){ p_value <- sum(sim_dist >= y_bar_diff) / nsim }
 55 |   if(alternative == "less"){ p_value <- sum(sim_dist <= y_bar_diff) / nsim }
 56 |   if(alternative == "twosided"){
 57 |     if(y_bar_diff > null){
 58 |       p_value <- min(2 * (sum(sim_dist >= y_bar_diff) / nsim), 1)
 59 |     }
 60 |     if(y_bar_diff < null){
 61 |       p_value <- min(2 * (sum(sim_dist <= y_bar_diff) / nsim), 1)
 62 |     }
 63 |     if(y_bar_diff == null){ p_value <- 1 }
 64 |   }
 65 |   
 66 |   # print variable types
 67 |   if(show_var_types == TRUE){
 68 |     n_x_levels <- length(levels(x))
 69 |     cat(paste0("Response variable: numerical\n"))
 70 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
 71 |   }
 72 |   
 73 |   # print summary statistics
 74 |   if(show_summ_stats == TRUE){
 75 |     gr1 <- levels(x)[1]
 76 |     gr2 <- levels(x)[2]
 77 |     sds <- by(y, x, IQR)
 78 |     s1 <- sds[1]
 79 |     s2 <- sds[2]
 80 |     cat(paste0("n_", gr1, " = ", n1, ", y_bar_", gr1, " = ", round(y_bar1, 4), 
 81 |                ", s_", gr1, " = ", s1, "\n"))
 82 |     cat(paste0("n_", gr2, " = ", n2, ", y_bar_", gr2, " = ", round(y_bar2, 4),
 83 |                ", s_", gr2, " = ", s2, "\n"))
 84 |   }
 85 |   
 86 |   # print results
 87 |   if(show_res == TRUE){
 88 |     if(alternative == "greater"){
 89 |       alt_sign <- ">"
 90 |     } else if(alternative == "less"){
 91 |       alt_sign <- "<"
 92 |     } else {
 93 |       alt_sign <- "!="
 94 |     }
 95 |     cat(paste0("H0: mu_", gr1, " =  mu_", gr2, "\n"))
 96 |     cat(paste0("HA: mu_", gr1, " ", alt_sign, " mu_", gr2, "\n"))
 97 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 98 |     cat(paste0("p_value = ", p_val_to_print))
 99 |   }
100 |   
101 |   # eda_plot
102 |   d_eda <- data.frame(y = y, x = x)
103 |   d_means <- data.frame(y_bars = as.numeric(y_bars), x = levels(x))
104 |   
105 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
106 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
107 |     ggplot2::xlab(y_name) +
108 |     ggplot2::ylab(x_name) +
109 |     ggplot2::ggtitle("Sample Distribution") +
110 |     ggplot2::geom_vline(data = d_means, ggplot2::aes(xintercept = y_bars), col = "#1FBEC3", lwd = 1.5) +
111 |     ggplot2::facet_grid(x ~ .) 
112 | 
113 |   # inf_plot
114 |   d_inf <- data.frame(sim_dist = sim_dist)
115 |   
116 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
117 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
118 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
119 |              alpha = 0.3, fill = "#FABAB8") +
120 |     ggplot2::xlab("simulated difference in means") +
121 |     ggplot2::ylab("") +
122 |     ggplot2::ggtitle("Null Distribution") +
123 |     ggplot2::geom_vline(xintercept = y_bar_diff, color = "#F57670", lwd = 1.5)
124 |   
125 |   # print plots
126 |   if(show_eda_plot & !show_inf_plot){ 
127 |     print(eda_plot)
128 |   }
129 |   if(!show_eda_plot & show_inf_plot){ 
130 |     print(inf_plot)
131 |   }
132 |   if(show_eda_plot & show_inf_plot){
133 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
134 |   }
135 |   
136 |   # return
137 |   return(list(sim_dist = sim_dist, p_value = p_value))
138 | }


--------------------------------------------------------------------------------
/R/ht_two_mean_theo.R:
--------------------------------------------------------------------------------
  1 | 
  2 | ht_two_mean_theo <- function(y, x, null, alternative, 
  3 |                              y_name, x_name, 
  4 |                              show_var_types, show_summ_stats, show_res,
  5 |                              show_eda_plot, show_inf_plot){
  6 |   
  7 |   # calculate n1 and n2
  8 |   ns <- by(y, x, length)
  9 |   n1 <- as.numeric(ns[1])
 10 |   n2 <- as.numeric(ns[2])
 11 |   
 12 |   # calculate y-bar1 and y-bar2
 13 |   y_bars <- by(y, x, mean)
 14 |   y_bar1 <- as.numeric(y_bars[1])
 15 |   y_bar2 <- as.numeric(y_bars[2])
 16 |   
 17 |   # calculate difference in y-bars
 18 |   y_bar_diff <- y_bar1 - y_bar2
 19 |   
 20 |   # calculate s1 and s2
 21 |   sds <- by(y, x, sd)
 22 |   s1 <- as.numeric(sds[1])
 23 |   s2 <- as.numeric(sds[2])
 24 |   
 25 |   # calculate SE
 26 |   se <- sqrt((s1^2 / n1) + (s2^2 / n2))
 27 | 
 28 |   # define degrees of freedom
 29 |   deg_fr <- min(n1 - 1, n2 - 1)
 30 |   
 31 |   # calculate t
 32 |   t <- (y_bar_diff - null) / se
 33 |   
 34 |   # shading cutoffs
 35 |   if(alternative == "greater"){ 
 36 |     x_min <- y_bar_diff
 37 |     x_max <- Inf 
 38 |     }
 39 |   if(alternative == "less"){ 
 40 |     x_min <- -Inf
 41 |     x_max <- y_bar_diff
 42 |     }
 43 |   if(alternative == "twosided"){
 44 |     if(y_bar_diff >= null){
 45 |       x_min <- c(null - (y_bar_diff - null), y_bar_diff)
 46 |       x_max <- c(-Inf, Inf)
 47 |     }
 48 |     if(y_bar_diff <= null){
 49 |       x_min <- c(y_bar_diff, null + (null - y_bar_diff))
 50 |       x_max <- c(-Inf, Inf)
 51 |     }    
 52 |   }
 53 |   
 54 |   # calculate p-value
 55 |   if(alternative == "greater"){ p_value <- pt(t, deg_fr, lower.tail = FALSE) }
 56 |   if(alternative == "less"){ p_value <- pt(t, deg_fr, lower.tail = TRUE) }
 57 |   if(alternative == "twosided"){
 58 |     p_value <- pt(abs(t), deg_fr, lower.tail = FALSE) * 2
 59 |   }
 60 |   
 61 |   # print variable types
 62 |   if(show_var_types == TRUE){
 63 |     n_x_levels <- length(levels(x))
 64 |     cat(paste0("Response variable: numerical\n"))
 65 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
 66 |   }
 67 |   
 68 |   # print summary statistics
 69 |   if(show_summ_stats == TRUE){
 70 |     gr1 <- levels(x)[1]
 71 |     gr2 <- levels(x)[2]
 72 |     cat(paste0("n_", gr1, " = ", n1, ", y_bar_", gr1, " = ", round(y_bar1, 4), ", s_", gr1, " = ", round(s1, 4), "\n"))
 73 |     cat(paste0("n_", gr2, " = ", n2, ", y_bar_", gr2, " = ", round(y_bar2, 4), ", s_", gr2, " = ", round(s2, 4), "\n"))
 74 |   }
 75 |   
 76 |   # print results
 77 |   if(show_res == TRUE){
 78 |     if(alternative == "greater"){
 79 |       alt_sign <- ">"
 80 |     } else if(alternative == "less"){
 81 |       alt_sign <- "<"
 82 |     } else {
 83 |       alt_sign <- "!="
 84 |     }
 85 |     cat(paste0("H0: mu_", gr1, " =  mu_", gr2, "\n"))
 86 |     cat(paste0("HA: mu_", gr1, " ", alt_sign, " mu_", gr2, "\n"))
 87 |     cat(paste0("t = ", round(t, 4), ", df = ", deg_fr, "\n"))
 88 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 89 |     cat(paste0("p_value = ", p_val_to_print))
 90 |   }
 91 |   
 92 |   # eda_plot
 93 |   d_eda <- data.frame(y = y, x = x)
 94 |   d_means <- data.frame(y_bars = as.numeric(y_bars), x = levels(x))
 95 |   
 96 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = y), environment = environment()) +
 97 |     ggplot2::geom_histogram(fill = "#8FDEE1", binwidth = diff(range(y)) / 20) +
 98 |     ggplot2::xlab(y_name) +
 99 |     ggplot2::ylab(x_name) +
100 |     ggplot2::ggtitle("Sample Distribution") +
101 |     ggplot2::geom_vline(data = d_means, ggplot2::aes(xintercept = y_bars), col = "#1FBEC3", lwd = 1.5) +
102 |     ggplot2::facet_grid(x ~ .)
103 |     
104 | 
105 |   # inf_plot
106 |   inf_plot <- ggplot2::ggplot(data.frame(x = c(null - 4*se, null + 4*se)), ggplot2::aes(x)) + 
107 |     ggplot2::stat_function(fun = dnorm, args = list(mean = null, sd = se), color = "#999999") +
108 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
109 |              alpha = 0.3, fill = "#FABAB8") +
110 |     ggplot2::ggtitle("Null Distribution") +
111 |     ggplot2::xlab("") +
112 |     ggplot2::ylab("") +
113 |     ggplot2::geom_vline(xintercept = y_bar_diff, color = "#F57670", lwd = 1.5)
114 |   
115 |   # print plots
116 |   if(show_eda_plot & !show_inf_plot){ 
117 |     print(eda_plot)
118 |   }
119 |   if(!show_eda_plot & show_inf_plot){ 
120 |     print(inf_plot)
121 |   }
122 |   if(show_eda_plot & show_inf_plot){
123 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
124 |   }
125 |   
126 |   # return
127 |   return(list(SE = se, t = t, df = deg_fr, p_value = p_value))
128 | }


--------------------------------------------------------------------------------
/R/ht_two_median_sim.R:
--------------------------------------------------------------------------------
  1 | ht_two_median_sim <- function(y, x, null, alternative, nsim, seed,
  2 |                               y_name, x_name, 
  3 |                               show_var_types, show_summ_stats, show_res,
  4 |                               show_eda_plot, show_inf_plot){
  5 | 
  6 |   # set seed
  7 |   if(!is.null(seed)){ set.seed(seed) }
  8 | 
  9 |   # calculate n1 and n2
 10 |   ns <- by(y, x, length)
 11 |   n1 <- as.numeric(ns[1])
 12 |   n2 <- as.numeric(ns[2])
 13 |   n <- n1 + n2
 14 | 
 15 |   # calculate y-med1 and y-med2
 16 |   y_meds <- by(y, x, median)
 17 |   y_med1 <- as.numeric(y_meds[1])
 18 |   y_med2 <- as.numeric(y_meds[2])
 19 |   
 20 |   # calculate difference in y-meds
 21 |   y_med_diff <- y_med1 - y_med2
 22 | 
 23 |   # create null distribution
 24 |   sim_dist <- rep(NA, nsim)
 25 |   for(i in 1:nsim){
 26 |     y_sim <- sample(y, size = n, replace = FALSE)
 27 |     y_sim_meds <- by(y_sim, x, median)
 28 |     y_sim_med1 <- as.numeric(y_sim_meds[1])
 29 |     y_sim_med2 <- as.numeric(y_sim_meds[2])
 30 |     sim_dist[i] <- y_sim_med1 - y_sim_med2
 31 |   }
 32 |   
 33 |   # shading cutoffs
 34 |   if(alternative == "greater"){ 
 35 |     x_min <- y_med_diff
 36 |     x_max <- Inf 
 37 |     }
 38 |   if(alternative == "less"){ 
 39 |     x_min <- -Inf
 40 |     x_max <- y_med_diff
 41 |     }
 42 |   if(alternative == "twosided"){
 43 |     if(y_med_diff >= null){
 44 |       x_min <- c(null - (y_med_diff - null), y_med_diff)
 45 |       x_max <- c(-Inf, Inf)
 46 |     }
 47 |     if(y_med_diff <= null){
 48 |       x_min <- c(y_med_diff, null + (null - y_med_diff))
 49 |       x_max <- c(-Inf, Inf)
 50 |     }    
 51 |   }
 52 |   
 53 |   # calculate p-value
 54 |   if(alternative == "greater"){ p_value <- sum(sim_dist >= y_med_diff) / nsim }
 55 |   if(alternative == "less"){ p_value <- sum(sim_dist <= y_med_diff) / nsim }
 56 |   if(alternative == "twosided"){
 57 |     if(y_med_diff > null){
 58 |       p_value <- min(2 * (sum(sim_dist >= y_med_diff) / nsim), 1)
 59 |     }
 60 |     if(y_med_diff < null){
 61 |       p_value <- min(2 * (sum(sim_dist <= y_med_diff) / nsim), 1)
 62 |     }
 63 |     if(y_med_diff == null){ p_value <- 1 }
 64 |   }
 65 |   
 66 |   # print variable types
 67 |   if(show_var_types == TRUE){
 68 |     n_x_levels <- length(levels(x))
 69 |     cat(paste0("Response variable: numerical\n"))
 70 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
 71 |   }
 72 |   
 73 |   # print summary statistics
 74 |   if(show_summ_stats == TRUE){
 75 |     gr1 <- levels(x)[1]
 76 |     gr2 <- levels(x)[2]
 77 |     iqrs <- by(y, x, IQR)
 78 |     iqr1 <- iqrs[1]
 79 |     iqr2 <- iqrs[2]
 80 |     cat(paste0("n_", gr1, " = ", n1, ", y_med_", gr1, " = ", round(y_med1, 4), 
 81 |                ", IQR_", gr1, " = ", iqr1, "\n"))
 82 |     cat(paste0("n_", gr2, " = ", n2, ", y_med_", gr2, " = ", round(y_med2, 4), 
 83 |                ", IQR_", gr2, " = ", iqr2, "\n"))
 84 |   }
 85 |   
 86 |   # print results
 87 |   if(show_res == TRUE){
 88 |     if(alternative == "greater"){
 89 |       alt_sign <- ">"
 90 |     } else if(alternative == "less"){
 91 |       alt_sign <- "<"
 92 |     } else {
 93 |       alt_sign <- "!="
 94 |     }
 95 |     cat(paste0("H0: mu_", gr1, " =  mu_", gr2, "\n"))
 96 |     cat(paste0("HA: mu_", gr1, " ", alt_sign, " mu_", gr2, "\n"))
 97 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 98 |     cat(paste0("p_value = ", p_val_to_print))
 99 |   }
100 |   
101 |   # eda_plot
102 |   d_eda <- data.frame(y = y, x = x)
103 | 
104 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = x, y = y), environment = environment()) +
105 |     ggplot2::geom_boxplot(color = "#1FBEC3", fill = "#8FDEE1", outlier.colour = "#1FBEC3") +
106 |     ggplot2::xlab(x_name) +
107 |     ggplot2::ylab(y_name) +
108 |     ggplot2::ggtitle("Sample Distribution")
109 |     
110 |   # inf_plot
111 |   d_inf <- data.frame(sim_dist = sim_dist)
112 |   
113 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
114 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
115 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
116 |              alpha = 0.3, fill = "#FABAB8") +
117 |     ggplot2::xlab("simulated difference in medians") +
118 |     ggplot2::ylab("") +
119 |     ggplot2::ggtitle("Null Distribution") +
120 |     ggplot2::geom_vline(xintercept = y_med_diff, color = "#F57670", lwd = 1.5)
121 |   
122 |   # print plots
123 |   if(show_eda_plot & !show_inf_plot){ 
124 |     print(eda_plot)
125 |   }
126 |   if(!show_eda_plot & show_inf_plot){ 
127 |     print(inf_plot)
128 |   }
129 |   if(show_eda_plot & show_inf_plot){
130 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
131 |   }
132 |   
133 |   # return
134 |   return(list(sim_dist = sim_dist, p_value = p_value))
135 | }


--------------------------------------------------------------------------------
/R/ht_two_prop_sim.R:
--------------------------------------------------------------------------------
  1 | ht_two_prop_sim <- function(y, x, success, null, alternative, nsim, seed,
  2 |                             x_name, y_name, 
  3 |                             show_var_types, show_summ_stats, show_res,
  4 |                             show_eda_plot, show_inf_plot){
  5 | 
  6 |   # set seed
  7 |   if(!is.null(seed)){ set.seed(seed) }
  8 |     
  9 |   # calculate n1 and n2
 10 |   ns <- by(y, x, length)
 11 |   n1 <- as.numeric(ns[1])
 12 |   n2 <- as.numeric(ns[2])
 13 |   
 14 |   # calculate p-hat1 and p-hat2
 15 |   suc1 <- sum(y[x == levels(x)[1]] == success)
 16 |   suc2 <- sum(y[x == levels(x)[2]] == success)
 17 |   p_hat1 <- suc1 / n1
 18 |   p_hat2 <- suc2 / n2
 19 |   
 20 |   # calculate difference in p-hats
 21 |   p_hat_diff <- p_hat1 - p_hat2
 22 | 
 23 |   # create null distribution
 24 |   sim_dist <- rep(NA, nsim)
 25 |   for(i in 1:nsim){
 26 |     y_sim <- sample(y, size = (n1+n2), replace = FALSE)
 27 |     suc1_sim <- sum(y_sim[x == levels(x)[1]] == success)
 28 |     suc2_sim <- sum(y_sim[x == levels(x)[2]] == success)
 29 |     p_hat1_sim <- suc1_sim / n1
 30 |     p_hat2_sim <- suc2_sim / n2
 31 |     sim_dist[i] <- p_hat1_sim - p_hat2_sim
 32 |   }
 33 |   
 34 |   # shading cutoffs
 35 |   if(alternative == "greater"){ 
 36 |     x_min <- p_hat_diff
 37 |     x_max <- Inf 
 38 |     }
 39 |   if(alternative == "less"){ 
 40 |     x_min <- -Inf
 41 |     x_max <- p_hat_diff
 42 |     }
 43 |   if(alternative == "twosided"){
 44 |     if(p_hat_diff >= null){
 45 |       x_min <- c(null - (p_hat_diff - null), p_hat_diff)
 46 |       x_max <- c(-Inf, Inf)
 47 |     }
 48 |     if(p_hat_diff <= null){
 49 |       x_min <- c(p_hat_diff, null + (null - p_hat_diff))
 50 |       x_max <- c(-Inf, Inf)
 51 |     }    
 52 |   }
 53 |   
 54 |   # calculate p-value
 55 |   if(alternative == "greater"){ p_value <- sum(sim_dist >= p_hat_diff) / nsim }
 56 |   if(alternative == "less"){ p_value <- sum(sim_dist <= p_hat_diff) / nsim }
 57 |   if(alternative == "twosided"){
 58 |     if(p_hat_diff > null){
 59 |       p_value <- min(2 * (sum(sim_dist >= p_hat_diff) / nsim), 1)
 60 |     }
 61 |     if(p_hat_diff < null){
 62 |       p_value <- min(2 * (sum(sim_dist <= p_hat_diff) / nsim), 1)
 63 |     }
 64 |   }
 65 | 
 66 |   # print variable types
 67 |   if(show_var_types == TRUE){
 68 |     n_x_levels <- length(levels(x))
 69 |     n_y_levels <- length(levels(y))
 70 |     cat(paste0("Response variable: categorical (", n_x_levels, " levels, success: ", success, ")\n"))
 71 |     cat(paste0("Explanatory variable: categorical (", n_y_levels, " levels) \n"))
 72 |   }
 73 |   
 74 |   # print summary statistics
 75 |   if(show_summ_stats == TRUE){
 76 |     gr1 <- levels(x)[1]
 77 |     gr2 <- levels(x)[2]
 78 |     cat(paste0("n_", gr1, " = ", n1, ", p_hat_", gr1, " = ", round(p_hat1, 4), "\n"))
 79 |     cat(paste0("n_", gr2, " = ", n2, ", p_hat_", gr2, " = ", round(p_hat2, 4), "\n"))
 80 |   }
 81 |   
 82 |   # print results
 83 |   if(show_res == TRUE){
 84 |     if(alternative == "greater"){
 85 |       alt_sign <- ">"
 86 |     } else if(alternative == "less"){
 87 |       alt_sign <- "<"
 88 |     } else {
 89 |       alt_sign <- "!="
 90 |     }
 91 |     cat(paste0("H0: p_", gr1, " =  p_", gr2, "\n"))
 92 |     cat(paste0("HA: p_", gr1, " ", alt_sign, " p_", gr2, "\n"))
 93 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 94 |     cat(paste0("p_value = ", p_val_to_print))
 95 |   }
 96 |   
 97 |   # eda_plot
 98 |   d_eda <- data.frame(y = y, x = x)
 99 |   
100 |   if(which(levels(y) == success) == 1){ 
101 |     fill_values = c("#1FBEC3", "#8FDEE1") 
102 |   } else {
103 |       fill_values = c("#8FDEE1", "#1FBEC3") 
104 |       }
105 |   
106 |   eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = x, fill = y), environment = environment()) +
107 |     ggplot2::geom_bar(position = "fill") +
108 |     ggplot2::scale_fill_manual(values = fill_values) +
109 |     ggplot2::xlab(x_name) +
110 |     ggplot2::ylab("") +
111 |     ggplot2::ggtitle("Sample Distribution") +
112 |     ggplot2::guides(fill = ggplot2::guide_legend(title = y_name))
113 | 
114 |   # inf_plot
115 |   d_inf <- data.frame(sim_dist = sim_dist)
116 |   
117 |   inf_plot <- ggplot2::ggplot(data = d_inf, ggplot2::aes(x = sim_dist), environment = environment()) +
118 |     ggplot2::geom_histogram(fill = "#CCCCCC", binwidth = diff(range(sim_dist)) / 20) +
119 |     ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
120 |              alpha = 0.3, fill = "#FABAB8") +
121 |     ggplot2::xlab("simulated difference in means") +
122 |     ggplot2::ylab("") +
123 |     ggplot2::ggtitle("Null Distribution") +
124 |     ggplot2::geom_vline(xintercept = p_hat_diff, color = "#F57670", lwd = 1.5)
125 |   
126 |   # print plots
127 |   if(show_eda_plot & !show_inf_plot){ 
128 |     print(eda_plot)
129 |   }
130 |   if(!show_eda_plot & show_inf_plot){ 
131 |     print(inf_plot)
132 |   }
133 |   if(show_eda_plot & show_inf_plot){
134 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
135 |   }
136 |   
137 |   # return
138 |   return(list(sim_dist = sim_dist, p_value = p_value))
139 | }


--------------------------------------------------------------------------------
/R/ht_two_prop_theo.R:
--------------------------------------------------------------------------------
  1 | # Theoretical (normal approximation) hypothesis test for the difference
  2 | # between two proportions, p_1 - p_2, where group 1 and group 2 are the
  3 | # first and second levels of x.
  4 | #
  5 | # Arguments:
  6 | #   y               response; its elements are compared against `success`
  7 | #                   (levels(y) is used, so presumably a factor)
  8 | #   x               grouping factor; only levels(x)[1] and levels(x)[2] are used
  9 | #   success         value of y that counts as a success
 10 | #   null            hypothesized value of p_1 - p_2
 11 | #   alternative     "greater", "less" or "twosided"
 12 | #   x_name, y_name  x-axis label and legend title of the sample plot
 13 | #   show_var_types, show_summ_stats, show_res
 14 | #                   logicals: print variable types / group summaries / results
 15 | #   show_eda_plot, show_inf_plot
 16 | #                   logicals: draw the sample plot / the null distribution plot
 17 | #
 18 | # Returns list(SE, z, p_value), where SE is the pooled standard error.
 19 | ht_two_prop_theo <- function(y, x, success, null, alternative,
 20 |                              x_name, y_name, 
 21 |                              show_var_types, show_summ_stats, show_res,
 22 |                              show_eda_plot, show_inf_plot){
 23 | 
 24 |   # group labels; defined up front because both the summary statistics and
 25 |   # the results printout use them, and either can be switched on alone
 26 |   gr1 <- levels(x)[1]
 27 |   gr2 <- levels(x)[2]
 28 | 
 29 |   # calculate n1 and n2
 30 |   ns <- by(y, x, length)
 31 |   n1 <- as.numeric(ns[1])
 32 |   n2 <- as.numeric(ns[2])
 33 | 
 34 |   # calculate p-hat1 and p-hat2
 35 |   suc1 <- sum(y[x == gr1] == success)
 36 |   suc2 <- sum(y[x == gr2] == success)
 37 |   p_hat1 <- suc1 / n1
 38 |   p_hat2 <- suc2 / n2
 39 | 
 40 |   # calculate difference in p-hats
 41 |   p_hat_diff <- p_hat1 - p_hat2
 42 | 
 43 |   # calculate pooled proportion
 44 |   p_pool <- (suc1 + suc2) / (n1 + n2)
 45 | 
 46 |   # calculate SE, using the pooled proportion for both groups
 47 |   se <- sqrt((p_pool * (1 - p_pool) / n1) + (p_pool * (1 - p_pool) / n2))
 48 | 
 49 |   # calculate z
 50 |   z <- (p_hat_diff - null) / se
 51 | 
 52 |   # calculate p-value
 53 |   if(alternative == "greater"){ p_value <- pnorm(z, lower.tail = FALSE) }
 54 |   if(alternative == "less"){ p_value <- pnorm(z, lower.tail = TRUE) }
 55 |   if(alternative == "twosided"){
 56 |     p_value <- 2 * pnorm(abs(z), lower.tail = FALSE)
 57 |   }
 58 | 
 59 |   # print variable types: y is the response, x is the explanatory variable
 60 |   if(show_var_types == TRUE){
 61 |     n_y_levels <- length(levels(y))
 62 |     n_x_levels <- length(levels(x))
 63 |     cat(paste0("Response variable: categorical (", n_y_levels, " levels, success: ", success, ")\n"))
 64 |     cat(paste0("Explanatory variable: categorical (", n_x_levels, " levels) \n"))
 65 |   }
 66 | 
 67 |   # print summary statistics
 68 |   if(show_summ_stats == TRUE){
 69 |     cat(paste0("n_", gr1, " = ", n1, ", p_hat_", gr1, " = ", round(p_hat1, 4), "\n"))
 70 |     cat(paste0("n_", gr2, " = ", n2, ", p_hat_", gr2, " = ", round(p_hat2, 4), "\n"))
 71 |   }
 72 | 
 73 |   # print results
 74 |   if(show_res == TRUE){
 75 |     if(alternative == "greater"){
 76 |       alt_sign <- ">"
 77 |     } else if(alternative == "less"){
 78 |       alt_sign <- "<"
 79 |     } else {
 80 |       alt_sign <- "!="
 81 |     }
 82 |     cat(paste0("H0: p_", gr1, " =  p_", gr2, "\n"))
 83 |     cat(paste0("HA: p_", gr1, " ", alt_sign, " p_", gr2, "\n"))
 84 |     cat(paste0("z = ", round(z, 4), "\n"))
 85 |     # a p-value that rounds to 0 is reported as "< 0.0001" instead
 86 |     p_val_to_print <- ifelse(round(p_value, 4) == 0, "< 0.0001", round(p_value, 4))
 87 |     cat(paste0("p_value = ", p_val_to_print))
 88 |   }
 89 | 
 90 |   # eda_plot: built only when it is going to be drawn
 91 |   if(show_eda_plot){
 92 |     d_eda <- data.frame(y = y, x = x)
 93 | 
 94 |     # "#1FBEC3" is listed first when success is the first level of y and
 95 |     # second otherwise (i.e. it goes to the success level if y has two levels)
 96 |     if(which(levels(y) == success) == 1){ 
 97 |       fill_values = c("#1FBEC3", "#8FDEE1") 
 98 |     } else {
 99 |       fill_values = c("#8FDEE1", "#1FBEC3") 
100 |     }
101 | 
102 |     eda_plot <- ggplot2::ggplot(data = d_eda, ggplot2::aes(x = x, fill = y), environment = environment()) +
103 |       ggplot2::geom_bar(position = "fill") +
104 |       ggplot2::scale_fill_manual(values = fill_values) +
105 |       ggplot2::xlab(x_name) +
106 |       ggplot2::ylab("") +
107 |       ggplot2::ggtitle("Sample Distribution") +
108 |       ggplot2::guides(fill = ggplot2::guide_legend(title = y_name))
109 |   }
110 | 
111 |   # inf_plot: built only when it is going to be drawn
112 |   if(show_inf_plot){
113 |     # shading cutoffs
114 |     if(alternative == "greater"){ 
115 |       x_min <- p_hat_diff
116 |       x_max <- Inf 
117 |     }
118 |     if(alternative == "less"){ 
119 |       x_min <- -Inf
120 |       x_max <- p_hat_diff
121 |     }
122 |     if(alternative == "twosided"){
123 |       # two rectangles: one from the lower cutoff to -Inf and one from the
124 |       # upper cutoff to Inf; the cutoffs are p_hat_diff and its mirror image
125 |       # around null
126 |       if(p_hat_diff > null){
127 |         x_min <- c(null - (p_hat_diff - null), p_hat_diff)
128 |       } else {
129 |         x_min <- c(p_hat_diff, null + (null - p_hat_diff))
130 |       }
131 |       x_max <- c(-Inf, Inf)
132 |     }
133 | 
134 |     # normal curve centred at null with sd = se, drawn over null +/- 4 se
135 |     inf_plot <- ggplot2::ggplot(data.frame(x = c(null - 4*se, null + 4*se)), ggplot2::aes(x)) + 
136 |       ggplot2::stat_function(fun = dnorm, args = list(mean = null, sd = se), color = "#999999") +
137 |       ggplot2::annotate("rect", xmin = x_min, xmax = x_max, ymin = 0, ymax = Inf, 
138 |                alpha = 0.3, fill = "#FABAB8") +
139 |       ggplot2::ggtitle("Null Distribution") +
140 |       ggplot2::xlab("") +
141 |       ggplot2::ylab("") +
142 |       ggplot2::geom_vline(xintercept = p_hat_diff, color = "#F57670", lwd = 1.5)
143 |   }
144 | 
145 |   # print plots
146 |   if(show_eda_plot && show_inf_plot){
147 |     gridExtra::grid.arrange(eda_plot, inf_plot, ncol = 2)
148 |   } else if(show_eda_plot){ 
149 |     print(eda_plot)
150 |   } else if(show_inf_plot){ 
151 |     print(inf_plot)
152 |   }
153 | 
154 |   # return
155 |   return(list(SE = se, z = z, p_value = p_value))
156 | }


--------------------------------------------------------------------------------
/R/kobe_basket.R:
--------------------------------------------------------------------------------
 1 | #' Kobe Bryant basketball performance
 2 | #'
 3 | #' Data from the five games the Los Angeles Lakers played against the Orlando
 4 | #' Magic in the 2009 NBA finals.
 5 | #'
 6 | #' Each row represents a shot Kobe Bryant took during the five games of the
 7 | #' 2009 NBA finals. Kobe Bryant's performance earned him the title of Most
 8 | #' Valuable Player and many spectators commented on how he appeared to show
 9 | #' a hot hand.
10 | #'
11 | #' @format A data frame with 133 rows and 6 variables:
12 | #' \describe{
13 | #'   \item{vs}{A categorical vector, ORL if the Los Angeles Lakers played
14 | #'   against Orlando}
15 | #'   \item{game}{A numerical vector, game in the 2009 NBA finals}
16 | #'   \item{quarter}{A categorical vector, quarter in the game, OT stands for
17 | #'   overtime}
18 | #'   \item{time}{A character vector, time at which Kobe took a shot}
19 | #'   \item{description}{A character vector, description of the shot}
20 | #'   \item{shot}{A categorical vector, H if the shot was a hit, M if the shot
21 | #'   was a miss}
22 | #' }
23 | "kobe_basket"


--------------------------------------------------------------------------------
/R/mlb11.R:
--------------------------------------------------------------------------------
 1 | #' Major League Baseball team data
 2 | #'
 3 | #' Data from all 30 Major League Baseball teams from the 2011 season.
 4 | #'
 5 | #' @format A data frame with 30 rows and 12 variables:
 6 | #' \describe{
 7 | #'   \item{team}{Team name.}
 8 | #'   \item{runs}{Number of runs.}
 9 | #'   \item{at_bats}{Number of at bats.}
10 | #'   \item{hits}{Number of hits.}
11 | #'   \item{homeruns}{Number of home runs.}
12 | #'   \item{bat_avg}{Batting average.}
13 | #'   \item{strikeouts}{Number of strikeouts.}
14 | #'   \item{stolen_bases}{Number of stolen bases.}
15 | #'   \item{wins}{Number of wins.}
16 | #'   \item{new_onbase}{Newer variable: on-base percentage, a measure of
17 | #'       how often a batter reaches base for any reason other than a fielding error,
18 | #'       fielder's choice, dropped/uncaught third strike, fielder's obstruction, or
19 | #'       catcher's interference.}
20 | #'   \item{new_slug}{Newer variable: slugging percentage, popular measure of the
21 | #'       power of a hitter calculated as the total bases divided by at bats.}
22 | #'   \item{new_obs}{Newer variable: on-base plus slugging, calculated as the sum of the on-base and slugging percentages.}
23 | #' }
24 | #' @source \href{https://www.mlb.com/}{mlb.com}
25 | "mlb11"


--------------------------------------------------------------------------------
/R/nc.R:
--------------------------------------------------------------------------------
 1 | #' North Carolina births
 2 | #'
 3 | #' In 2004, the state of North Carolina released a large data set containing 
 4 | #' information on births recorded in this state. This data set is useful to 
 5 | #' researchers studying the relation between habits and practices of expectant 
 6 | #' mothers and the birth of their children. We will work with a random sample of 
 7 | #' observations from this data set.
 8 | #'
 9 | #' @format A tbl_df with 1000 rows and 13 variables:
10 | #' \describe{
11 | #'   \item{fage}{father's age in years}
12 | #'   \item{mage}{mother's age in years}
13 | #'   \item{mature}{maturity status of mother}
14 | #'   \item{weeks}{length of pregnancy in weeks}
15 | #'   \item{premie}{whether the birth was classified as premature (premie) or full-term}
16 | #'   \item{visits}{number of hospital visits during pregnancy}
17 | #'   \item{marital}{whether mother is `married` or `not married` at birth}
18 | #'   \item{gained}{weight gained by mother during pregnancy in pounds}
19 | #'   \item{weight}{weight of the baby at birth in pounds}
20 | #'   \item{lowbirthweight}{whether baby was classified as low birthweight (`low`) or not (`not low`)}
21 | #'   \item{gender}{gender of the baby, `female` or `male`}
22 | #'   \item{habit}{status of the mother as a `nonsmoker` or a `smoker`}
23 | #'   \item{whitemom}{whether mom is `white` or `not white`}
24 | #' }
25 | #' @source State of North Carolina.
26 | "nc"


--------------------------------------------------------------------------------
/R/nycflights.R:
--------------------------------------------------------------------------------
 1 | #' Flights data
 2 | #'
 3 | #' On-time data for a random sample of flights that departed NYC (i.e. JFK, LGA or EWR) 
 4 | #' in 2013.
 5 | #'
 6 | #' @source Hadley Wickham (2014). \code{nycflights13}: Data about flights departing 
 7 | #' NYC in 2013. R package version 0.1. 
 8 | #'  \url{https://CRAN.R-project.org/package=nycflights13}
 9 | #' @format A tbl_df with 32,735 rows and 16 variables:
10 | #' \describe{
11 | #' \item{year,month,day}{Date of departure}
12 | #' \item{dep_time,arr_time}{Departure and arrival times, local tz.}
13 | #' \item{dep_delay,arr_delay}{Departure and arrival delays, in minutes.
14 | #'   Negative times represent early departures/arrivals.}
15 | #' \item{hour,minute}{Time of departure broken into hour and minutes}
16 | #' \item{carrier}{Two letter carrier abbreviation. See \code{airlines} in the
17 | #'   \code{nycflights13} package for more information}
18 | #' \item{tailnum}{Plane tail number}
19 | #' \item{flight}{Flight number}
20 | #' \item{origin,dest}{Origin and destination. See \code{airports} in the
21 | #'   \code{nycflights13} package for more information, or google the airport code.}
22 | #' \item{air_time}{Amount of time spent in the air}
23 | #' \item{distance}{Distance flown}
24 | #' }
25 | "nycflights"


--------------------------------------------------------------------------------
/R/plot_ss.R:
--------------------------------------------------------------------------------
 1 | #' plot_ss
 2 | #'
 3 | #' An interactive function that draws a scatterplot of two variables with base
 4 | #' graphics, then lets the user click the plot in two locations to draw a line.
 5 | #' Residuals are drawn by default; boxes representing the squared residuals are
 6 | #' optional. The line's model and its sum of squared residuals are printed.
 7 | #'
 8 | #' @param x the name of numerical vector 1 on x-axis
 9 | #' @param y the name of numerical vector 2 on y-axis
10 | #' @param data the dataframe in which x and y can be found
11 | #' @param showSquares logical option to show boxes representing the squared residuals
12 | #' @param leastSquares logical option to bypass point entry and automatically draw the least squares line
13 | #' @examples 
14 | #' \dontrun{plot_ss(x = mage, y = weight, data = nc, leastSquares = TRUE)}
15 | #' @export
16 | 
17 | plot_ss <- function(x, y, data, showSquares = FALSE, leastSquares = FALSE){
18 |     missingargs <- missing(x) || missing(y) || missing(data)
19 |     if (missingargs) stop(simpleError("missing arguments x, y or data"))
20 |     
21 |     # axis labels are the argument expressions as typed; deparse() keeps an
22 |     # expression such as log(a) as one label instead of splitting it into parts
23 |     xlab <- paste(deparse(substitute(x)), collapse = " ")
24 |     ylab <- paste(deparse(substitute(y)), collapse = " ")
25 |     
26 |     # look the two variables up inside data
27 |     x <- eval(substitute(x), data)
28 |     y <- eval(substitute(y), data)
29 |     
30 |     # keep only the rows where both values are present
31 |     data <- na.omit(data.frame(x = x, y = y))
32 |     x <- data[["x"]]
33 |     y <- data[["y"]]
34 |     
35 |     plot(y ~ x, data = data,
36 |          asp = 1, pch = 16, xlab = xlab, ylab = ylab) 
37 |     
38 |     if(leastSquares){
39 |         m1 <- lm(y ~ x, data = data)
40 |         y.hat <- fitted(m1)
41 |     } else{
42 |         cat("Click two points to make a line.")
43 |         pt1 <- locator(1)
44 |         # locator() returns NULL when no point was selected
45 |         if (is.null(pt1)) stop("no point selected: two clicks are needed to draw a line")
46 |         points(pt1$x, pt1$y, pch = 4)
47 |         pt2 <- locator(1)
48 |         if (is.null(pt2)) stop("no point selected: two clicks are needed to draw a line")
49 |         points(pt2$x, pt2$y, pch = 4)
50 |         # the line through the two clicked points, expressed as an lm fit so
51 |         # that predict(), abline() and print() can be used on it
52 |         pts <- data.frame("x" = c(pt1$x, pt2$x),"y" = c(pt1$y, pt2$y))
53 |         m1 <- lm(y ~ x, data = pts)
54 |         y.hat <- predict(m1, newdata = data)
55 |     }
56 |     r <- y - y.hat
57 |     abline(m1)
58 |     
59 |     # x position of the far side of each residual box: |r| away from the point
60 |     oSide <- x - r
61 |     
62 |     usr <- par("usr")
63 |     LLim <- usr[1]
64 |     RLim <- usr[2]
65 |     outside <- oSide < LLim | oSide > RLim
66 |     oSide[outside] <- (x + r)[outside] # move boxes to avoid margins
67 |     
68 |     for(i in seq_along(y.hat)){
69 |         # residual: vertical segment from the point to the line
70 |         lines(rep(x[i], 2), c(y[i], y.hat[i]), lty = 2, col = "#56B4E9")
71 |         if(showSquares){
72 |             # the three remaining sides of the box
73 |             lines(rep(oSide[i], 2), c(y[i], y.hat[i]), lty = 3, col = "#E69F00")
74 |             lines(c(oSide[i], x[i]), rep(y.hat[i],2), lty = 3, col = "#E69F00")
75 |             lines(c(oSide[i], x[i]), rep(y[i],2), lty = 3, col = "#E69F00")
76 |         }
77 |     }
78 |     
79 |     SS <- round(sum(r^2), 3)
80 |     # carriage return plus blanks, written just before the model is printed
81 |     cat("\r                                ")
82 |     print(m1)
83 |     cat("Sum of Squares: ", SS)
84 | }


--------------------------------------------------------------------------------
/R/present.R:
--------------------------------------------------------------------------------
 1 | #' Male and female births in the US
 2 | #'
 3 | #' Counts of the total number of male and female births in the United States from
 4 | #' 1940 to 2013.
 5 | #'
 6 | #' @format A tbl_df with 74 rows and 3 variables:
 7 | #' \describe{
 8 | #'   \item{year}{year, ranging from 1940 to 2013}
 9 | #'   \item{boys}{number of male births}
10 | #'   \item{girls}{number of female births}
11 | #' }
12 | #' @source Data up to 2002 appear in Mathews TJ, and Hamilton BE. 2005. Trend
13 | #' analysis of the sex ratio at birth in the United States. National Vital
14 | #' Statistics Reports 53(20):1-17. Data for 2003 - 2013 have been collected
15 | #' from annual National Vital Statistics Reports published by the US Department of 
16 | #' Health and Human Services, Centers for Disease Control and Prevention, 
17 | #' National Center for Health Statistics.
18 | "present"


--------------------------------------------------------------------------------
/R/rep_sample_n.R:
--------------------------------------------------------------------------------
 1 | #' Repeated Sampling from a Tibble
 2 | #' 
 3 | #' @param tbl tbl of data to draw rows from.
 4 | #' @param size Number of rows drawn in each sample.
 5 | #' @param replace Should rows be sampled with replacement?
 6 | #' @param reps Number of samples to draw.
 7 | #' @return A tbl_df that stacks all drawn samples, with an added \code{replicate} column that the tbl_df is also grouped by
 8 | #' @examples 
 9 | #' data(nc)
10 | #' rep_sample_n(nc, size=10, replace=FALSE, reps=1)
11 | #' @export
12 | 
13 | rep_sample_n <- function(tbl, size, replace = FALSE, reps = 1)
14 | {
15 |     n_rows <- nrow(tbl)
16 | 
17 |     # one draw of row indices per replicate, concatenated in replicate order
18 |     draws <- lapply(seq_len(reps), function(k) sample.int(n_rows, size, replace = replace))
19 |     row_idx <- unlist(draws)
20 | 
21 |     # replicate number for every sampled row: 1 repeated size times, then 2, ...
22 |     rep_id <- rep(1:reps, rep(size, reps))
23 |     sampled <- tbl[row_idx, , drop=FALSE]
24 | 
25 |     dplyr::group_by(cbind(replicate = rep_id, sampled), replicate)
26 | }


--------------------------------------------------------------------------------
/R/rstudio.R:
--------------------------------------------------------------------------------
1 | #' Internal check of whether shiny content may be used: TRUE when knitr's
2 | #' rmarkdown.runtime option is unset (NULL) or is exactly "shiny"
3 | #' @keywords internal 
4 | 
5 | allow_shiny = function() {
6 |   rt = knitr::opts_knit$get("rmarkdown.runtime")
7 |   if (is.null(rt)) return(TRUE)
8 |   identical(rt, "shiny")
9 | }


--------------------------------------------------------------------------------
/R/statsr.R:
--------------------------------------------------------------------------------
 1 | #' statsr: A companion package for Statistics with R 
 2 | #'
 3 | #' R package to support the online open access book "An Introduction
 4 | #' to Bayesian Thinking" available at 
 5 | #' \url{https://statswithr.github.io/book/} and videos for the Coursera "Statistics with
 6 | #' R" Specialization.  The package includes data sets, functions
 7 | #' and Shiny Applications for learning frequentist and Bayesian
 8 | #' statistics with R.  The two main functions for inference and decision making are
 9 | #' `inference` and `bayes_inference`  which support  
10 | #' confidence/credible intervals and hypothesis testing with one sample or two samples
11 | #' from Gaussian and Bernoulli populations.   Shiny apps are used to illustrate how prior
12 | #' hyperparameters or changes in the data may influence posterior distributions.
13 | #' 
14 | #' See \url{https://github.com/StatsWithR/statsr} for the development version and 
15 | #' additional information. For additional background and illustrations of the
16 | #' functions, see the online book \url{https://statswithr.github.io/book/}.
17 | #'
18 | #' @docType package
19 | #' @name statsr
20 | #' @import stats
21 | #' @import graphics
22 | #' @import ggplot2
23 | #' @import shiny
24 | 
25 | NULL


--------------------------------------------------------------------------------
/R/statswithr_lab.R:
--------------------------------------------------------------------------------
 1 | # R Markdown output format for the labs: rmarkdown::html_document() called
 2 | # with the package's lab.css and fixed highlight, theme and figure-size
 3 | # settings. Arguments in ... are forwarded to rmarkdown::html_document().
 4 | statswithr_lab = function(...) {
 5 |   # location of lab.css as reported by system.file() for the statsr package
 6 |   lab_css = system.file("lab.css", package = "statsr")
 7 | 
 8 |   rmarkdown::html_document(
 9 |     css = lab_css, highlight = "pygments", theme = "cerulean",
10 |     fig_width = 7, fig_height = 4,
11 |     ...
12 |   )
13 | }


--------------------------------------------------------------------------------
/R/tapwater.R:
--------------------------------------------------------------------------------
 1 | #' Total Trihalomethanes in Tapwater
 2 | #' 
 3 | #' Trihalomethanes are formed as a by-product predominantly when chlorine is used to disinfect water
 4 | #' for drinking. They result from the reaction of chlorine or bromine with
 5 | #' organic matter present in the water being treated.
 6 | #' THMs  have been associated through epidemiological studies
 7 | #' with some adverse health effects and many are considered carcinogenic.
 8 | #' In the United States, the EPA limits 
 9 | #' the total concentration of the four chief constituents (chloroform, bromoform, bromodichloromethane, and dibromochloromethane), referred to as
10 | #' total trihalomethanes (TTHM), to 80 parts per billion in treated water.
11 | #'
12 | #'
13 | #' @format A dataframe with 28 rows and 6 variables:
14 | #' \describe{
15 | #'   \item{date}{Date of collection}
16 | #'   \item{tthm}{average total trihalomethanes in ppb }
17 | #'   \item{samples}{number of samples}
18 | #'   \item{nondetects}{number of samples where tthm not detected (0)}
19 | #'   \item{min}{min tthm in ppb in samples}
20 | #'   \item{max}{max tthm in ppb in samples}
21 | #' }
22 | #' @source National Drinking Water Database for Durham, NC. \url{https://www.ewg.org}
23 | "tapwater"


--------------------------------------------------------------------------------
/R/wage.R:
--------------------------------------------------------------------------------
 1 | #' Wage data
 2 | #' 
 3 | #' The data were gathered as part of a random sample of 935 respondents throughout the United States.  
 4 | #' 
 5 | #' @format A tbl_df with 935 rows and 17 variables:
 6 | #' \describe{
 7 | #'   \item{wage}{weekly earnings (dollars)}
 8 | #'   \item{hours}{average hours worked per week}
 9 | #'   \item{iq}{IQ score}
10 | #'   \item{kww}{Knowledge of world work score}
11 | #'   \item{educ}{years of education}
12 | #'   \item{exper}{years of work experience}
13 | #'   \item{tenure}{years with current employer}
14 | #'   \item{age}{age in years}
15 | #'   \item{married}{=1 if married}
16 | #'   \item{black}{=1 if black}
17 | #'   \item{south}{=1 if live in south}
18 | #'   \item{urban}{=1 if live in a Standard Metropolitan Statistical Area }
19 | #'   \item{sibs}{number of siblings}
20 | #'   \item{brthord}{birth order}
21 | #'   \item{meduc}{mother's education (years)}
22 | #'   \item{feduc}{father's education (years)}
23 | #'   \item{lwage}{natural log of wage}
24 | #' }
25 | #' @source Jeffrey M. Wooldridge (2000). Introductory Econometrics: A Modern Approach. South-Western College Publishing.
26 | "wage"


--------------------------------------------------------------------------------
/R/zinc.R:
--------------------------------------------------------------------------------
 1 | #' Zinc Concentration in Water
 2 | #'
 3 | #' Trace metals in drinking water affect the flavor and
 4 | #' an unusually high concentration can pose a health
 5 | #' hazard. Ten pairs of data were taken measuring zinc
 6 | #' concentration in bottom water and surface water.
 7 | #'
 8 | #' @format
 9 | #'  A data frame with 10 observations on the following 4 variables.
10 | #'  \describe{
11 | #'    \item{\code{location}}{sample number}
12 | #'    \item{\code{bottom}}{zinc concentration in bottom water}
13 | #'    \item{\code{surface}}{zinc concentration in surface water}
14 | #'    \item{\code{difference}}{difference between zinc concentration at the bottom and surface}
15 | #'  }
16 | #'
17 | #' @source
18 | #'  \href{https://online.stat.psu.edu/stat500/sites/stat500/files/data/zinc_conc.txt}{PennState Eberly College of Science Online Courses}
19 | #'
20 | #' @examples
21 | #'  data(zinc)
22 | #'  str(zinc)
23 | #'  plot(bottom ~ surface, data=zinc)
24 | #'  # use paired t-test to test if difference in means is zero
25 | #'
26 | "zinc"
27 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | output: github_document
 3 | ---
 4 | 
 5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 6 | 
 7 | ```{r, echo = FALSE}
 8 | knitr::opts_chunk$set(
 9 |   collapse = TRUE,
10 |   comment = "#>",
11 |   fig.path = "README-"
12 | )
13 | ```
14 | 
15 | [![Build Status](https://travis-ci.org/StatsWithR/statsr.svg?branch=BayesFactor)](https://travis-ci.org/StatsWithR/statsr)
16 | [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/statsr)](https://cran.r-project.org/package=statsr)
17 | [![](https://cranlogs.r-pkg.org/badges/statsr)](https://CRAN.R-project.org/package=statsr) [![](https://cranlogs.r-pkg.org/badges/grand-total/statsr)](https://CRAN.R-project.org/package=statsr)
18 | 
19 | # statsr
20 | 
21 | The `R` package `statsr` provides functions and datasets to support the Coursera [*Statistics with `R` Specialization*](https://www.coursera.org/specializations/statistics)  videos and open access book
22 | [*An Introduction to Bayesian Thinking*](https://statswithr.github.io/book/) for learning Bayesian and frequentist statistics using `R`. 
23 | 
24 | 
25 | To install the latest version from github, verify that there is a passing badge above on the README page.  In `R` enter
26 | 
27 | ```{r github, echo=TRUE,eval=FALSE}
28 | library(devtools)
29 | devtools::install_github("statswithr/statsr",
30 |                          dependencies=TRUE,
31 |                          upgrade_dependencies = TRUE)
32 | ```
33 | 
34 | This will install the package and any packages that it requires, as well as update any installed packages to their latest versions.
35 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
 3 | 
 4 | [![Build
 5 | Status](https://travis-ci.org/StatsWithR/statsr.svg?branch=BayesFactor)](https://travis-ci.org/StatsWithR/statsr)
 6 | [![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/statsr)](https://cran.r-project.org/package=statsr)
 7 | [![](https://cranlogs.r-pkg.org/badges/statsr)](https://CRAN.R-project.org/package=statsr)
 8 | [![](https://cranlogs.r-pkg.org/badges/grand-total/statsr)](https://CRAN.R-project.org/package=statsr)
 9 | 
10 | # statsr
11 | 
12 | The `R` package `statsr` provides functions and datasets to support the
13 | Coursera [*Statistics with `R`
14 | Specialization*](https://www.coursera.org/specializations/statistics)
15 | videos and open access book [*An Introduction to Bayesian
16 | Thinking*](https://statswithr.github.io/book/) for learning Bayesian and
17 | frequentist statistics using `R`.
18 | 
19 | To install the latest version from github, verify that there is a
20 | passing badge above on the README page. In `R` enter
21 | 
22 | ``` r
23 | library(devtools)
24 | devtools::install_github("statswithr/statsr",
25 |                          dependencies=TRUE,
26 |                          upgrade_dependencies = TRUE)
27 | ```
28 | 
29 | This will install the package and any packages that it requires, as
30 | well as update any installed packages to their latest versions.
31 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
 1 | url:  http://statswithr.github.io/statsr/
 2 | 
 3 | authors:
 4 |   Merlise Clyde:
 5 |     href: http://stat.duke.edu/~clyde
 6 | navbar:
 7 |   title: "statsr"
 8 |   left:
 9 |     - text: "Functions"
10 |       href: reference/index.html
11 |     - text: "News"
12 |       href: news/index.html
13 |   right:
14 |     - icon: fa-github
15 |       href: https://github.com/statswithr/statsr
16 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | # statsr 0.3.0 Comments to CRAN
 2 |  
 3 | Submission to update bayes_inference function so that package is consistent with book.  Addresses issue #15 in GitHub
 4 | 
 5 | 
 6 | ## Comments on Note from checking: 
 7 |  
 8 | None
 9 | 
10 | ## Test environments
11 |  
12 | - local MAC OSX 11.1  R 4.0.3
13 | - Ubuntu (on travis-ci), R-release and R-devel
14 | - win-builder R-release, R-devel
15 | - R-hub ubuntu-gcc-release (R-release)
16 | - R-hub fedora-clang-devel (R-devel)
17 | 
18 | ## R CMD check results
19 | 
20 | On windows_x86_64-w64-mingw32 (r-devel), ubuntu-gcc-release (r-release), fedora-clang-devel (r-devel)
21 |   checking CRAN incoming feasibility ... NOTE
22 |   Maintainer: 'Merlise Clyde <clyde@duke.edu>'
23 |  
24 | 0 errors ✓ | 0 warnings ✓ | 0 notes 
25 | 


--------------------------------------------------------------------------------
/data/ames.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/ames.rda


--------------------------------------------------------------------------------
/data/arbuthnot.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/arbuthnot.rda


--------------------------------------------------------------------------------
/data/atheism.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/atheism.rda


--------------------------------------------------------------------------------
/data/brfss.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/brfss.rda


--------------------------------------------------------------------------------
/data/evals.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/evals.rda


--------------------------------------------------------------------------------
/data/kobe_basket.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/kobe_basket.rda


--------------------------------------------------------------------------------
/data/mlb11.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/mlb11.rda


--------------------------------------------------------------------------------
/data/nc.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/nc.rda


--------------------------------------------------------------------------------
/data/nycflights.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/nycflights.rda


--------------------------------------------------------------------------------
/data/present.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/present.rda


--------------------------------------------------------------------------------
/data/tapwater.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/tapwater.rda


--------------------------------------------------------------------------------
/data/wage.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/wage.rda


--------------------------------------------------------------------------------
/data/zinc.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/data/zinc.rda


--------------------------------------------------------------------------------
/docs/404.html:
--------------------------------------------------------------------------------
  1 | <!-- Generated by pkgdown: do not edit by hand -->
  2 | <!DOCTYPE html>
  3 | <html lang="en">
  4 |   <head>
  5 |   <meta charset="utf-8">
  6 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8 | 
  9 | <title>Page not found (404) • statsr</title>
 10 | 
 11 | 
 12 | <!-- jquery -->
 13 | <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
 14 | <!-- Bootstrap -->
 15 | 
 16 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous" />
 17 | 
 18 | <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script>
 19 | 
 20 | <!-- bootstrap-toc -->
 21 | <link rel="stylesheet" href="https://statswithr.github.io/statsr/bootstrap-toc.css">
 22 | <script src="https://statswithr.github.io/statsr/bootstrap-toc.js"></script>
 23 | 
 24 | <!-- Font Awesome icons -->
 25 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" />
 26 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" />
 27 | 
 28 | <!-- clipboard.js -->
 29 | <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script>
 30 | 
 31 | <!-- headroom.js -->
 32 | <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script>
 33 | <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
 34 | 
 35 | <!-- pkgdown -->
 36 | <link href="https://statswithr.github.io/statsr/pkgdown.css" rel="stylesheet">
 37 | <script src="https://statswithr.github.io/statsr/pkgdown.js"></script>
 38 | 
 39 | 
 40 | 
 41 | 
 42 | <meta property="og:title" content="Page not found (404)" />
 43 | 
 44 | 
 45 | 
 46 | 
 47 | <!-- mathjax -->
 48 | <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
 49 | <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
 50 | 
 51 | <!--[if lt IE 9]>
 52 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 53 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 54 | <![endif]-->
 55 | 
 56 | 
 57 | 
 58 |   </head>
 59 | 
 60 |   <body data-spy="scroll" data-target="#toc">
 61 |     <div class="container template-title-body">
 62 |       <header>
 63 |       <div class="navbar navbar-default navbar-fixed-top" role="navigation">
 64 |   <div class="container">
 65 |     <div class="navbar-header">
 66 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 67 |         <span class="sr-only">Toggle navigation</span>
 68 |         <span class="icon-bar"></span>
 69 |         <span class="icon-bar"></span>
 70 |         <span class="icon-bar"></span>
 71 |       </button>
 72 |       <span class="navbar-brand">
 73 |         <a class="navbar-link" href="https://statswithr.github.io/statsr/index.html">statsr</a>
 74 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.0</span>
 75 |       </span>
 76 |     </div>
 77 | 
 78 |     <div id="navbar" class="navbar-collapse collapse">
 79 |       <ul class="nav navbar-nav">
 80 |         <li>
 81 |   <a href="reference/index.html">Functions</a>
 82 | </li>
 83 | <li>
 84 |   <a href="news/index.html">News</a>
 85 | </li>
 86 |       </ul>
 87 |       <ul class="nav navbar-nav navbar-right">
 88 |         <li>
 89 |   <a href="https://github.com/statswithr/statsr">
 90 |     <span class="fab fa-github"></span>
 91 |      
 92 |   </a>
 93 | </li>
 94 |       </ul>
 95 |       
 96 |     </div><!--/.nav-collapse -->
 97 |   </div><!--/.container -->
 98 | </div><!--/.navbar -->
 99 | 
100 |       
101 | 
102 |       </header>
103 | 
104 | <div class="row">
105 |   <div class="contents col-md-9">
106 |     <div class="page-header">
107 |       <h1>Page not found (404)</h1>
108 |     </div>
109 | 
110 | Content not found. Please use links in the navbar.
111 | 
112 |   </div>
113 | 
114 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
115 |     <nav id="toc" data-toggle="toc" class="sticky-top">
116 |       <h2 data-toc-skip>Contents</h2>
117 |     </nav>
118 |   </div>
119 | 
120 | </div>
121 | 
122 | 
123 | 
124 |       <footer>
125 |       <div class="copyright">
126 |   <p>Developed by Colin Rundel, Mine Cetinkaya-Rundel, <a href='http://stat.duke.edu/~clyde'>Merlise Clyde</a>, David Banks.</p>
127 | </div>
128 | 
129 | <div class="pkgdown">
130 |   <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p>
131 | </div>
132 | 
133 |       </footer>
134 |    </div>
135 | 
136 |   
137 | 
138 | 
139 |   </body>
140 | </html>
141 | 
142 | 
143 | 


--------------------------------------------------------------------------------
/docs/LICENSE-text.html:
--------------------------------------------------------------------------------
  1 | <!-- Generated by pkgdown: do not edit by hand -->
  2 | <!DOCTYPE html>
  3 | <html lang="en">
  4 |   <head>
  5 |   <meta charset="utf-8">
  6 | <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 | <meta name="viewport" content="width=device-width, initial-scale=1.0">
  8 | 
  9 | <title>License • statsr</title>
 10 | 
 11 | 
 12 | <!-- jquery -->
 13 | <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
 14 | <!-- Bootstrap -->
 15 | 
 16 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/css/bootstrap.min.css" integrity="sha256-bZLfwXAP04zRMK2BjiO8iu9pf4FbLqX6zitd+tIvLhE=" crossorigin="anonymous" />
 17 | 
 18 | <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script>
 19 | 
 20 | <!-- bootstrap-toc -->
 21 | <link rel="stylesheet" href="bootstrap-toc.css">
 22 | <script src="bootstrap-toc.js"></script>
 23 | 
 24 | <!-- Font Awesome icons -->
 25 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" />
 26 | <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" />
 27 | 
 28 | <!-- clipboard.js -->
 29 | <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script>
 30 | 
 31 | <!-- headroom.js -->
 32 | <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script>
 33 | <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
 34 | 
 35 | <!-- pkgdown -->
 36 | <link href="pkgdown.css" rel="stylesheet">
 37 | <script src="pkgdown.js"></script>
 38 | 
 39 | 
 40 | 
 41 | 
 42 | <meta property="og:title" content="License" />
 43 | 
 44 | 
 45 | 
 46 | 
 47 | <!-- mathjax -->
 48 | <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
 49 | <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
 50 | 
 51 | <!--[if lt IE 9]>
 52 | <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
 53 | <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
 54 | <![endif]-->
 55 | 
 56 | 
 57 | 
 58 |   </head>
 59 | 
 60 |   <body data-spy="scroll" data-target="#toc">
 61 |     <div class="container template-title-body">
 62 |       <header>
 63 |       <div class="navbar navbar-default navbar-fixed-top" role="navigation">
 64 |   <div class="container">
 65 |     <div class="navbar-header">
 66 |       <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
 67 |         <span class="sr-only">Toggle navigation</span>
 68 |         <span class="icon-bar"></span>
 69 |         <span class="icon-bar"></span>
 70 |         <span class="icon-bar"></span>
 71 |       </button>
 72 |       <span class="navbar-brand">
 73 |         <a class="navbar-link" href="index.html">statsr</a>
 74 |         <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">0.3.0</span>
 75 |       </span>
 76 |     </div>
 77 | 
 78 |     <div id="navbar" class="navbar-collapse collapse">
 79 |       <ul class="nav navbar-nav">
 80 |         <li>
 81 |   <a href="reference/index.html">Functions</a>
 82 | </li>
 83 | <li>
 84 |   <a href="news/index.html">News</a>
 85 | </li>
 86 |       </ul>
 87 |       <ul class="nav navbar-nav navbar-right">
 88 |         <li>
 89 |   <a href="https://github.com/statswithr/statsr">
 90 |     <span class="fab fa-github"></span>
 91 |      
 92 |   </a>
 93 | </li>
 94 |       </ul>
 95 |       
 96 |     </div><!--/.nav-collapse -->
 97 |   </div><!--/.container -->
 98 | </div><!--/.navbar -->
 99 | 
100 |       
101 | 
102 |       </header>
103 | 
104 | <div class="row">
105 |   <div class="contents col-md-9">
106 |     <div class="page-header">
107 |       <h1>License</h1>
108 |     </div>
109 | 
110 | <pre>YEAR: 2016
111 | COPYRIGHT HOLDER:  StatsWithR
112 | </pre>
113 | 
114 |   </div>
115 | 
116 |   <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
117 |     <nav id="toc" data-toggle="toc" class="sticky-top">
118 |       <h2 data-toc-skip>Contents</h2>
119 |     </nav>
120 |   </div>
121 | 
122 | </div>
123 | 
124 | 
125 | 
126 |       <footer>
127 |       <div class="copyright">
128 |   <p>Developed by Colin Rundel, Mine Cetinkaya-Rundel, <a href='http://stat.duke.edu/~clyde'>Merlise Clyde</a>, David Banks.</p>
129 | </div>
130 | 
131 | <div class="pkgdown">
132 |   <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p>
133 | </div>
134 | 
135 |       </footer>
136 |    </div>
137 | 
138 |   
139 | 
140 | 
141 |   </body>
142 | </html>
143 | 
144 | 
145 | 


--------------------------------------------------------------------------------
/docs/bootstrap-toc.css:
--------------------------------------------------------------------------------
 1 | /*!
 2 |  * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
 3 |  * Copyright 2015 Aidan Feldman
 4 |  * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
 5 | 
 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */
 7 | 
 8 | /* All levels of nav */
 9 | nav[data-toggle='toc'] .nav > li > a {
10 |   display: block;
11 |   padding: 4px 20px;
12 |   font-size: 13px;
13 |   font-weight: 500;
14 |   color: #767676;
15 | }
16 | nav[data-toggle='toc'] .nav > li > a:hover,
17 | nav[data-toggle='toc'] .nav > li > a:focus {
18 |   padding-left: 19px;
19 |   color: #563d7c;
20 |   text-decoration: none;
21 |   background-color: transparent;
22 |   border-left: 1px solid #563d7c;
23 | }
24 | nav[data-toggle='toc'] .nav > .active > a,
25 | nav[data-toggle='toc'] .nav > .active:hover > a,
26 | nav[data-toggle='toc'] .nav > .active:focus > a {
27 |   padding-left: 18px;
28 |   font-weight: bold;
29 |   color: #563d7c;
30 |   background-color: transparent;
31 |   border-left: 2px solid #563d7c;
32 | }
33 | 
34 | /* Nav: second level (shown on .active) */
35 | nav[data-toggle='toc'] .nav .nav {
36 |   display: none; /* Hide by default, but at >768px, show it */
37 |   padding-bottom: 10px;
38 | }
39 | nav[data-toggle='toc'] .nav .nav > li > a {
40 |   padding-top: 1px;
41 |   padding-bottom: 1px;
42 |   padding-left: 30px;
43 |   font-size: 12px;
44 |   font-weight: normal;
45 | }
46 | nav[data-toggle='toc'] .nav .nav > li > a:hover,
47 | nav[data-toggle='toc'] .nav .nav > li > a:focus {
48 |   padding-left: 29px;
49 | }
50 | nav[data-toggle='toc'] .nav .nav > .active > a,
51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a,
52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a {
53 |   padding-left: 28px;
54 |   font-weight: 500;
55 | }
56 | 
57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */
58 | nav[data-toggle='toc'] .nav > .active > ul {
59 |   display: block;
60 | }
61 | 


--------------------------------------------------------------------------------
/docs/bootstrap-toc.js:
--------------------------------------------------------------------------------
  1 | /*!
  2 |  * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/)
  3 |  * Copyright 2015 Aidan Feldman
  4 |  * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */
  5 | (function() {
  6 |   'use strict';
  7 | 
  // Global table-of-contents builder. `Toc.init` fills a nav element with a
  // nested list of links to the headings found inside a scope element; the
  // `helpers` object holds the individual steps so they can be reused or
  // overridden.
  window.Toc = {
    helpers: {
      // Return every element of $el that matches `selector`, plus every
      // descendant that matches, excluding anything carrying a
      // `data-toc-skip` attribute.
      findOrFilter: function($el, selector) {
        // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/
        // http://stackoverflow.com/a/12731439/358804
        var $descendants = $el.find(selector);
        return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])');
      },

      // Derive an id candidate from the element's text: trimmed, lower-cased,
      // with each run of non-alphanumeric characters collapsed to '-'.
      // Falls back to the lower-cased tag name when the text is empty.
      generateUniqueIdBase: function(el) {
        var text = $(el).text();
        var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-');
        return anchor || el.tagName.toLowerCase();
      },

      // Return the first of `base`, `base-1`, `base-2`, ... that is not
      // already used as an element id in the document.
      generateUniqueId: function(el) {
        var anchorBase = this.generateUniqueIdBase(el);
        for (var i = 0; ; i++) {
          var anchor = anchorBase;
          if (i > 0) {
            // add suffix
            anchor += '-' + i;
          }
          // check if ID already exists
          if (!document.getElementById(anchor)) {
            return anchor;
          }
        }
      },

      // Return the element's id, assigning a freshly generated unique id
      // first when the element has none.
      generateAnchor: function(el) {
        if (el.id) {
          return el.id;
        } else {
          var anchor = this.generateUniqueId(el);
          el.id = anchor;
          return anchor;
        }
      },

      // Create a detached, empty `<ul class="nav">`.
      createNavList: function() {
        return $('<ul class="nav"></ul>');
      },

      // Create an empty nav list, append it to $parent, and return it.
      createChildNavList: function($parent) {
        var $childList = this.createNavList();
        $parent.append($childList);
        return $childList;
      },

      // Build `<li><a href="#anchor">text</a></li>`; the label is set with
      // .text(), so it is inserted as plain text rather than HTML.
      generateNavEl: function(anchor, text) {
        var $a = $('<a></a>');
        $a.attr('href', '#' + anchor);
        $a.text(text);
        var $li = $('<li></li>');
        $li.append($a);
        return $li;
      },

      // Build the nav entry for one heading. The label comes from the
      // heading's `toc-text` data value when set, otherwise its text.
      generateNavItem: function(headingEl) {
        var anchor = this.generateAnchor(headingEl);
        var $heading = $(headingEl);
        var text = $heading.data('toc-text') || $heading.text();
        return this.generateNavEl(anchor, text);
      },

      // Find the first heading level (`<h1>`, then `<h2>`, etc.) that has more than one element. Defaults to 1 (for `<h1>`).
      getTopLevel: function($scope) {
        for (var i = 1; i <= 6; i++) {
          var $headings = this.findOrFilter($scope, 'h' + i);
          if ($headings.length > 1) {
            return i;
          }
        }

        return 1;
      },

      // returns the elements for the top level, and the next below it
      getHeadings: function($scope, topLevel) {
        var topSelector = 'h' + topLevel;

        var secondaryLevel = topLevel + 1;
        var secondarySelector = 'h' + secondaryLevel;

        return this.findOrFilter($scope, topSelector + ',' + secondarySelector);
      },

      // Read the heading level from the second character of the tag name
      // (e.g. "H2" -> 2).
      getNavLevel: function(el) {
        return parseInt(el.tagName.charAt(1), 10);
      },

      // Append one nav entry per heading. Top-level headings go into
      // $topContext; a lower-level heading that follows a top-level entry
      // opens a child list under that entry, and later lower-level headings
      // keep being added to that child list until the next top-level one.
      populateNav: function($topContext, topLevel, $headings) {
        var $context = $topContext;
        var $prevNav;

        var helpers = this;
        $headings.each(function(i, el) {
          var $newNav = helpers.generateNavItem(el);
          var navLevel = helpers.getNavLevel(el);

          // determine the proper $context
          if (navLevel === topLevel) {
            // use top level
            $context = $topContext;
          } else if ($prevNav && $context === $topContext) {
            // create a new level of the tree and switch to it
            $context = helpers.createChildNavList($prevNav);
          } // else use the current $context

          $context.append($newNav);

          $prevNav = $newNav;
        });
      },

      // Normalize the argument of `init`: an object with a `jquery` property
      // is treated as the nav element itself; anything else is used as the
      // options object directly (and is modified in place). `$scope`
      // defaults to the document body.
      parseOps: function(arg) {
        var opts;
        if (arg.jquery) {
          opts = {
            $nav: arg
          };
        } else {
          opts = arg;
        }
        opts.$scope = opts.$scope || $(document.body);
        return opts;
      }
    },

    // accepts a jQuery object, or an options object
    // (an options object supplies `$nav` and, optionally, `$scope`)
    init: function(opts) {
      opts = this.helpers.parseOps(opts);

      // ensure that the data attribute is in place for styling
      opts.$nav.attr('data-toggle', 'toc');

      var $topContext = this.helpers.createChildNavList(opts.$nav);
      var topLevel = this.helpers.getTopLevel(opts.$scope);
      var $headings = this.helpers.getHeadings(opts.$scope, topLevel);
      this.helpers.populateNav($topContext, topLevel, $headings);
    }
  };
152 | 
153 |   $(function() {
154 |     $('nav[data-toggle="toc"]').each(function(i, el) {
155 |       var $nav = $(el);
156 |       Toc.init($nav);
157 |     });
158 |   });
159 | })();
160 | 


--------------------------------------------------------------------------------
/docs/docsearch.js:
--------------------------------------------------------------------------------
 1 | $(function() {
 2 | 
 3 |   // register a handler to move the focus to the search bar
 4 |   // upon pressing shift + "/" (i.e. "?")
 5 |   $(document).on('keydown', function(e) {
 6 |     if (e.shiftKey && e.keyCode == 191) {
 7 |       e.preventDefault();
 8 |       $("#search-input").focus();
 9 |     }
10 |   });
11 | 
12 |   $(document).ready(function() {
13 |     // do keyword highlighting
14 |     /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */
15 |     var mark = function() {
16 | 
17 |       var referrer = document.URL ;
18 |       var paramKey = "q" ;
19 | 
20 |       if (referrer.indexOf("?") !== -1) {
21 |         var qs = referrer.substr(referrer.indexOf('?') + 1);
22 |         var qs_noanchor = qs.split('#')[0];
23 |         var qsa = qs_noanchor.split('&');
24 |         var keyword = "";
25 | 
26 |         for (var i = 0; i < qsa.length; i++) {
27 |           var currentParam = qsa[i].split('=');
28 | 
29 |           if (currentParam.length !== 2) {
30 |             continue;
31 |           }
32 | 
33 |           if (currentParam[0] == paramKey) {
34 |             keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20"));
35 |           }
36 |         }
37 | 
38 |         if (keyword !== "") {
39 |           $(".contents").unmark({
40 |             done: function() {
41 |               $(".contents").mark(keyword);
42 |             }
43 |           });
44 |         }
45 |       }
46 |     };
47 | 
48 |     mark();
49 |   });
50 | });
51 | 
52 | /* Search term highlighting ------------------------------*/
53 | 
// Collect the distinct words that matched the search query for one hit,
// looking at every hierarchy level (lvl0, lvl1, ...) and then at the content
// highlight, in that order. First occurrences keep their position.
function matchedWords(hit) {
  var highlight = hit._highlightResult;
  var chunks = [];

  var levels = highlight.hierarchy;
  for (var level in levels) {
    chunks.push(levels[level].matchedWords);
  }

  if (highlight.content) {
    chunks.push(highlight.content.matchedWords);
  }

  // Flatten one level, then drop duplicates.
  var allWords = [].concat.apply([], chunks);
  return Array.from(new Set(allWords));
}
72 | 
// Build the URL a search hit should link to, carrying the matched words in a
// "q" query parameter so the target page can highlight them.
//
// The words are encoded with encodeURIComponent (not the deprecated
// escape()): escape() leaves "+" untouched and emits Latin-1 / %uXXXX
// escapes, which the reader side (it turns "+" into a space and then calls
// decodeURIComponent) either misreads or rejects with a URIError.
//
// hit: a search hit with `url`, and optionally `anchor` plus
//      `url_without_anchor`.
// Returns the URL string, with the anchor (if any) placed after the query.
function updateHitURL(hit) {

  var words = matchedWords(hit);
  var query = '?q=' + encodeURIComponent(words.join(" "));
  var url = "";

  if (hit.anchor) {
    url = hit.url_without_anchor + query + '#' + hit.anchor;
  } else {
    url = hit.url + query;
  }

  return url;
}
86 | 


--------------------------------------------------------------------------------
/docs/jquery.sticky-kit.min.js:
--------------------------------------------------------------------------------
 1 | /* Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 |  */
 2 | /*
 3 |   Source: https://github.com/leafo/sticky-kit
 4 |   License: MIT
 5 | */
/*
  Minified build: registers the jQuery plugin `stick_in_parent(options)`.
  Options read from the argument: sticky_class (default "is_stuck"),
  inner_scrolling (default true), recalc_every, parent, offset_top
  (default 0), spacer, bottoming (default true).
  Each element is set up at most once (guarded by its "sticky_kit" data
  flag); setup throws "failed to find stick parent" when no parent matches.
  Events triggered on the element: sticky_kit:stick, sticky_kit:unstick,
  sticky_kit:bottom, sticky_kit:unbottom. Events listened for:
  sticky_kit:recalc (on document.body) and sticky_kit:detach (on the element).
  NOTE(review): the detach handler calls g.position("position","") on the
  parent; this looks like it was meant to be g.css("position","") to undo
  the "relative" positioning set when bottoming - confirm against upstream.
*/
(function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k));
if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("<div />"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q,
u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),e<F&&(m=!1,c=q,null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),
h.detach()),b={position:"",width:"",top:""},a.css(b).removeClass(t).trigger("sticky_kit:unstick")),B&&(b=f.height(),u+q>b&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}),
a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize",
y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n<K;n++)d=this[n],J(b(d));return this}}).call(this);
12 | 


--------------------------------------------------------------------------------
/docs/link.svg:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <!-- Generator: Adobe Illustrator 19.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0)  -->
 3 | <svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
 4 | 	 viewBox="0 0 20 20" style="enable-background:new 0 0 20 20;" xml:space="preserve">
 5 | <style type="text/css">
 6 | 	.st0{fill:#75AADB;}
 7 | </style>
 8 | <path class="st0" d="M4,11.3h1.3v1.3H4c-2,0-4-2.3-4-4.7s2.1-4.7,4-4.7h5.3c1.9,0,4,2.3,4,4.7c0,1.9-1.2,3.6-2.7,4.3v-1.5
 9 | 	C11.4,10.2,12,9.1,12,8c0-1.7-1.4-3.3-2.7-3.3H4C2.7,4.7,1.3,6.3,1.3,8S2.7,11.3,4,11.3z M16,7.3h-1.3v1.3H16c1.3,0,2.7,1.6,2.7,3.3
10 | 	s-1.4,3.3-2.7,3.3h-5.3C9.4,15.3,8,13.7,8,12c0-1.1,0.6-2.2,1.3-2.8V7.7C7.9,8.4,6.7,10.1,6.7,12c0,2.4,2.1,4.7,4,4.7H16
11 | 	c1.9,0,4-2.3,4-4.7S18,7.3,16,7.3z"/>
12 | </svg>
13 | 


--------------------------------------------------------------------------------
/docs/pkgdown.js:
--------------------------------------------------------------------------------
  1 | /* http://gregfranko.com/blog/jquery-best-practices/ */
  2 | (function($) {
  $(function() {

    // Let headroom.js manage the fixed navbar.
    $('.navbar-fixed-top').headroom();

    // Keep the page content clear of the fixed navbar, now and on resize.
    var padBodyForNavbar = function() {
      $('body').css('padding-top', $('.navbar').height() + 10);
    };
    padBodyForNavbar();
    $(window).resize(function() {
      padBodyForNavbar();
    });

    $('[data-toggle="tooltip"]').tooltip();

    // Pick the navbar link whose path shares the longest leading run of
    // segments with the current page; the first link wins on ties.
    var here = paths(location.pathname);
    var navLinks = $("#navbar ul li a");
    var bestScore = -1;
    var bestIndex = -1;
    navLinks.each(function(index, link) {
      // Skip placeholder links and links pointing at another host.
      if (link.getAttribute("href") === "#" || link.host !== location.host) {
        return;
      }

      var score = prefix_length(paths(link.pathname), here);
      if (score > bestScore) {
        bestScore = score;
        bestIndex = index;
      }
    });

    // Mark the parent <li> as active, plus the enclosing dropdown <li> if any.
    if (bestIndex >= 0) {
      var $activeLink = $(navLinks[bestIndex]);
      $activeLink.parent().addClass("active");
      $activeLink.closest("li.dropdown").addClass("active");
    }
  });
 41 | 
 42 |   function paths(pathname) {
 43 |     var pieces = pathname.split("/");
 44 |     pieces.shift(); // always starts with /
 45 | 
 46 |     var end = pieces[pieces.length - 1];
 47 |     if (end === "index.html" || end === "")
 48 |       pieces.pop();
 49 |     return(pieces);
 50 |   }
 51 | 
 52 |   // Returns -1 if not found
 53 |   function prefix_length(needle, haystack) {
 54 |     if (needle.length > haystack.length)
 55 |       return(-1);
 56 | 
 57 |     // Special case for length-0 haystack, since for loop won't run
 58 |     if (haystack.length === 0) {
 59 |       return(needle.length === 0 ? 0 : -1);
 60 |     }
 61 | 
 62 |     for (var i = 0; i < haystack.length; i++) {
 63 |       if (needle[i] != haystack[i])
 64 |         return(i);
 65 |     }
 66 | 
 67 |     return(haystack.length);
 68 |   }
 69 | 
 70 |   /* Clipboard --------------------------*/
 71 | 
 72 |   function changeTooltipMessage(element, msg) {
 73 |     var tooltipOriginalTitle=element.getAttribute('data-original-title');
 74 |     element.setAttribute('data-original-title', msg);
 75 |     $(element).tooltip('show');
 76 |     element.setAttribute('data-original-title', tooltipOriginalTitle);
 77 |   }
 78 | 
 79 |   if(ClipboardJS.isSupported()) {
 80 |     $(document).ready(function() {
 81 |       var copyButton = "<button type='button' class='btn btn-primary btn-copy-ex' type = 'submit' title='Copy to clipboard' aria-label='Copy to clipboard' data-toggle='tooltip' data-placement='left auto' data-trigger='hover' data-clipboard-copy><i class='fa fa-copy'></i></button>";
 82 | 
 83 |       $(".examples, div.sourceCode").addClass("hasCopyButton");
 84 | 
 85 |       // Insert copy buttons:
 86 |       $(copyButton).prependTo(".hasCopyButton");
 87 | 
 88 |       // Initialize tooltips:
 89 |       $('.btn-copy-ex').tooltip({container: 'body'});
 90 | 
 91 |       // Initialize clipboard:
 92 |       var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', {
 93 |         text: function(trigger) {
 94 |           return trigger.parentNode.textContent;
 95 |         }
 96 |       });
 97 | 
 98 |       clipboardBtnCopies.on('success', function(e) {
 99 |         changeTooltipMessage(e.trigger, 'Copied!');
100 |         e.clearSelection();
101 |       });
102 | 
103 |       clipboardBtnCopies.on('error', function() {
104 |         changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy');
105 |       });
106 |     });
107 |   }
108 | })(window.jQuery || window.$)
109 | 


--------------------------------------------------------------------------------
/docs/pkgdown.yml:
--------------------------------------------------------------------------------
 1 | pandoc: 2.11.2
 2 | pkgdown: 1.6.1
 3 | pkgdown_sha: ~
 4 | articles: {}
 5 | last_built: 2021-01-22T19:37Z
 6 | urls:
 7 |   reference: http://statswithr.github.io/statsr//reference
 8 |   article: http://statswithr.github.io/statsr//articles
 9 | 
10 | 


--------------------------------------------------------------------------------
/docs/reference/Rplot001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/Rplot001.png


--------------------------------------------------------------------------------
/docs/reference/Rplot002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/Rplot002.png


--------------------------------------------------------------------------------
/docs/reference/Rplot003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/Rplot003.png


--------------------------------------------------------------------------------
/docs/reference/Rplot004.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/Rplot004.png


--------------------------------------------------------------------------------
/docs/reference/Rplot005.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/Rplot005.png


--------------------------------------------------------------------------------
/docs/reference/bandit_posterior-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/bandit_posterior-1.png


--------------------------------------------------------------------------------
/docs/reference/bandit_sim-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/bandit_sim-1.png


--------------------------------------------------------------------------------
/docs/reference/bayes_inference-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/bayes_inference-1.png


--------------------------------------------------------------------------------
/docs/reference/bayes_inference-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/bayes_inference-2.png


--------------------------------------------------------------------------------
/docs/reference/bayes_inference-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/bayes_inference-3.png


--------------------------------------------------------------------------------
/docs/reference/bayes_inference-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/bayes_inference-4.png


--------------------------------------------------------------------------------
/docs/reference/bayes_inference-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/bayes_inference-5.png


--------------------------------------------------------------------------------
/docs/reference/figures/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/figures/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/docs/reference/inference-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/inference-1.png


--------------------------------------------------------------------------------
/docs/reference/inference-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/inference-2.png


--------------------------------------------------------------------------------
/docs/reference/inference-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/inference-3.png


--------------------------------------------------------------------------------
/docs/reference/plot_bandit_posterior-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/plot_bandit_posterior-1.png


--------------------------------------------------------------------------------
/docs/reference/zinc-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/docs/reference/zinc-1.png


--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 3 |   <url>
 4 |     <loc>http://statswithr.github.io/statsr//index.html</loc>
 5 |   </url>
 6 |   <url>
 7 |     <loc>http://statswithr.github.io/statsr//reference/BF_app.html</loc>
 8 |   </url>
 9 |   <url>
10 |     <loc>http://statswithr.github.io/statsr//reference/allow_shiny.html</loc>
11 |   </url>
12 |   <url>
13 |     <loc>http://statswithr.github.io/statsr//reference/ames.html</loc>
14 |   </url>
15 |   <url>
16 |     <loc>http://statswithr.github.io/statsr//reference/ames_sampling_dist.html</loc>
17 |   </url>
18 |   <url>
19 |     <loc>http://statswithr.github.io/statsr//reference/arbuthnot.html</loc>
20 |   </url>
21 |   <url>
22 |     <loc>http://statswithr.github.io/statsr//reference/atheism.html</loc>
23 |   </url>
24 |   <url>
25 |     <loc>http://statswithr.github.io/statsr//reference/bandit_posterior.html</loc>
26 |   </url>
27 |   <url>
28 |     <loc>http://statswithr.github.io/statsr//reference/bandit_sim.html</loc>
29 |   </url>
30 |   <url>
31 |     <loc>http://statswithr.github.io/statsr//reference/bayes_inference.html</loc>
32 |   </url>
33 |   <url>
34 |     <loc>http://statswithr.github.io/statsr//reference/brfss.html</loc>
35 |   </url>
36 |   <url>
37 |     <loc>http://statswithr.github.io/statsr//reference/calc_streak.html</loc>
38 |   </url>
39 |   <url>
40 |     <loc>http://statswithr.github.io/statsr//reference/credible_interval_app.html</loc>
41 |   </url>
42 |   <url>
43 |     <loc>http://statswithr.github.io/statsr//reference/evals.html</loc>
44 |   </url>
45 |   <url>
46 |     <loc>http://statswithr.github.io/statsr//reference/inference.html</loc>
47 |   </url>
48 |   <url>
49 |     <loc>http://statswithr.github.io/statsr//reference/kobe_basket.html</loc>
50 |   </url>
51 |   <url>
52 |     <loc>http://statswithr.github.io/statsr//reference/mlb11.html</loc>
53 |   </url>
54 |   <url>
55 |     <loc>http://statswithr.github.io/statsr//reference/nc.html</loc>
56 |   </url>
57 |   <url>
58 |     <loc>http://statswithr.github.io/statsr//reference/nycflights.html</loc>
59 |   </url>
60 |   <url>
61 |     <loc>http://statswithr.github.io/statsr//reference/plot_bandit_posterior.html</loc>
62 |   </url>
63 |   <url>
64 |     <loc>http://statswithr.github.io/statsr//reference/plot_ss.html</loc>
65 |   </url>
66 |   <url>
67 |     <loc>http://statswithr.github.io/statsr//reference/present.html</loc>
68 |   </url>
69 |   <url>
70 |     <loc>http://statswithr.github.io/statsr//reference/rep_sample_n.html</loc>
71 |   </url>
72 |   <url>
73 |     <loc>http://statswithr.github.io/statsr//reference/statsr.html</loc>
74 |   </url>
75 |   <url>
76 |     <loc>http://statswithr.github.io/statsr//reference/tapwater.html</loc>
77 |   </url>
78 |   <url>
79 |     <loc>http://statswithr.github.io/statsr//reference/wage.html</loc>
80 |   </url>
81 |   <url>
82 |     <loc>http://statswithr.github.io/statsr//reference/zinc.html</loc>
83 |   </url>
84 | </urlset>
85 | 


--------------------------------------------------------------------------------
/inst/WORDLIST:
--------------------------------------------------------------------------------
 1 | Ames
 2 | Arbuthnot
 3 | Arbuthnot's
 4 | BayesFactor
 5 | BRFSS
 6 | CLT
 7 | Coursera
 8 | DS
 9 | De
10 | EDA
11 | EWR
12 | Eberly
13 | Gelman
14 | Hadley
15 | Hamermesh
16 | HistData
17 | JSZ
18 | JUI
19 | JZS
20 | Jeffreys
21 | LGA
22 | Lakers
23 | Lindley's
24 | NG
25 | ORL
26 | PennState
27 | README
28 | RStudio
29 | Siow
30 | THMs
31 | TJ
32 | TTHM
33 | Tapwater
34 | Tibble
35 | Trihalomethanes
36 | Wickham
37 | Wooldridge
38 | YYYY
39 | Zellner
40 | ames
41 | amstat
42 | bayes
43 | birthweight
44 | bromodichloromethane
45 | bromoform
46 | ci
47 | datadocumentation
48 | decock
49 | df
50 | dibromochloromethane
51 | english
52 | freqentist
53 | github
54 | http
55 | jse
56 | mlb
57 | perc
58 | posttests
59 | ppb
60 | premie
61 | rscale
62 | se
63 | ss
64 | tapwater
65 | tbl
66 | tibble
67 | trihalomethanes
68 | tthm
69 | twosided
70 | tz
71 | www
72 | 


--------------------------------------------------------------------------------
/inst/lab.css:
--------------------------------------------------------------------------------
  1 | .fax-slot-machine::before {
  2 |     content: "\1f3b0"
  3 | }
  4 | 
  5 | body {
  6 |     counter-reset: question 0 exercise 0; /* start both counters at 0; .question and .exercise increment them */
  7 | }
  8 | 
  9 | h1 {
 10 |     font-family: Arial, Helvetica, sans-serif;
 11 |     font-weight: bold;
 12 | }
 13 | 
 14 | h2 {
 15 |     font-family: Arial, Helvetica, sans-serif;
 16 |     font-weight: bold;
 17 |     margin-top: 24px;
 18 | }
 19 | 
 20 | hr {
 21 |     border: 1px solid #357FAA;
 22 | }
 23 | 
 24 | .question, .exercise {
 25 |     position: relative;
 26 |     margin: 2em;
 27 |     padding: 2em 20px 1em 20px;
 28 | }
 29 | 
 30 | .question::before, .exercise::before {
 31 |     position: absolute;
 32 |     top: -1em;
 33 |     left: -2em;
 34 |     width: 7em;
 35 |     padding: 5px 0;
 36 |     color: #ffffff;
 37 |     font-weight: bold;
 38 |     font-family: "Helvetica Neue", Arial, sans-serif;
 39 |     text-align: center;
 40 | }
 41 | 
 42 | .question {
 43 |     counter-increment: question; /* advances the number shown by .question::before */
 44 |     background: rgb(49, 126, 172); /* opaque fallback, used only if the rgba() value below is not understood */
 45 |     background: rgba(49, 126, 172, 0.1); /* same colour at 10% opacity; wins when supported because it comes last */
 46 | }
 47 | 
 48 | .question::before {
 49 |     content: "Question " counter(question);
 50 |     background: #317EAC;
 51 |     background: rgb(49, 126, 172);
 52 | }
 53 | 
 54 | .exercise {
 55 |   counter-increment: exercise;
 56 |   background: rgb(92, 184, 92);
 57 |   background: rgba(92, 184, 92, 0.1);
 58 | }
 59 | 
 60 | .exercise::before {
 61 |     content: "Exercise " counter(exercise);
 62 |     background: rgb(92, 184, 92);
 63 | }
 64 | 
 65 | 
 66 | .question ul {
 67 |   counter-reset: choice;
 68 |   margin-left: 1.5em;
 69 |   list-style-type: none;
 70 | }
 71 | 
 72 | .question li {
 73 |     margin-top: 20px;
 74 |     counter-increment: choice;
 75 | }
 76 | 
 77 | .question li::before {
 78 |   content: counter(choice, upper-alpha) '. ';
 79 |   color: #317EAC;
 80 |   font-weight: bold;
 81 |   font-family: "Helvetica Neue", Arial, sans-serif;
 82 |   text-align: left;
 83 |   width: 2em;
 84 |   margin-left: -2em;
 85 |   display: inline-block;
 86 | }
 87 | 
 88 | 
 89 | .instructions {   
 90 |     margin-top: 30px;
 91 |     /*margin-bottom: 30px;*/
 92 |     padding: 10px 10px 0;
 93 |     border: 1px solid rgb(0, 102, 102);
 94 |     border: 1px solid rgba(0, 102, 102, 0.2);
 95 |     border-radius: 5px;
 96 |     color: rgb(0, 102, 102);
 97 |     color: rgba(0, 102, 102, 0.8);
 98 |     background: rgb(204, 255, 255);
 99 |     background: rgba(204, 255, 255, 0.1);
100 | }
101 | 
102 | .license { 
103 |     margin-top: 30px;
104 |     margin-bottom: 30px;
105 |     padding: 10px 10px 0;
106 |     border: 1px solid rgb(76, 114, 29);
107 |     border: 1px solid rgba(76, 114, 29, 0.2);
108 |     border-radius: 5px;
109 |     color: rgb(76, 114, 29);
110 |     color: rgba(76, 114, 29, 0.8);
111 |     background: rgb(76, 114, 29);
112 |     background: rgba(76, 114, 29, 0.1);
113 | }
114 | 
115 | .boxedtext {
116 |     background-color: rgb(86, 155, 189);
117 |     background-color: rgba(86, 155, 189, 0.2);
118 |     padding: 20px;
119 |     margin-bottom: 20px;
120 |     font-size: 10pt;
121 | }
122 | 
123 | 
124 | 


--------------------------------------------------------------------------------
/man/BF_app.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/BF_app.R
 3 | \name{BF_app}
 4 | \alias{BF_app}
 5 | \title{Run the interactive Bayes Factor shiny app}
 6 | \usage{
 7 | BF_app()
 8 | }
 9 | \description{
10 | This app illustrates how changing the Z score and prior precision
11 | affects the Bayes Factor for testing H1 that the mean is zero 
12 | versus H2 that the mean is not zero for data arising from a normal
13 | population.  Lindley's paradox occurs for large sample sizes
14 | when the Bayes factor favors H1 even though the Z score is large or the
15 | p-value is small enough to reach statistical significance and the values of 
16 | the sample mean do not reflect practical significance based on the prior 
17 | distribution.
18 | Bartlett's paradox may occur when the prior precision goes to zero, leading 
19 | to Bayes factors that favor H1 regardless of the data.
20 | A prior precision of one corresponds to the unit information prior.
21 | }
22 | \examples{
23 | if (interactive()) { 
24 | BF_app()
25 | }
26 | }
27 | 


--------------------------------------------------------------------------------
/man/allow_shiny.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rstudio.R
 3 | \name{allow_shiny}
 4 | \alias{allow_shiny}
 5 | \title{Simple check to determine if code is being run in RStudio with the shiny runtime
 6 | internal function}
 7 | \usage{
 8 | allow_shiny()
 9 | }
10 | \description{
11 | Simple check to determine if code is being run in RStudio with the shiny runtime
12 | internal function
13 | }
14 | \keyword{internal}
15 | 


--------------------------------------------------------------------------------
/man/ames.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/ames.R
  3 | \docType{data}
  4 | \name{ames}
  5 | \alias{ames}
  6 | \title{Housing prices in Ames, Iowa}
  7 | \format{
  8 | A tbl_df with 2930 rows and 82 variables:
  9 | \describe{
 10 |   \item{Order}{Observation number.}
 11 |   \item{PID}{Parcel identification number  - can be used with city web site for parcel review.}
 12 |   \item{area}{Above grade (ground) living area square feet.}
 13 |   \item{price}{Sale price in USD.}
 14 |   \item{MS.SubClass}{Identifies the type of dwelling involved in the sale.}
 15 |   \item{MS.Zoning}{Identifies the general zoning classification of the sale.}
 16 |   \item{Lot.Frontage}{Linear feet of street connected to property.}
 17 |   \item{Lot.Area}{Lot size in square feet.}
 18 |   \item{Street}{Type of road access to property.}
 19 |   \item{Alley}{Type of alley access to property.}
 20 |   \item{Lot.Shape}{General shape of property.}
 21 |   \item{Land.Contour}{Flatness of the property.}
 22 |   \item{Utilities}{Type of utilities available.}
 23 |   \item{Lot.Config}{Lot configuration.}
 24 |   \item{Land.Slope}{Slope of property.}
 25 |   \item{Neighborhood}{Physical locations within Ames city limits (map available).}
 26 |   \item{Condition.1}{Proximity to various conditions.}
 27 |   \item{Condition.2}{Proximity to various conditions (if more than one is present).}
 28 |   \item{Bldg.Type}{Type of dwelling.}
 29 |   \item{House.Style}{Style of dwelling.}
 30 |   \item{Overall.Qual}{Rates the overall material and finish of the house.}
 31 |   \item{Overall.Cond}{Rates the overall condition of the house.}
 32 |   \item{Year.Built}{Original construction date.}
 33 |   \item{Year.Remod.Add}{Remodel date (same as construction date if no remodeling or additions).}
 34 |   \item{Roof.Style}{Type of roof.}
 35 |   \item{Roof.Matl}{Roof material.}
 36 |   \item{Exterior.1st}{Exterior covering on house.}
 37 |   \item{Exterior.2nd}{Exterior covering on house (if more than one material).}
 38 |   \item{Mas.Vnr.Type}{Masonry veneer type.}
 39 |   \item{Mas.Vnr.Area}{Masonry veneer area in square feet.}
 40 |   \item{Exter.Qual}{Evaluates the quality of the material on the exterior.}
 41 |   \item{Exter.Cond}{Evaluates the present condition of the material on the exterior.}
 42 |   \item{Foundation}{Type of foundation.}
 43 |   \item{Bsmt.Qual}{Evaluates the height of the basement.}
 44 |   \item{Bsmt.Cond}{Evaluates the general condition of the basement.}
 45 |   \item{Bsmt.Exposure}{Refers to walkout or garden level walls.}
 46 |   \item{BsmtFin.Type.1}{Rating of basement finished area.}
 47 |   \item{BsmtFin.SF.1}{Type 1 finished square feet.}
 48 |   \item{BsmtFin.Type.2}{Rating of basement finished area (if multiple types).}
 49 |   \item{BsmtFin.SF.2}{Type 2 finished square feet.}
 50 |   \item{Bsmt.Unf.SF}{Unfinished square feet of basement area.}
 51 |   \item{Total.Bsmt.SF}{Total square feet of basement area.}
 52 |   \item{Heating}{Type of heating.}
 53 |   \item{Heating.QC}{Heating quality and condition.}
 54 |   \item{Central.Air}{Central air conditioning.}
 55 |   \item{Electrical}{Electrical system.}
 56 |   \item{X1st.Flr.SF}{First Floor square feet.}
 57 |   \item{X2nd.Flr.SF}{Second floor square feet.}
 58 |   \item{Low.Qual.Fin.SF}{Low quality finished square feet (all floors).}
 59 |   \item{Bsmt.Full.Bath}{Basement full bathrooms.}
 60 |   \item{Bsmt.Half.Bath}{Basement half bathrooms.}
 61 |   \item{Full.Bath}{Full bathrooms above grade.}
 62 |   \item{Half.Bath}{Half baths above grade.}
 63 |   \item{Bedroom.AbvGr}{Bedrooms above grade (does NOT include basement bedrooms).}
 64 |   \item{Kitchen.AbvGr}{Kitchens above grade.}
 65 |   \item{Kitchen.Qual}{Kitchen quality.}
 66 |   \item{TotRms.AbvGrd}{Total rooms above grade (does not include bathrooms).}
 67 |   \item{Functional}{Home functionality (Assume typical unless deductions are warranted).}
 68 |   \item{Fireplaces}{Number of fireplaces.}
 69 |   \item{Fireplace.Qu}{Fireplace quality.}
 70 |   \item{Garage.Type}{Garage location.}
 71 |   \item{Garage.Yr.Blt}{Year garage was built.}
 72 |   \item{Garage.Finish}{Interior finish of the garage.}
 73 |   \item{Garage.Cars}{Size of garage in car capacity.}
 74 |   \item{Garage.Area}{Size of garage in square feet.}
 75 |   \item{Garage.Qual}{Garage quality.}
 76 |   \item{Garage.Cond}{Garage condition.}
 77 |   \item{Paved.Drive}{Paved driveway.}
 78 |   \item{Wood.Deck.SF}{Wood deck area in square feet.}
 79 |   \item{Open.Porch.SF}{Open porch area in square feet.}
 80 |   \item{Enclosed.Porch}{Enclosed porch area in square feet.}
 81 |   \item{X3Ssn.Porch}{Three season porch area in square feet.}
 82 |   \item{Screen.Porch}{Screen porch area in square feet.}
 83 |   \item{Pool.Area}{Pool area in square feet.}
 84 |   \item{Pool.QC}{Pool quality.}
 85 |   \item{Fence}{Fence quality.}
 86 |   \item{Misc.Feature}{Miscellaneous feature not covered in other categories.}
 87 |   \item{Misc.Val}{Dollar value of miscellaneous feature.}
 88 |   \item{Mo.Sold}{Month Sold (MM).}
 89 |   \item{Yr.Sold}{Year Sold (YYYY).}
 90 |   \item{Sale.Type}{Type of sale.}
 91 |   \item{Sale.Condition}{Condition of sale.}
 92 | }
 93 | }
 94 | \source{
 95 | De Cock, Dean. "Ames, Iowa: Alternative to the Boston housing data as 
 96 | an end of semester regression project." Journal of Statistics Education 19.3 (2011).
 97 | }
 98 | \usage{
 99 | ames
100 | }
101 | \description{
102 | Data set contains information from the Ames Assessor's Office used in computing 
103 | assessed values for individual residential properties sold in Ames, IA from 2006 
104 | to 2010. See http://www.amstat.org/publications/jse/v19n3/decock/datadocumentation.txt 
105 | for detailed variable descriptions.
106 | }
107 | \keyword{datasets}
108 | 


--------------------------------------------------------------------------------
/man/ames_sampling_dist.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ames_sampling_dist.R
 3 | \name{ames_sampling_dist}
 4 | \alias{ames_sampling_dist}
 5 | \title{Simulate Sampling Distribution}
 6 | \usage{
 7 | ames_sampling_dist()
 8 | }
 9 | \description{
10 | Run the interactive ames sampling distribution shiny app to 
11 | illustrate sampling distributions using variables from the `ames`
12 | dataset.
13 | }
14 | \examples{
15 | if (interactive()) { 
16 |   ames_sampling_dist()
17 | }
18 | }
19 | 


--------------------------------------------------------------------------------
/man/arbuthnot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/arbuthnot.R
 3 | \docType{data}
 4 | \name{arbuthnot}
 5 | \alias{arbuthnot}
 6 | \title{Male and female births in London}
 7 | \format{
 8 | A tbl_df with 82 rows and 3 variables:
 9 | \describe{
10 |   \item{year}{year, ranging from 1629 to 1710}
11 |   \item{boys}{number of male christenings (births)}
12 |   \item{girls}{number of female christenings (births)}
13 | }
14 | }
15 | \source{
16 | These data are excerpted from the \code{\link[HistData]{Arbuthnot}}
17 | data set in the HistData package.
18 | }
19 | \usage{
20 | arbuthnot
21 | }
22 | \description{
23 | Arbuthnot's data describes male and female christenings (births) for
24 | London from 1629-1710.
25 | }
26 | \details{
27 | John Arbuthnot (1710) used these time series data to carry out the first
28 | known significance test. During every one of the 82 years, there were more
29 | male christenings than female christenings. As Arbuthnot wondered,
30 | we might also wonder if this could be due to chance, or whether it meant
31 | the birth ratio was not actually 1:1.
32 | }
33 | \keyword{datasets}
34 | 


--------------------------------------------------------------------------------
/man/atheism.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/atheism.R
 3 | \docType{data}
 4 | \name{atheism}
 5 | \alias{atheism}
 6 | \title{Atheism in the world data}
 7 | \format{
 8 | A tbl_df with 88032 rows and 3 variables:
 9 | \describe{
10 |   \item{nationality}{Country of the individual surveyed.}
11 |   \item{response}{A categorical variable with two levels: atheist and non-atheist.}
12 |   \item{year}{Year in which the person was surveyed.}
13 |   }
14 | }
15 | \source{
16 | \href{https://github.com/OpenIntroStat/oilabs/blob/master/data-raw/atheism/Global_INDEX_of_Religiosity_and_Atheism_PR__6.pdf}{WIN-Gallup International Press Release}
17 | }
18 | \usage{
19 | atheism
20 | }
21 | \description{
22 | Survey results on atheism across several countries and years. Each row
23 | represents a single respondent.
24 | }
25 | \keyword{datasets}
26 | 


--------------------------------------------------------------------------------
/man/bandit_posterior.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bandit_posterior.R
 3 | \name{bandit_posterior}
 4 | \alias{bandit_posterior}
 5 | \title{bandit posterior}
 6 | \usage{
 7 | bandit_posterior(
 8 |   data,
 9 |   prior = c(m1_good = 0.5, m2_good = 0.5),
10 |   win_probs = c(good = 1/2, bad = 1/3)
11 | )
12 | }
13 | \arguments{
14 | \item{data}{data frame containing win loss data}
15 | 
16 | \item{prior}{prior vector containing the probabilities of Machine 1 and Machine 2 being good, defaults to 0.5 and 0.5 respectively.}
17 | 
18 | \item{win_probs}{vector containing the probabilities of winning on the good and bad machine respectively.}
19 | }
20 | \value{
21 | A vector containing the posterior probability of Machine 1 and Machine 2 being the good machine.
22 | }
23 | \description{
24 | Utility function for calculating the posterior probability of each machine being "good" in 
25 | a two-armed bandit problem. Calculated result is based on observed win loss data, prior belief about 
26 | which machine is good and the probability of the good and bad machine paying out.
27 | }
28 | \examples{
29 | data = data.frame(machine = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), 
30 |                   outcome = c("W", "L", "W", "L", "L", "W", "L", "L", "L", "W"))
31 | bandit_posterior(data)
32 | plot_bandit_posterior(data)
33 | 
34 | }
35 | \seealso{
36 | \code{\link{bandit_sim}} to generate data and
37 |          \code{\link{plot_bandit_posterior}} to visualize.
38 | }
39 | 


--------------------------------------------------------------------------------
/man/bandit_sim.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bandit_sim.R
 3 | \name{bandit_sim}
 4 | \alias{bandit_sim}
 5 | \title{Run the Bandit Simulation shiny app}
 6 | \usage{
 7 | bandit_sim()
 8 | }
 9 | \description{
10 | Simulate data from a two-armed bandit (two slot machines) by clicking
11 | on the images for Machine 1 or Machine 2 and guess/learn which machine 
12 | has the higher probability of winning as the number of 
13 | outcomes of wins and losses accumulate.
14 | }
15 | \examples{
16 | if (interactive()) {
17 | # run interactive shiny app to generate wins and losses
18 | bandit_sim()
19 | }
20 | # paste data from the shiny app into a variable
21 | data = data.frame(
22 |  machine = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
23 |    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
24 |    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
25 |    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
26 |    2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
27 |    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
28 |    2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 
29 |    1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L), 
30 |  outcome = c("W", "W", "W", "L", "W", "W", "W", "L", "W", "L", "W", "L",
31 |   "L", "L", "W", "L", "W", "L", "L", "L", "W", "W", "W", "L", "L", "L", 
32 |   "L", "L", "W", "W", "L", "L", "W", "L", "L", "W", "L", "L", "W", "L",
33 |   "L", "L", "L", "L", "W", "L", "L", "W", "W", "W", "W", "L", "L", "L",
34 |   "L", "L", "L", "W", "L", "W", "L", "W", "L", "L", "L", "L", "L", "L", "L",
35 |   "L", "L", "L", "W", "W", "W", "L", "W", "L", "L", "L", "L", "L", "L", "L",
36 |   "L", "L", "L", "W", "W", "W", "W", "W", "L", "W", "W", "L", "W", "L", "L",
37 |   "L", "L", "L", "W", "L", "W", "L", "L", "L", "W", "W", "W", "W", "L", "L",
38 |   "W", "L", "W", "L", "L", "W"))
39 |   bandit_posterior(data)
40 |   plot_bandit_posterior(data)
41 | 
42 | }
43 | \seealso{
44 | \code{\link{bandit_posterior}} and \code{\link{plot_bandit_posterior}}
45 | }
46 | 


--------------------------------------------------------------------------------
/man/brfss.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/brfss.R
 3 | \docType{data}
 4 | \name{brfss}
 5 | \alias{brfss}
 6 | \title{Behavioral Risk Factor Surveillance System 2013 (Subset)}
 7 | \format{
 8 | A tbl_df with 5000 rows and 6 variables:
 9 | \describe{
10 |   \item{weight}{Weight in pounds.}
11 |   \item{height}{Height in inches.}
12 |   \item{sex}{Sex}
13 |   \item{exercise}{Any exercise in the last 30 days}
14 |   \item{fruit_per_day}{Number of servings of fruit consumed per day.}
15 |   \item{vege_per_day}{Number of servings of dark green vegetables consumed per day.}
16 | }
17 | }
18 | \source{
19 | Centers for Disease Control and Prevention (CDC). Behavioral Risk Factor Surveillance System
20 | Survey Data. Atlanta, Georgia: U.S. Department of Health and Human Services, Centers for
21 | Disease Control and Prevention, 2013.
22 | }
23 | \usage{
24 | brfss
25 | }
26 | \description{
27 | This data set is a small subset of BRFSS results from the 2013 survey, each row represents an individual respondent.
28 | }
29 | \keyword{datasets}
30 | 


--------------------------------------------------------------------------------
/man/calc_streak.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/calc_streak.R
 3 | \name{calc_streak}
 4 | \alias{calc_streak}
 5 | \title{Calculate hitting streaks}
 6 | \usage{
 7 | calc_streak(x)
 8 | }
 9 | \arguments{
10 | \item{x}{A data frame or character vector of hits (\code{"H"}) and misses (\code{"M"}).}
11 | }
12 | \value{
13 | A data frame with one column, \code{length}, containing the length of each hit streak.
14 | }
15 | \description{
16 | Calculate hitting streaks
17 | }
18 | \examples{
19 | data(kobe_basket)
20 | calc_streak(kobe_basket$shot)
21 | 
22 | }
23 | 


--------------------------------------------------------------------------------
/man/credible_interval_app.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/credible_interval.R
 3 | \name{credible_interval_app}
 4 | \alias{credible_interval_app}
 5 | \title{Credible Interval shiny app}
 6 | \usage{
 7 | credible_interval_app()
 8 | }
 9 | \description{
10 | Run the `shiny` credible interval app to generate credible
11 | intervals under the prior or posterior distribution for 
12 | Beta, Gamma and Gaussian families.  Sliders are used to
13 | adjust the hyperparameters in the distribution so that one
14 | may see how the resulting credible intervals and plotted 
15 | distributions change.
16 | }
17 | \examples{
18 | if (interactive()) {
19 |    credible_interval_app()
20 | }
21 | }
22 | 


--------------------------------------------------------------------------------
/man/evals.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/evals.R
 3 | \docType{data}
 4 | \name{evals}
 5 | \alias{evals}
 6 | \title{Teacher evaluations at the University of Texas at Austin}
 7 | \format{
 8 | A data frame with 463 rows and 21 variables:
 9 | \describe{
10 |   \item{score}{Average professor evaluation score: (1) very unsatisfactory - (5) excellent}
11 |   \item{rank}{Rank of professor: teaching, tenure track, tenure}
12 |   \item{ethnicity}{Ethnicity of professor: not minority, minority}
13 |   \item{gender}{Gender of professor: female, male}
14 |   \item{language}{Language of school where professor received education: english or non-english}
15 |   \item{age}{Age of professor}
16 |   \item{cls_perc_eval}{Percent of students in class who completed evaluation}
17 |   \item{cls_did_eval}{Number of students in class who completed evaluation}
18 |   \item{cls_students}{Total number of students in class}
19 |   \item{cls_level}{Class level: lower, upper}
20 |   \item{cls_profs}{Number of professors teaching sections in course in sample: single, multiple}
21 |   \item{cls_credits}{Number of credits of class: one credit (lab, PE, etc.), multi credit}
22 |   \item{bty_f1lower}{Beauty rating of professor from lower level female: (1) lowest - (10) highest}
23 |   \item{bty_f1upper}{Beauty rating of professor from upper level female: (1) lowest - (10) highest}
24 |   \item{bty_f2upper}{Beauty rating of professor from second upper level female: (1) lowest - (10) highest}
25 |   \item{bty_m1lower}{Beauty rating of professor from lower level male: (1) lowest - (10) highest}
26 |   \item{bty_m1upper}{Beauty rating of professor from upper level male: (1) lowest - (10) highest}
27 |   \item{bty_m2upper}{Beauty rating of professor from second upper level male: (1) lowest - (10) highest}
28 |   \item{bty_avg}{Average beauty rating of professor}
29 |   \item{pic_outfit}{Outfit of professor in picture: not formal, formal}
30 |   \item{pic_color}{Color of professor's picture: color, black & white}
31 | }
32 | }
33 | \source{
34 | These data appear in Hamermesh DS, and Parker A. 2005. Beauty in the
35 | classroom: instructors' pulchritude and putative pedagogical productivity. Economics of Education Review
36 |  24(4):369-376.
37 | }
38 | \usage{
39 | evals
40 | }
41 | \description{
42 | The data were gathered from end of semester student evaluations for a large
43 | sample of professors from the University of Texas at Austin (variables beginning
44 | with \code{cls}). In addition, six students rated the professors' physical
45 | appearance (variables beginning with \code{bty}). This is a slightly modified
46 | version of the original data set that was released as part of the replication
47 | data for Data Analysis Using Regression and Multilevel/Hierarchical Models
48 | (Gelman and Hill, 2007).
49 | }
50 | \keyword{datasets}
51 | 


--------------------------------------------------------------------------------
/man/figures/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/StatsWithR/statsr/9cb9edad2f60a21308e13f9c52a70d1dfcbe423a/man/figures/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/man/inference.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/inference.R
  3 | \name{inference}
  4 | \alias{inference}
  5 | \title{Hypothesis tests and confidence intervals}
  6 | \usage{
  7 | inference(
  8 |   y,
  9 |   x = NULL,
 10 |   data,
 11 |   type = c("ci", "ht"),
 12 |   statistic = c("mean", "median", "proportion"),
 13 |   success = NULL,
 14 |   order = NULL,
 15 |   method = c("theoretical", "simulation"),
 16 |   null = NULL,
 17 |   alternative = c("less", "greater", "twosided"),
 18 |   sig_level = 0.05,
 19 |   conf_level = 0.95,
 20 |   boot_method = c("perc", "se"),
 21 |   nsim = 15000,
 22 |   seed = NULL,
 23 |   verbose = TRUE,
 24 |   show_var_types = verbose,
 25 |   show_summ_stats = verbose,
 26 |   show_eda_plot = verbose,
 27 |   show_inf_plot = verbose,
 28 |   show_res = verbose
 29 | )
 30 | }
 31 | \arguments{
 32 | \item{y}{Response variable, can be numerical or categorical}
 33 | 
 34 | \item{x}{Explanatory variable, categorical (optional)}
 35 | 
 36 | \item{data}{Name of data frame that y and x are in}
 37 | 
 38 | \item{type}{of inference; "ci" (confidence interval) or "ht" (hypothesis test)}
 39 | 
 40 | \item{statistic}{parameter to estimate: mean, median, or proportion}
 41 | 
 42 | \item{success}{which level of the categorical variable to call "success", i.e. do inference on}
 43 | 
 44 | \item{order}{when x is given, order of levels of x in which to subtract parameters}
 45 | 
 46 | \item{method}{of inference; "theoretical" (CLT based) or "simulation" (randomization/bootstrap)}
 47 | 
 48 | \item{null}{null value for a hypothesis test}
 49 | 
 50 | \item{alternative}{direction of the alternative hypothesis; "less","greater", or "twosided"}
 51 | 
 52 | \item{sig_level}{significance level, value between 0 and 1 (used only for ANOVA to determine if posttests are necessary)}
 53 | 
 54 | \item{conf_level}{confidence level, value between 0 and 1}
 55 | 
 56 | \item{boot_method}{bootstrap method; "perc" (percentile) or "se" (standard error)}
 57 | 
 58 | \item{nsim}{number of simulations}
 59 | 
 60 | \item{seed}{seed to be set, default is NULL}
 61 | 
 62 | \item{verbose}{whether output should be verbose or not, default is TRUE}
 63 | 
 64 | \item{show_var_types}{print variable types, set to verbose by default}
 65 | 
 66 | \item{show_summ_stats}{print summary stats, set to verbose by default}
 67 | 
 68 | \item{show_eda_plot}{print EDA plot, set to verbose by default}
 69 | 
 70 | \item{show_inf_plot}{print inference plot, set to verbose by default}
 71 | 
 72 | \item{show_res}{print results, set to verbose by default}
 73 | }
 74 | \value{
 75 | Results of inference task performed
 76 | }
 77 | \description{
 78 | Hypothesis tests and confidence intervals
 79 | }
 80 | \examples{
 81 | data(tapwater)
 82 | 
 83 | # Calculate a 95\% CI using quantiles of a Student t distribution
 84 | inference(tthm, data=tapwater,
 85 |                 statistic="mean", 
 86 |                 type="ci",
 87 |                 method="theoretical")
 88 |                 
 89 | inference(tthm, data=tapwater,
 90 |                 statistic="mean", 
 91 |                 type="ci",
 92 |                 boot_method = "perc",
 93 |                 method="simulation")
 94 |                 
 95 | # Inference for a proportion
 96 | # Calculate a 95\% confidence interval for the proportion of atheists
 97 | # among United States respondents in 2012
 98 | data("atheism")
 99 | library("dplyr")
100 | us12 <- atheism \%>\%
101 |         filter(nationality == "United States", year == "2012")
102 | inference(y = response, data = us12, statistic = "proportion",
103 |           type = "ci",
104 |           method = "theoretical",
105 |           success = "atheist")
106 |                 
107 | }
108 | 


--------------------------------------------------------------------------------
/man/kobe_basket.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/kobe_basket.R
 3 | \docType{data}
 4 | \name{kobe_basket}
 5 | \alias{kobe_basket}
 6 | \title{Kobe Bryant basketball performance}
 7 | \format{
 8 | A data frame with 133 rows and 6 variables:
 9 | \describe{
10 |   \item{vs}{A categorical vector, ORL if the Los Angeles Lakers played
11 |   against Orlando}
12 |   \item{game}{A numerical vector, game in the 2009 NBA finals}
13 |   \item{quarter}{A categorical vector, quarter in the game, OT stands for
14 |   overtime}
15 |   \item{time}{A character vector, time at which Kobe took a shot}
16 |   \item{description}{A character vector, description of the shot}
17 |   \item{shot}{A categorical vector, H if the shot was a hit, M if the shot
18 |   was a miss}
19 | }
20 | }
21 | \usage{
22 | kobe_basket
23 | }
24 | \description{
25 | Data from the five games the Los Angeles Lakers played against the Orlando
26 | Magic in the 2009 NBA finals.
27 | }
28 | \details{
29 | Each row represents a shot Kobe Bryant took during the five games of the
30 | 2009 NBA finals. Kobe Bryant's performance earned him the title of Most
31 | Valuable Player and many spectators commented on how he appeared to show
32 | a hot hand.
33 | }
34 | \keyword{datasets}
35 | 


--------------------------------------------------------------------------------
/man/mlb11.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/mlb11.R
 3 | \docType{data}
 4 | \name{mlb11}
 5 | \alias{mlb11}
 6 | \title{Major League Baseball team data}
 7 | \format{
 8 | A data frame with 30 rows and 12 variables:
 9 | \describe{
10 |   \item{team}{Team name.}
11 |   \item{runs}{Number of runs.}
12 |   \item{at_bats}{Number of at bats.}
13 |   \item{hits}{Number of hits.}
14 |   \item{homeruns}{Number of home runs.}
15 |   \item{bat_avg}{Batting average.}
16 |   \item{strikeouts}{Number of strikeouts.}
17 |   \item{stolen_bases}{Number of stolen bases.}
18 |   \item{wins}{Number of wins.}
19 |   \item{new_onbase}{Newer variable: on-base percentage, a measure of
20 |       how often a batter reaches base for any reason other than a fielding error,
21 |       fielder's choice, dropped/uncaught third strike, fielder's obstruction, or
22 |       catcher's interference.}
23 |   \item{new_slug}{Newer variable: slugging percentage, popular measure of the
24 |       power of a hitter calculated as the total bases divided by at bats.}
25 |   \item{new_obs}{Newer variable: on-base plus slugging, calculated as the sum of the on-base and slugging percentages.}
26 | }
27 | }
28 | \source{
29 | \href{https://www.mlb.com/}{mlb.com}
30 | }
31 | \usage{
32 | mlb11
33 | }
34 | \description{
35 | Data from all 30 Major League Baseball teams from the 2011 season.
36 | }
37 | \keyword{datasets}
38 | 


--------------------------------------------------------------------------------
/man/nc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/nc.R
 3 | \docType{data}
 4 | \name{nc}
 5 | \alias{nc}
 6 | \title{North Carolina births}
 7 | \format{
 8 | A tbl_df with 1000 rows and 13 variables:
 9 | \describe{
10 |   \item{fage}{father's age in years}
11 |   \item{mage}{mother's age in years}
12 |   \item{mature}{maturity status of mother}
13 |   \item{weeks}{length of pregnancy in weeks}
14 |   \item{premie}{whether the birth was classified as premature (premie) or full-term}
15 |   \item{visits}{number of hospital visits during pregnancy}
16 |   \item{marital}{whether mother is `married` or `not married` at birth}
17 |   \item{gained}{weight gained by mother during pregnancy in pounds}
18 |   \item{weight}{weight of the baby at birth in pounds}
19 |   \item{lowbirthweight}{whether baby was classified as low birthweight (`low`) or not (`not low`)}
20 |   \item{gender}{gender of the baby, `female` or `male`}
21 |   \item{habit}{status of the mother as a `nonsmoker` or a `smoker`}
22 |   \item{whitemom}{whether mom is `white` or `not white`}
23 | }
24 | }
25 | \source{
26 | State of North Carolina.
27 | }
28 | \usage{
29 | nc
30 | }
31 | \description{
32 | In 2004, the state of North Carolina released a large data set containing 
33 | information on births recorded in this state. This data set is useful to 
34 | researchers studying the relation between habits and practices of expectant 
35 | mothers and the birth of their children. We will work with a random sample of 
36 | observations from this data set.
37 | }
38 | \keyword{datasets}
39 | 


--------------------------------------------------------------------------------
/man/nycflights.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/nycflights.R
 3 | \docType{data}
 4 | \name{nycflights}
 5 | \alias{nycflights}
 6 | \title{Flights data}
 7 | \format{
 8 | A tbl_df with 32,735 rows and 16 variables:
 9 | \describe{
10 | \item{year,month,day}{Date of departure}
11 | \item{dep_time,arr_time}{Departure and arrival times, local tz.}
12 | \item{dep_delay,arr_delay}{Departure and arrival delays, in minutes.
13 |   Negative times represent early departures/arrivals.}
14 | \item{hour,minute}{Time of departure broken into hour and minutes}
15 | \item{carrier}{Two letter carrier abbreviation. See \code{airlines} in the
16 |   \code{nycflights13} package for more information}
17 | \item{tailnum}{Plane tail number}
18 | \item{flight}{Flight number}
19 | \item{origin,dest}{Origin and destination. See \code{airports} in the
20 |   \code{nycflights13} package for more information, or google the airport code.}
21 | \item{air_time}{Amount of time spent in the air}
22 | \item{distance}{Distance flown}
23 | }
24 | }
25 | \source{
26 | Hadley Wickham (2014). \code{nycflights13}: Data about flights departing 
27 | NYC in 2013. R package version 0.1. 
28 |  \url{https://CRAN.R-project.org/package=nycflights13}
29 | }
30 | \usage{
31 | nycflights
32 | }
33 | \description{
34 | On-time data for a random sample of flights that departed NYC (i.e. JFK, LGA or EWR) 
35 | in 2013.
36 | }
37 | \keyword{datasets}
38 | 


--------------------------------------------------------------------------------
/man/plot_bandit_posterior.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/bandit_posterior.R
 3 | \name{plot_bandit_posterior}
 4 | \alias{plot_bandit_posterior}
 5 | \title{plot_bandit_posterior}
 6 | \usage{
 7 | plot_bandit_posterior(
 8 |   data,
 9 |   prior = c(m1_good = 0.5, m2_good = 0.5),
10 |   win_probs = c(good = 1/2, bad = 1/3)
11 | )
12 | }
13 | \arguments{
14 | \item{data}{data frame containing win loss data}
15 | 
16 | \item{prior}{prior vector containing the probabilities of Machine 1 and Machine 2 being good, defaults to 50-50.}
17 | 
18 | \item{win_probs}{vector containing the probabilities of winning on the good and bad machine respectively.}
19 | }
20 | \description{
21 | Generates a plot that shows the bandit posterior values as they are sequentially updated 
22 | by the provided win / loss data.
23 | }
24 | \examples{
25 | # capture data from the `shiny` app `bandit_sim`.
26 | data = data.frame(machine = c(1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L), 
27 |                   outcome = c("W", "L", "W", "L", "L", "W", "L", "L", "L", "W"))
28 | plot_bandit_posterior(data)
29 | 
30 | }
31 | \seealso{
32 | \code{\link{bandit_sim}} to generate data for use with this function
33 | }
34 | 


--------------------------------------------------------------------------------
/man/plot_ss.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/plot_ss.R
 3 | \name{plot_ss}
 4 | \alias{plot_ss}
 5 | \title{plot_ss}
 6 | \usage{
 7 | plot_ss(x, y, data, showSquares = FALSE, leastSquares = FALSE)
 8 | }
 9 | \arguments{
10 | \item{x}{the name of numerical vector 1 on x-axis}
11 | 
12 | \item{y}{the name of numerical vector 2 on y-axis}
13 | 
14 | \item{data}{the dataframe in which x and y can be found}
15 | 
16 | \item{showSquares}{logical option to show boxes representing the squared residuals}
17 | 
18 | \item{leastSquares}{logical option to bypass point entry and automatically draw the least squares line}
19 | }
20 | \description{
21 | An interactive shiny app that will generate a scatterplot of two variables, then
22 | allow the user to click the plot in two locations to draw a best fitting line.
23 | Residuals are drawn by default; boxes representing the squared residuals are
24 | optional.
25 | }
26 | \examples{
27 | \dontrun{plot_ss}
28 | }
29 | 


--------------------------------------------------------------------------------
/man/present.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/present.R
 3 | \docType{data}
 4 | \name{present}
 5 | \alias{present}
 6 | \title{Male and female births in the US}
 7 | \format{
 8 | A tbl_df with 74 rows and 3 variables:
 9 | \describe{
10 |   \item{year}{year, ranging from 1940 to 2013}
11 |   \item{boys}{number of male births}
12 |   \item{girls}{number of female births}
13 | }
14 | }
15 | \source{
16 | Data up to 2002 appear in Mathews TJ, and Hamilton BE. 2005. Trend
17 | analysis of the sex ratio at birth in the United States. National Vital
18 | Statistics Reports 53(20):1-17. Data for 2003 - 2013 have been collected
19 | from annual National Vital Statistics Reports published by the US Department of 
20 | Health and Human Services, Centers for Disease Control and Prevention, 
21 | National Center for Health Statistics.
22 | }
23 | \usage{
24 | present
25 | }
26 | \description{
27 | Counts of the total number of male and female births in the United States from
28 | 1940 to 2013.
29 | }
30 | \keyword{datasets}
31 | 


--------------------------------------------------------------------------------
/man/rep_sample_n.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/rep_sample_n.R
 3 | \name{rep_sample_n}
 4 | \alias{rep_sample_n}
 5 | \title{Repeated Sampling from a Tibble}
 6 | \usage{
 7 | rep_sample_n(tbl, size, replace = FALSE, reps = 1)
 8 | }
 9 | \arguments{
10 | \item{tbl}{tbl of data.}
11 | 
12 | \item{size}{The number of rows to select.}
13 | 
14 | \item{replace}{Sample with or without replacement?}
15 | 
16 | \item{reps}{The number of samples to collect.}
17 | }
18 | \value{
19 | A tbl_df that aggregates all created samples, with the addition of a \code{replicate} column that the tbl_df is also grouped by
20 | }
21 | \description{
22 | Repeated Sampling from a Tibble
23 | }
24 | \examples{
25 | data(nc)
26 | rep_sample_n(nc, size=10, replace=FALSE, reps=1)
27 | }
28 | 


--------------------------------------------------------------------------------
/man/statsr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/statsr.R
 3 | \docType{package}
 4 | \name{statsr}
 5 | \alias{statsr}
 6 | \title{statsr: A companion package for Statistics with R}
 7 | \description{
 8 | R package to support the online open access book "An Introduction
 9 | to Bayesian Thinking" available at 
10 | \url{https://statswithr.github.io/book/} and videos for the Coursera "Statistics with
11 | R" Specialization.  The package includes data sets, functions
12 | and Shiny Applications for learning frequentist and Bayesian
13 | statistics with R.  The two main functions for inference and decision making are
14 | `inference` and `bayes_inference`  which support  
15 | confidence/credible intervals and hypothesis testing with one sample or two samples
16 | from Gaussian and Bernoulli populations.   Shiny apps are used to illustrate how prior
17 | hyperparameters or changes in the data may influence posterior distributions.
18 | }
19 | \details{
20 | See \url{https://github.com/StatsWithR/statsr} for the development version and 
21 | additional information. For additional background and illustrations of the functions,
22 | see the online book \url{https://statswithr.github.io/book/}.
23 | }
24 | 


--------------------------------------------------------------------------------
/man/tapwater.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/tapwater.R
 3 | \docType{data}
 4 | \name{tapwater}
 5 | \alias{tapwater}
 6 | \title{Total Trihalomethanes in Tapwater}
 7 | \format{
 8 | A dataframe with 28 rows and 6 variables:
 9 | \describe{
10 |   \item{date}{Date of collection}
11 |   \item{tthm}{average total trihalomethanes in ppb }
12 |   \item{samples}{number of samples}
13 |   \item{nondetects}{number of samples where tthm not detected (0)}
14 |   \item{min}{min tthm in ppb in samples}
15 |   \item{max}{max tthm in ppb in samples}
16 | }
17 | }
18 | \source{
19 | National Drinking Water Database for Durham, NC. \url{https://www.ewg.org}
20 | }
21 | \usage{
22 | tapwater
23 | }
24 | \description{
25 | Trihalomethanes are formed as a by-product predominantly when chlorine is used to disinfect water
26 | for drinking. They result from the reaction of chlorine or bromine with
27 | organic matter present in the water being treated.
28 | THMs  have been associated through epidemiological studies
29 | with some adverse health effects and many are considered carcinogenic.
30 | In the United States, the EPA limits 
31 | the total concentration of the four chief constituents (chloroform, bromoform, bromodichloromethane, and dibromochloromethane), referred to as
32 | total trihalomethanes (TTHM), to 80 parts per billion in treated water.
33 | }
34 | \keyword{datasets}
35 | 


--------------------------------------------------------------------------------
/man/wage.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/wage.R
 3 | \docType{data}
 4 | \name{wage}
 5 | \alias{wage}
 6 | \title{Wage data}
 7 | \format{
 8 | A tbl_df with 935 rows and 17 variables:
 9 | \describe{
10 |   \item{wage}{weekly earnings (dollars)}
11 |   \item{hours}{average hours worked per week}
12 |   \item{iq}{IQ score}
13 |   \item{kww}{Knowledge of world work score}
14 |   \item{educ}{years of education}
15 |   \item{exper}{years of work experience}
16 |   \item{tenure}{years with current employer}
17 |   \item{age}{age in years}
18 |   \item{married}{=1 if married}
19 |   \item{black}{=1 if black}
20 |   \item{south}{=1 if live in south}
21 |   \item{urban}{=1 if live in a Standard Metropolitan Statistical Area }
22 |   \item{sibs}{number of siblings}
23 |   \item{brthord}{birth order}
24 |   \item{meduc}{mother's education (years)}
25 |   \item{feduc}{father's education (years)}
26 |   \item{lwage}{natural log of wage}
27 | }
28 | }
29 | \source{
30 | Jeffrey M. Wooldridge (2000). Introductory Econometrics: A Modern Approach. South-Western College Publishing.
31 | }
32 | \usage{
33 | wage
34 | }
35 | \description{
36 | The data were gathered as part of a random sample of 935 respondents throughout the United States.
37 | }
38 | \keyword{datasets}
39 | 


--------------------------------------------------------------------------------
/man/zinc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/zinc.R
 3 | \docType{data}
 4 | \name{zinc}
 5 | \alias{zinc}
 6 | \title{Zinc Concentration in Water}
 7 | \format{
 8 | A data frame with 10 observations on the following 4 variables.
 9 |  \describe{
10 |    \item{\code{location}}{sample number}
11 |    \item{\code{bottom}}{zinc concentration in bottom water}
12 |    \item{\code{surface}}{zinc concentration in surface water}
13 |    \item{\code{difference}}{difference between zinc concentration at the bottom and surface}
14 |  }
15 | }
16 | \source{
17 | \href{https://online.stat.psu.edu/stat500/sites/stat500/files/data/zinc_conc.txt}{PennState Eberly College of Science Online Courses}
18 | }
19 | \usage{
20 | zinc
21 | }
22 | \description{
23 | Trace metals in drinking water affect the flavor and
24 | an unusually high concentration can pose a health
25 | hazard. Ten pairs of data were taken measuring zinc
26 | concentration in bottom water and surface water.
27 | }
28 | \examples{
29 |  data(zinc)
30 |  str(zinc)
31 |  plot(bottom ~ surface, data=zinc)
32 |  # use paired t-test to test if difference in means is zero
33 | 
34 | }
35 | \keyword{datasets}
36 | 


--------------------------------------------------------------------------------
/statsr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | PackageRoxygenize: rd,collate,namespace
19 | 


--------------------------------------------------------------------------------
/tests/spelling.R:
--------------------------------------------------------------------------------
1 | # Spell-check the package (vignettes included) only when 'spelling' is installed.
2 | if (requireNamespace("spelling", quietly = TRUE))
3 |   spelling::spell_check_test(vignettes = TRUE, error = FALSE, skip_on_cran = TRUE)
4 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # Entry point that runs the statsr testthat suite.
2 | library("testthat")
3 | library("statsr")
4 | test_check("statsr")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-bayes_inference.R:
--------------------------------------------------------------------------------
 1 | test_that("NG-prior posterior mean and 95% CI match the closed form", {  # issue 15
 2 |   # Book section 4.1.5 example: TTHM in tapwater
 3 |   data(tapwater)
 4 |   # prior hyperparameters (s2_0 is a variance; its square root is passed as s_0 below)
 5 |   m_0 = 35; n_0 = 25;  s2_0 = 156.25; v_0 = n_0 - 1
 6 |   # sample summaries
 7 |   Y = tapwater$tthm
 8 |   ybar = mean(Y)
 9 |   s2 = var(Y)
10 |   n = length(Y)
11 |   # posterior hyperparameters computed by hand, then the 95% interval from t quantiles
12 |   n_n = n_0 + n
13 |   m_n = (n*ybar + n_0*m_0)/n_n
14 |   v_n = v_0 + n
15 |   s2_n = ((n-1)*s2 + v_0*s2_0 + n_0*n*(m_0 - ybar)^2/n_n)/v_n
16 |   ci = m_n + qt(c(0.025, 0.975), v_n)*sqrt(s2_n/n_n)
17 |   out = bayes_inference(tthm, data=tapwater, prior="NG",
18 |                   mu_0 = m_0, n_0=n_0, s_0 = sqrt(s2_0), v_0 = v_0,
19 |                   stat="mean", type="ci", method="theoretical",
20 |                   show_res=TRUE, show_summ=TRUE, show_plot=FALSE)
21 |   expect_equal(out$post_mean, m_n)  # (object, expected): package result vs. hand calculation
22 |   expect_equal(out$ci, ci)
23 | })
24 | 


--------------------------------------------------------------------------------