├── .Rbuildignore ├── .github └── workflows │ └── R-CMD-check.yaml ├── .gitignore ├── DESCRIPTION ├── NAMESPACE ├── NEWS.md ├── R ├── .Rapp.history ├── data.r ├── distances.r ├── distmet.r ├── encryption.r ├── method-dist.r ├── method-model.r ├── method-permute.r ├── method-ts.r ├── opt_diff.r ├── power.r ├── protocols.r ├── pvalues.r ├── quick_plots.R ├── sample_size.r └── theme.R ├── README.Rmd ├── README.md ├── cran-comments.md ├── data ├── aud.rda ├── electoral.rda ├── lakers.rdata ├── tips.rda ├── turk_results.rda └── wasps.rda ├── demo ├── 00Index └── lakers.r ├── docs ├── 404.html ├── articles │ ├── distances.html │ ├── distances_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-15-1.png │ │ │ └── unnamed-chunk-9-1.png │ ├── index.html │ ├── nullabor-distributions.html │ ├── nullabor-distributions_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-10-1.png │ │ │ ├── unnamed-chunk-2-1.png │ │ │ ├── unnamed-chunk-3-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ ├── unnamed-chunk-7-1.png │ │ │ ├── unnamed-chunk-8-1.png │ │ │ └── unnamed-chunk-9-1.png │ ├── nullabor-examples.html │ ├── nullabor-examples_files │ │ └── figure-html │ │ │ └── unnamed-chunk-6-1.png │ ├── nullabor-regression.html │ ├── nullabor-regression_files │ │ └── figure-html │ │ │ ├── unnamed-chunk-3-1.png │ │ │ ├── unnamed-chunk-4-1.png │ │ │ ├── unnamed-chunk-5-1.png │ │ │ ├── unnamed-chunk-6-1.png │ │ │ └── unnamed-chunk-7-1.png │ ├── nullabor.html │ └── nullabor_files │ │ └── figure-html │ │ ├── unnamed-chunk-3-1.png │ │ ├── unnamed-chunk-4-1.png │ │ └── unnamed-chunk-8-1.png ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── index.html ├── jquery.sticky-kit.min.js ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── Rplot002.png │ ├── add_true.html │ ├── aud.html │ ├── bin_dist.html │ ├── box_dist.html │ ├── calc_diff.html │ ├── calc_mean_dist.html │ ├── decrypt.html │ ├── distmet-2.png │ ├── distmet-3.png │ ├── distmet.html │ ├── distplot-1.png │ ├── distplot.html │ ├── electoral.html │ ├── figures │ │ └── nullabor_hex.png │ ├── find_plot_data.html │ ├── index.html │ ├── lal.html │ ├── lineup-1.png │ ├── lineup-2.png │ ├── lineup.html │ ├── lineup_histograms-1.png │ ├── lineup_histograms-2.png │ ├── lineup_histograms-3.png │ ├── lineup_histograms-4.png │ ├── lineup_histograms.html │ ├── lineup_qq-1.png │ ├── lineup_qq-2.png │ ├── lineup_qq-3.png │ ├── lineup_qq-4.png │ ├── lineup_qq.html │ ├── lineup_residuals-1.png │ ├── lineup_residuals-2.png │ ├── lineup_residuals-3.png │ ├── lineup_residuals-4.png │ ├── lineup_residuals-5.png │ ├── lineup_residuals.html │ ├── null_dist-1.png │ ├── null_dist-2.png │ ├── null_dist.html │ ├── null_gen.html │ ├── null_lm-1.png │ ├── null_lm.html │ ├── null_permute-1.png │ ├── null_permute.html │ ├── null_ts-1.png │ ├── null_ts.html │ ├── opt_bin_diff-1.png │ ├── opt_bin_diff.html │ ├── pvisual.html │ ├── reg_dist.html │ ├── resid_boot.html │ ├── resid_pboot.html │ ├── resid_perm.html │ ├── resid_rotate.html │ ├── resid_sigma.html │ ├── rorschach.html │ ├── sample_size-1.png │ ├── sample_size.html │ ├── sep_dist.html │ ├── theme_strip-1.png │ ├── theme_strip.html │ ├── tips.html │ ├── turk_results.html │ ├── uni_dist.html │ ├── visual_power.html │ └── wasps.html └── sitemap.xml ├── inst └── CITATION ├── man ├── add_true.Rd ├── aud.Rd ├── bin_dist.Rd ├── box_dist.Rd ├── calc_diff.Rd ├── calc_mean_dist.Rd 
├── decrypt.Rd ├── distmet.Rd ├── distplot.Rd ├── electoral.Rd ├── figures │ └── nullabor_hex.png ├── find_plot_data.Rd ├── lal.Rd ├── lineup.Rd ├── lineup_histograms.Rd ├── lineup_qq.Rd ├── lineup_residuals.Rd ├── null_dist.Rd ├── null_gen.Rd ├── null_lm.Rd ├── null_permute.Rd ├── null_ts.Rd ├── opt_bin_diff.Rd ├── pvisual.Rd ├── reg_dist.Rd ├── resid_boot.Rd ├── resid_pboot.Rd ├── resid_perm.Rd ├── resid_rotate.Rd ├── resid_sigma.Rd ├── rorschach.Rd ├── sample_size.Rd ├── sep_dist.Rd ├── theme_strip.Rd ├── tips.Rd ├── turk_results.Rd ├── uni_dist.Rd ├── visual_power.Rd └── wasps.Rd ├── nullabor.Rproj └── vignettes ├── distances.Rmd ├── nullabor-distributions.Rmd ├── nullabor-examples.Rmd ├── nullabor-regression.Rmd └── nullabor.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^cran-comments\.md$ 5 | ^README\.Rmd$ 6 | ^README-.*\.png$ 7 | ^docs$ 8 | ^_pkgdown\.yml$ 9 | ^doc$ 10 | ^Meta$ 11 | ^CRAN-RELEASE$ 12 | ^.github 13 | ^revdep 14 | ^CRAN-SUBMISSION$ 15 | ^nullabor.Rcheck 16 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v2 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: rcmdcheck 45 | 46 | - uses: r-lib/actions/check-r-package@v2 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | inst/doc 5 | doc 6 | Meta 7 | /doc/ 8 | /Meta/ 9 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: nullabor 2 | Version: 0.3.15 3 | Description: Tools for visual inference. Generate null data sets 4 | and null plots using permutation and simulation. Calculate distance metrics 5 | for a lineup, and examine the distributions of metrics. 
6 | Title: Tools for Graphical Inference 7 | Authors@R: c( 8 | person("Hadley", "Wickham", email = "h.wickham@gmail.com", role = c("aut", "ctb"), comment = c(ORCID = "0000-0003-4757-117X")), 9 | person("Niladri Roy", "Chowdhury", email = "niladri.ia@gmail.com", role = c("aut", "ctb")), 10 | person("Di", "Cook", email = "dicook@monash.edu", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-3813-7155")), 11 | person("Heike", "Hofmann", email = "hofmann@iastate.edu", role = c("aut", "ctb"), comment = c(ORCID = "0000-0001-6216-5183")), 12 | person("Måns", "Thulin", email = "mans@statistikkonsult.com", role = c("aut", "ctb"), comment = c(ORCID = "0000-0002-2756-3933")) 13 | ) 14 | Maintainer: Di Cook 15 | License: GPL (>= 2) 16 | URL: https://github.com/dicook/nullabor 17 | BugReports: https://github.com/dicook/nullabor/issues 18 | Imports: 19 | MASS, 20 | moments, 21 | fpc, 22 | ggplot2, 23 | dplyr, 24 | purrr, 25 | tidyr, 26 | tibble, 27 | magrittr, 28 | stats 29 | Suggests: 30 | forecast, 31 | viridis, 32 | knitr 33 | Depends: 34 | R (>= 4.1.0) 35 | LazyData: true 36 | Type: Package 37 | LazyLoad: false 38 | VignetteBuilder: knitr 39 | Roxygen: list(markdown = TRUE) 40 | RoxygenNote: 7.3.2 41 | Encoding: UTF-8 42 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(bin_dist) 4 | export(box_dist) 5 | export(calc_diff) 6 | export(calc_mean_dist) 7 | export(decrypt) 8 | export(distmet) 9 | export(distplot) 10 | export(lineup) 11 | export(lineup_histograms) 12 | export(lineup_qq) 13 | export(lineup_residuals) 14 | export(null_dist) 15 | export(null_lm) 16 | export(null_permute) 17 | export(null_ts) 18 | export(opt_bin_diff) 19 | export(pvisual) 20 | export(reg_dist) 21 | export(resid_boot) 22 | export(resid_pboot) 23 | export(resid_perm) 24 | export(resid_rotate) 25 | export(resid_sigma) 26 | export(rorschach) 27 | export(sample_size) 28 | export(sep_dist) 29 | export(theme_strip) 30 | export(uni_dist) 31 | export(visual_power) 32 | import(fpc) 33 | import(moments) 34 | importFrom(MASS,fitdistr) 35 | importFrom(dplyr,do) 36 | importFrom(dplyr,filter) 37 | importFrom(dplyr,group_by) 38 | importFrom(dplyr,mutate) 39 | importFrom(dplyr,summarise) 40 | importFrom(ggplot2,.data) 41 | importFrom(ggplot2,aes) 42 | importFrom(ggplot2,after_stat) 43 | importFrom(ggplot2,facet_wrap) 44 | importFrom(ggplot2,geom_abline) 45 | importFrom(ggplot2,geom_density) 46 | importFrom(ggplot2,geom_histogram) 47 | importFrom(ggplot2,geom_line) 48 | importFrom(ggplot2,geom_point) 49 | importFrom(ggplot2,geom_qq) 50 | importFrom(ggplot2,geom_qq_line) 51 | importFrom(ggplot2,geom_smooth) 52 | importFrom(ggplot2,geom_tile) 53 | importFrom(ggplot2,ggplot) 54 | importFrom(ggplot2,labs) 55 | importFrom(ggplot2,last_plot) 56 | importFrom(ggplot2,scale_fill_gradient) 57 | importFrom(ggplot2,xlab) 58 | importFrom(ggplot2,ylab) 59 | importFrom(magrittr,"%>%") 60 | importFrom(purrr,rerun) 61 | importFrom(stats,as.ts) 62 | importFrom(stats,coef) 63 | importFrom(stats,cutree) 64 | importFrom(stats,density) 65 | importFrom(stats,deviance) 66 | importFrom(stats,df.residual) 67 | importFrom(stats,dist) 68 | importFrom(stats,fitted) 69 | importFrom(stats,formula) 70 | importFrom(stats,hclust) 71 | importFrom(stats,lm) 72 | importFrom(stats,lm.influence) 73 | importFrom(stats,pbinom) 74 | importFrom(stats,predict) 75 | importFrom(stats,qbinom) 76 | 
importFrom(stats,quantile) 77 | importFrom(stats,rbinom) 78 | importFrom(stats,resid) 79 | importFrom(stats,residuals) 80 | importFrom(stats,rnorm) 81 | importFrom(stats,runif) 82 | importFrom(stats,sd) 83 | importFrom(stats,simulate) 84 | importFrom(stats,update) 85 | importFrom(tibble,as_data_frame) 86 | importFrom(tibble,tibble) 87 | importFrom(tidyr,unnest) 88 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # nullabor 0.3.15 2 | 3 | * Reduced examples for lineup_residuals() and lineup_histograms() so that they pass the CRAN automatic check 4 | 5 | # nullabor 0.3.14 6 | 7 | * Fix to qqplot for standardised residuals 8 | 9 | # nullabor 0.3.13 10 | 11 | - Updated null_lm to optionally compute leverages and standardized residuals. 12 | - Added a new function, lineup_residuals, for creating four different types of lineup plots for residuals with a single line of code. 13 | - Added two new functions, lineup_histograms and lineup_qq, for creating lineup histogram and Q-Q plots to assess distributional assumptions. 14 | - Added two new vignettes. 15 | - Added Måns Thulin as a co-author, due to the substantial additions. 16 | 17 | # nullabor 0.3.12 18 | 19 | - CITATION revised to satisfy CRAN check 20 | - URLs in vignettes changed to DOIs because the DOI URLs are apparently not valid according to the automated checks 21 | 22 | # nullabor 0.3.11 23 | 24 | - URLs in DESCRIPTION, CITATION, vignettes were causing an error on CRAN checks! 25 | 26 | # nullabor 0.3.10 27 | 28 | - removed dependency on reshape2 29 | - adjusting other dependencies 30 | - new theme to remove context from plots 31 | - updated roxygen dependency 32 | 33 | # nullabor 0.3.9 34 | 35 | - CRAN fixes, minor code changes 36 | 37 | # nullabor 0.3.8 38 | 39 | - CRAN fixes, minor code changes 40 | 41 | # nullabor 0.3.7 42 | 43 | - Bug fixes, minor 44 | 45 | # nullabor 0.3.6 46 | 47 | - Added a sample size calculator 48 | 49 | # nullabor 0.3.5 50 | 51 | - Added time series null generating mechanism 52 | - Fixed a bug in the residual generating mechanism reported by Jan Vanhove 53 | - Changed the encrypt/decrypt to have 2 digits 54 | - Using dplyr and purrr for making lineups now thanks to Stuart Lee 55 | 56 | # nullabor 0.3.6 57 | 58 | - Fix from Jenny Bryan on tidyverse 59 | - CITATION file added 60 | - vignettes update 61 | -------------------------------------------------------------------------------- /R/data.r: -------------------------------------------------------------------------------- 1 | #' Conversion rate of 1 Australian Dollar (AUD) to 1 US Dollar 2 | #' 3 | #' The dataset consists of the daily exchange rates of 1 Australian Dollar to 1 US Dollar between Jan 9 2018 and Feb 21 2018. 4 | #' 5 | #' @docType data 6 | #' @name aud 7 | NULL 8 | 9 | #' Los Angeles Lakers play-by-play data. 10 | #' 11 | #' Play by play data from all games played by the Los Angeles Lakers in the 12 | #' 2008/2009 season. 13 | #' 14 | #' @docType data 15 | #' @name lal 16 | NULL 17 | 18 | #' Wasp gene expression data.
19 | #' 20 | #' Data from Toth et al (2010) used in Niladri Roy et al (2015) 21 | #' 22 | #' @docType data 23 | #' @name wasps 24 | NULL 25 | 26 | #' Sample turk results 27 | #' 28 | #' Subset of data from a Turk experiment, used to show how to compute power of a lineup 29 | #' 30 | #' @docType data 31 | #' @name turk_results 32 | NULL 33 | 34 | #' Polls and election results from the 2012 US Election 35 | #' 36 | #' 37 | #' @format A list with two data frames: 38 | #' polls is a data frame of 51 rows and 4 variables 39 | #' \describe{ 40 | #' \item{State}{State name} 41 | #' \item{Electoral.vote}{Number of electoral votes in the 2012 election} 42 | #' \item{Margin}{Margin between the parties with the highest number of votes and second highest number of votes. 43 | #' These margins are based on polls.} 44 | #' \item{Democrat}{logical vector True, if the democratic party is the majority party in this state. } 45 | #' } 46 | #' `election` is a data frame of 51 rows and 5 variables 47 | #' \describe{ 48 | #' \item{State}{State name} 49 | #' \item{Candidate}{character string of the winner: Romney or Obama} 50 | #' \item{Electoral.vote}{Number of electoral votes in the 2012 election} 51 | #' \item{Margin}{Margin between the parties with the highest number of votes and second highest number of votes. 52 | #' These margins are based on the actual election outcome} 53 | #' \item{Democrat}{logical vector True, if the democratic party is the majority party in this state. } 54 | #' } 55 | #' @docType data 56 | #' @name electoral 57 | NULL 58 | 59 | #' Tipping data 60 | #' 61 | #' 62 | #' One waiter recorded information about each tip he received over a 63 | #' period of a few months working in one restaurant. He collected several 64 | #' variables: 65 | #' 66 | #' \itemize{ 67 | #' \item tip in dollars, 68 | #' \item bill in dollars, 69 | #' \item sex of the bill payer, 70 | #' \item whether there were smokers in the party, 71 | #' \item day of the week, 72 | #' \item time of day, 73 | #' \item size of the party. 74 | #' } 75 | #' 76 | #' In all he recorded 244 tips. The data was reported in a collection of 77 | #' case studies for business statistics (Bryant & Smith 1995). 78 | #' 79 | #' @references Bryant, P. G. and Smith, M (1995) \emph{Practical Data 80 | #' Analysis: Case Studies in Business Statistics}. Homewood, IL: Richard D. 81 | #' Irwin Publishing: 82 | #' @format A data frame with 244 rows and 7 variables 83 | #' @keywords datasets 84 | "tips" 85 | -------------------------------------------------------------------------------- /R/distmet.r: -------------------------------------------------------------------------------- 1 | #' Empirical distribution of the distance 2 | #' 3 | #' The empirical distribution of the distance measures is calculated based on the mean 4 | #' distance of each of the null plots from the other null plots in a lineup. At this moment 5 | #' this method works only for \code{\link{null_permute}} method. This function helps get some 6 | #' assessment of whether the actual data plot is very different from the null plots. 
7 | #' 8 | #' @export 9 | #' @param lineup.dat lineup data 10 | #' @param var a vector of names of the variables to be used 11 | #' @param met distance metric needed to calculate the distance as a character 12 | #' @param method method for generating null data sets 13 | #' @param pos position of the observed data in the lineup 14 | #' @param repl number of sets of null plots selected to obtain the distribution; 1000 by 15 | #' default 16 | #' @param dist.arg a list or vector of inputs for the distance metric met; NULL by default 17 | #' @param m the number of plots in the lineup; m = 20 by default 18 | #' @return lineup has the data used for the calculations 19 | #' @return null_values contains new null samples from which to compare nulls in lineup 20 | #' @return diff difference in distance between nulls and actual data and that of the null 21 | #' that is most different from other nulls. A negative value means that the actual data 22 | #' plot is similar to the null plots. 23 | #' @return closest list of the five closest nulls to the actual data plot 24 | #' @return pos position of the actual data plot in the lineup 25 | #' @importFrom dplyr summarise group_by 26 | #' @examples 27 | #' # Each of these examples uses a small number of nulls (m=8), and a small number of 28 | #' # repeated sampling from the null distribution (repl=100), to make it faster to run. 29 | #' # In your own examples you should think about increasing each of these, at least to the defaults. 30 | #' \dontrun{ 31 | #' if (require('dplyr')) { 32 | #' d <- lineup(null_permute('mpg'), mtcars, pos = 1) 33 | #' dd <- distmet(d, var = c('mpg', 'wt'), 34 | #' 'reg_dist', null_permute('mpg'), pos = 1, repl = 100, m = 8) 35 | #' distplot(dd, m=8) 36 | #' } 37 | #' } 38 | #' 39 | #' \dontrun{ 40 | #' d <- lineup(null_permute('mpg'), mtcars, pos=4, n=8) 41 | #' library(ggplot2) 42 | #' ggplot(d, aes(mpg, wt)) + geom_point() + facet_wrap(~ .sample, ncol=4) 43 | #' if (require('dplyr')) { 44 | #' dd <- distmet(d, var = c('mpg', 'wt'), 'bin_dist', null_permute('mpg'), 45 | #' pos = 4, repl = 100, dist.arg = list(lineup.dat = d, X.bin = 5, 46 | #' Y.bin = 5), m = 8) 47 | #' distplot(dd, m=8) 48 | #' } 49 | #' } 50 | #' 51 | #' # Example using bin_dist 52 | #' \dontrun{ 53 | #' if (require('dplyr')) { 54 | #' d <- lineup(null_permute('mpg'), mtcars, pos = 1) 55 | #' library(ggplot2) 56 | #' ggplot(d, aes(mpg, wt)) + geom_point() + facet_wrap(~ .sample, ncol=5) 57 | #' dd <- distmet(d, var = c('mpg', 'wt'), 58 | #' 'bin_dist', null_permute('mpg'), pos = 1, repl = 500, 59 | #' dist.arg = list(lineup.dat = d, X.bin = 5, Y.bin = 5)) 60 | #' distplot(dd) 61 | #' } 62 | #' } 63 | #' 64 | #' # Example using uni_dist 65 | #' \dontrun{ 66 | #' mod <- lm(wt ~ mpg, data = mtcars) 67 | #' resid.dat <- data.frame(residual = mod$resid) 68 | #' d <- lineup(null_dist('residual', dist = 'normal'), resid.dat, pos=19) 69 | #' ggplot(d, aes(residual)) + geom_histogram(binwidth = 0.25) + facet_wrap(~ .sample, ncol=5) 70 | #' if (require('dplyr')) { 71 | #' dd <- distmet(d, var = 'residual', 'uni_dist', null_dist('residual', 72 | #' dist = 'normal'), pos = 19, repl = 500) 73 | #' distplot(dd) 74 | #' } 75 | #' } 76 | distmet <- function(lineup.dat, var, met, method, pos, repl = 1000, dist.arg = NULL, m = 20) { 77 | dist.mean <- calc_mean_dist(lineup.dat, var, met, pos, dist.arg, m) 78 | diff <- with(dist.mean, mean.dist[plotno == pos] - max(mean.dist[plotno != pos])) 79 | closest <- dist.mean[order(dist.mean$mean.dist, decreasing = TRUE), ]$plotno[2:6] 80 
| obs.dat <- lineup.dat[lineup.dat$.sample == pos, c(var, ".sample")] 81 | all.samp <- replicate(repl, { 82 | null <- method(obs.dat) 83 | null_gen(lineup.dat, null, met, method, m, dist.arg) 84 | }) 85 | return(list(lineup = dist.mean[, c(pos.1 = "plotno", dist = "mean.dist")], null_values = all.samp, diff = diff, 86 | closest = closest, pos = pos)) 87 | } 88 | 89 | #' Computing the distance for the null plots 90 | #' 91 | #' @keywords internal 92 | null_gen <- function(lineup.dat, null, met, method, m, dist.arg){ 93 | func <- match.fun(met) 94 | Dist <- replicate(m - 2, { 95 | null.dat <- method(null) 96 | ifelse(is.null(dist.arg), do.call(func, list(null, null.dat)), 97 | do.call(func, append(list(null, null.dat), unname(dist.arg)))) 98 | }) 99 | mean(Dist) 100 | } 101 | 102 | #' Plotting the distribution of the distance measure 103 | #' 104 | #' The permutation distribution of the distance measure is plotted with the distances for 105 | #' the null plots. Distance measure values for the null plots and the true plot are overlaid. 106 | #' 107 | #' @param dat output from \code{\link{distmet}} 108 | #' @param m the number of plots in the lineup; m = 20 by default 109 | #' @export 110 | #' @importFrom stats density 111 | #' @examples 112 | #' \dontrun{ 113 | #' if (require('dplyr')) { 114 | #' d <- lineup(null_permute('mpg'), mtcars, pos = 1) 115 | #' library(ggplot2) 116 | #' ggplot(d, aes(mpg, wt)) + geom_point() + facet_wrap(~.sample) 117 | #' distplot(distmet(d, var = c('mpg', 'wt'), 'reg_dist', null_permute('mpg'), 118 | #' pos = 1, repl = 100, m = 8), m = 8) 119 | #' } 120 | #' } 121 | distplot <- function(dat, m = 20) { 122 | null_values <- NULL 123 | mean.dist <- NULL 124 | y <- yend <- NULL 125 | true <- NULL 126 | plotno <- NULL 127 | 128 | null <- data.frame(null_values=dat$null_values) 129 | lineupvals <- dat$lineup 130 | lineupvals$true <- ifelse(lineupvals$plotno == dat$pos, "true", "null") 131 | lineupvals$y <- 0.01 * min(density(null$null_values)$y) 132 | lineupvals$yend <- 0.05 * max(density(null$null_values)$y) 133 | 134 | p <- ggplot2::ggplot(null, ggplot2::aes(x=null_values)) + 135 | ggplot2::geom_density(fill = I("grey80"), colour = I("grey80")) + 136 | xlab("Permutation distribution") + ylab("") 137 | p <- p + ggplot2::geom_segment(data=lineupvals, ggplot2::aes(x=mean.dist, xend=mean.dist, 138 | y=y, yend=yend, colour=true), alpha=0.8) 139 | p <- p + ggplot2::geom_text(data=lineupvals, 140 | ggplot2::aes(x = mean.dist, y=0, label = plotno, colour=true), vjust="top") 141 | p <- p + ggplot2::scale_colour_manual(values=c("true"="darkorange","null"="grey50")) 142 | p <- p + ggplot2::theme(legend.position="none") 143 | 144 | return(p) 145 | } 146 | -------------------------------------------------------------------------------- /R/encryption.r: -------------------------------------------------------------------------------- 1 | .old <- c(LETTERS, letters, 0:9) 2 | .new <- sample(.old) 3 | 4 | .old_string <- paste(.old, collapse = "") 5 | .new_string <- paste(.new, collapse = "") 6 | 7 | encrypt <- function(...) { 8 | message <- paste(..., sep = "") 9 | chartr(.old_string, .new_string, message) 10 | } 11 | 12 | #' Use decrypt to reveal the position of the real data. 13 | #' 14 | #' The real data position is encrypted by the lineup function, which 15 | #' writes it out as a text string. The decrypt function decrypts this text 16 | #' string to reveal where the real data is. 17 | #' 18 | #' @param ...
character vector to decrypt 19 | #' @export 20 | #' @examples 21 | #' decrypt('0uXR2p rut L2O2') 22 | decrypt <- function(...) { 23 | message <- paste(..., sep = "") 24 | m <- chartr(.new_string, .old_string, message) 25 | m_num <- as.numeric(substr(m, 23, 24)) - 10 26 | paste(substr(m, 1, 22), m_num) 27 | } 28 | -------------------------------------------------------------------------------- /R/method-dist.r: -------------------------------------------------------------------------------- 1 | dists <- c(beta = "beta", cauchy = "cauchy", `chi-squared` = "chisq", exponential = "exp", f = "f", gamma = "gamma", 2 | geometric = "geom", `log-normal` = "lnorm", lognormal = "lnorm", logistic = "logis", `negative binomial` = "nbinom", binomial = "binom", 3 | normal = "norm", poisson = "pois", t = "t", uniform = 'unif', weibull = "weibull") 4 | 5 | # Specific distribution ------------------------------------------------------ 6 | 7 | #' Generate null data with a specific distribution. 8 | #' 9 | #' Null hypothesis: variable has specified distribution 10 | #' 11 | #' @param var variable name 12 | #' @param dist distribution name. One of: beta, cauchy, chisq, 13 | #' exp, f, gamma, geom, lnorm, logis, 14 | #' nbinom, binom, norm, pois, t, unif, weibull 15 | #' @param params list of parameters of distribution. If \code{NULL}, will 16 | #' use \code{\link[MASS]{fitdistr}} to estimate them. 17 | #' @return a function that given \code{data} generates a null data set. 18 | #' For use with \code{\link{lineup}} or \code{\link{rorschach}} 19 | #' @export 20 | #' @seealso null_permute, null_lm 21 | #' @importFrom MASS fitdistr 22 | #' @importFrom stats coef 23 | #' @examples 24 | #' dframe <- data.frame(x = rnorm(150)) 25 | #' library(ggplot2) 26 | #' # three histograms of normally distributed values 27 | #' ggplot( 28 | #' data=rorschach(method=null_dist("x", "norm"), n = 3, true=dframe) 29 | #' ) + 30 | #' geom_histogram(aes(x=x, y=..density..), binwidth=0.25) + 31 | #' facet_grid(.~.sample) + 32 | #' geom_density(aes(x=x), colour="steelblue", size=1) 33 | #' 34 | #' # uniform distributions are not as easy to recognize as such 35 | #' dframe$x = runif(150) 36 | #' ggplot( 37 | #' data=rorschach(method=null_dist("x", "uniform", 38 | #' params=list(min=0, max=1)), 39 | #' n = 3, true=dframe)) + 40 | #' geom_histogram(aes(x=x, y=..density..), binwidth=0.1) + 41 | #' facet_grid(.~.sample) + 42 | #' geom_density(aes(x=x), colour="steelblue", size=1) 43 | null_dist <- function(var, dist, params = NULL) { 44 | dist <- match.arg(dist, names(dists)) 45 | generator <- match.fun(paste("r", dists[dist], sep = "")) 46 | 47 | function(df) { 48 | # If parameters not specified, use fitdistr from MASS to find them 49 | if (is.null(params)) { 50 | if (dist == "uniform") stop("specify minimum and maximum of the uniform distribution in the function call, use the form: params = list(min = ., max = .)") 51 | params <- as.list(stats::coef(fitdistr(df[[var]], dist))) 52 | } 53 | params$n <- nrow(df) 54 | df[[var]] <- do.call(generator, params) 55 | df 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /R/method-model.r: -------------------------------------------------------------------------------- 1 | # Linear model as null ------------------------------------------------------- 2 | 3 | #' Generate null data with null residuals from a model. 
4 | #' 5 | #' Null hypothesis: variable is linear combination of predictors 6 | #' 7 | #' @param f model specification formula, as defined by \code{\link{lm}} 8 | #' @param method method for generating null residuals. Built in methods 9 | #' 'rotate', 'perm', 'pboot' and 'boot' are defined by \code{\link{resid_rotate}}, 10 | #' \code{\link{resid_perm}}, \code{\link{resid_pboot}} and \code{\link{resid_boot}} 11 | #' respectively 12 | #' @param additional whether to compute additional measures: standardized 13 | #' residuals and leverage 14 | #' @param ... other arguments passed onto \code{method}. 15 | #' @return a function that given \code{data} generates a null data set. 16 | #' For use with \code{\link{lineup}} or \code{\link{rorschach}} 17 | #' @export 18 | #' @importFrom stats lm predict deviance df.residual lm.influence 19 | #' @seealso null_permute, null_dist 20 | #' @examples 21 | #' data(tips) 22 | #' x <- lm(tip ~ total_bill, data = tips) 23 | #' tips.reg <- data.frame(tips, .resid = residuals(x), .fitted = fitted(x)) 24 | #' library(ggplot2) 25 | #' ggplot(lineup(null_lm(tip ~ total_bill, method = 'rotate'), tips.reg)) + 26 | #' geom_point(aes(x = total_bill, y = .resid)) + 27 | #' facet_wrap(~ .sample) 28 | null_lm <- function(f, method = "rotate", additional = FALSE, ...) { 29 | n <- NULL 30 | if (is.character(method)) { 31 | method <- match.fun(paste("resid", method, sep = "_")) 32 | } 33 | function(df) { 34 | model <- eval(substitute(lm(formula, data = df), list(formula = f))) 35 | resp_var <- all.vars(f[[2]]) 36 | 37 | resid <- method(model, df, ...) 38 | fitted <- predict(model, df) 39 | df[".resid"] <- resid 40 | df[".fitted"] <- fitted 41 | if(additional){ 42 | s <- sqrt(deviance(model)/df.residual(model)) 43 | hii <- lm.influence(model, do.coef = FALSE)$hat 44 | df[".leverage"] <- dropInf(hii, hii) 45 | df[".stdresid"] <- dropInf(resid/(s * sqrt(1 - hii)), hii) 46 | } 47 | df[[resp_var]] <- fitted + resid 48 | df 49 | } 50 | } 51 | 52 | # Extractor methods 53 | rss <- function(model) sum(stats::resid(model)^2) 54 | sigma <- function(model) summary(model)$sigma 55 | n <- function(model) length(stats::resid(model)) 56 | 57 | #' Rotation residuals. 58 | #' 59 | #' For use with \code{\link{null_lm}} 60 | #' 61 | #' @param model to extract residuals from 62 | #' @param data used to fit model 63 | #' @importFrom stats update 64 | #' @export 65 | resid_rotate <- function(model, data) { 66 | data[names(model$model)[1]] <- stats::rnorm(nrow(data)) 67 | 68 | rmodel <- stats::update(model, data = data) 69 | stats::resid(rmodel) * sqrt(rss(model)/rss(rmodel)) 70 | } 71 | 72 | #' Parametric bootstrap residuals. 73 | #' 74 | #' For use with \code{\link{null_lm}} 75 | #' 76 | #' @param model to extract residuals from 77 | #' @param data used to fit model 78 | #' @export 79 | resid_pboot <- function(model, data) { 80 | stats::rnorm(n = length(model$residuals), sd = sigma(model)) 81 | } 82 | 83 | #' Residuals simulated by a normal model, with specified sigma 84 | #' 85 | #' For use with \code{\link{null_lm}} 86 | #' 87 | #' @param model to extract residuals from 88 | #' @param data used to fit model 89 | #' @param sigma, a specific sigma to model 90 | #' @importFrom stats rnorm 91 | #' @export 92 | resid_sigma <- function(model, data, sigma = 1) { 93 | stats::rnorm(n = n(model), sd = sigma) 94 | } 95 | 96 | #' Bootstrap residuals. 
97 | #' 98 | #' For use with \code{\link{null_lm}} 99 | #' 100 | #' @param model to extract residuals from 101 | #' @param data used to fit model 102 | #' @importFrom stats resid 103 | #' @export 104 | resid_boot <- function(model, data) { 105 | sample(stats::resid(model), replace = TRUE) 106 | } 107 | 108 | #' Permutation residuals. 109 | #' 110 | #' For use with \code{\link{null_lm}} 111 | #' 112 | #' @param model to extract residuals from 113 | #' @importFrom stats resid 114 | #' @param data used to fit model 115 | #' @export 116 | resid_perm <- function(model, data) { 117 | sample(stats::resid(model)) 118 | } 119 | 120 | 121 | # Helper function for leverages, adapted from plot.lm 122 | dropInf <- function(x, h) { 123 | if (any(isInf <- h >= 1)) { 124 | warning(gettextf("not plotting observations with leverage greater than one:\n %s", 125 | paste(which(isInf), collapse = ", ")), call. = FALSE, 126 | domain = NA) 127 | x[isInf] <- NaN 128 | } 129 | x 130 | } 131 | -------------------------------------------------------------------------------- /R/method-permute.r: -------------------------------------------------------------------------------- 1 | # Multivariate independence -------------------------------------------------- 2 | 3 | 4 | #' Generate null data by permuting a variable. 5 | #' 6 | #' Null hypothesis: variable is independent of others 7 | #' 8 | #' @param var name of variable to permute 9 | #' @return a function that given \code{data} generates a null data set. 10 | #' For use with \code{\link{lineup}} or \code{\link{rorschach}} 11 | #' @export 12 | #' @seealso null_lm, null_dist 13 | #' @examples 14 | #' data(mtcars) 15 | #' library(ggplot2) 16 | #' ggplot(data=rorschach(method=null_permute("mpg"), n = 3, true=mtcars)) + 17 | #' geom_boxplot(aes(x=factor(cyl), y=mpg, fill=factor(cyl))) +facet_grid(.~.sample) + 18 | #' theme(legend.position="none", aspect.ratio=1) 19 | null_permute <- function(var) { 20 | function(df) { 21 | df[[var]] <- sample(df[[var]]) 22 | df 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /R/method-ts.r: -------------------------------------------------------------------------------- 1 | # Temporal dependence -------------------------- 2 | 3 | #' Generate null data by simulating from a time series model. 4 | #' 5 | #' Null hypothesis: data follows a time series model using auto.arima from the forecast package 6 | #' 7 | #' @param var variable to model as a time series 8 | #' @param modelfn method for simulating from ts model. 9 | #' @return a function that given \code{data} generates a null data set. 
10 | #' For use with \code{\link{lineup}} or \code{\link{rorschach}} 11 | #' @export 12 | #' @seealso null_model 13 | #' @importFrom stats as.ts simulate 14 | #' @importFrom tibble as_data_frame 15 | #' @examples 16 | #' require(forecast) 17 | #' require(ggplot2) 18 | #' require(dplyr) 19 | #' data(aud) 20 | #' l <- lineup(null_ts("rate", auto.arima), aud) 21 | #' ggplot(l, aes(x=date, y=rate)) + geom_line() + 22 | #' facet_wrap(~.sample, scales="free_y") + 23 | #' theme(axis.text = element_blank()) + 24 | #' xlab("") + ylab("") 25 | #' l_dif <- l %>% 26 | #' group_by(.sample) %>% 27 | #' mutate(d=c(NA,diff(rate))) %>% 28 | #' ggplot(aes(x=d)) + geom_density() + 29 | #' facet_wrap(~.sample) 30 | null_ts <- function(var, modelfn) { 31 | function(df) { 32 | ts <- as.ts(df[[var]]) 33 | 34 | model_fit <- ts %>% 35 | modelfn 36 | x <- simulate(model_fit, future=FALSE) 37 | df[[var]] <- as.vector(x) 38 | df 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /R/opt_diff.r: -------------------------------------------------------------------------------- 1 | #' Calculating the mean distances of each plot in the lineup. 2 | #' 3 | #' Distance metric is used to calculate the mean distance between the true plot 4 | #' and all the null plots in a lineup. The mean distances of each null plot to all 5 | #' the other null plots are calculated. The mean distances are returned for all the plots 6 | #' in the lineup. 7 | #' 8 | #' @param lineup.dat lineup data of the lineup 9 | #' @param var a vector of names of the variables to be used to calculate the mean distances 10 | #' @param met distance metric needed to calculate the distance as a character 11 | #' @param pos position of the true plot in the lineup 12 | #' @param dist.arg a list or vector of inputs for the distance metric met; NULL by default 13 | #' @param m number of plots in the lineup, by default m = 20 14 | #' @return the mean distances of each plot in the lineup 15 | #' @importFrom dplyr summarise group_by filter 16 | #' @export 17 | #' @examples 18 | #' if(require('dplyr')){ 19 | #' calc_mean_dist(lineup(null_permute('mpg'), mtcars, pos = 1), var = c('mpg', 'wt'), 20 | #' met = 'reg_dist', pos = 1, m = 10)} 21 | calc_mean_dist <- function(lineup.dat, var, met, pos, dist.arg = NULL, m = 20){ 22 | plotno <- pos.2 <- b <- NULL 23 | dat.pos <- expand.grid(plotno = 1:m, pos.2 = 1:m) 24 | dat.pos <- dplyr::filter(dat.pos, plotno != pos.2 & pos.2 != pos) 25 | lineup.dat <- lineup.dat[, c(var, ".sample")] 26 | if (!is.character(met)) { 27 | stop("function met should be a character") 28 | } 29 | func <- match.fun(met) 30 | d <- summarise(group_by(dat.pos, plotno, pos.2), b = with(lineup.dat, ifelse(is.null(dist.arg), 31 | do.call(func, list(dplyr::filter(lineup.dat, .sample == plotno), 32 | dplyr::filter(lineup.dat, .sample == pos.2))), 33 | do.call(func, append(list(dplyr::filter(lineup.dat, .sample == plotno), 34 | dplyr::filter(lineup.dat, .sample == pos.2)), unname(dist.arg)))))) 35 | summarise(group_by(d, plotno), mean.dist = mean(b)) 36 | } 37 | #' Calculating the difference between true plot and the null plot with the maximum distance. 38 | #' 39 | #' Distance metric is used to calculate the mean distance between the true plot 40 | #' and all the null plots in a lineup. The difference between the mean 41 | #' distance of the true plot and the maximum mean distance of the null plots is 42 | #' calculated. 
43 | #' 44 | #' @param lineup.dat lineup data to get the lineup 45 | #' @param var a vector of names of the variables to be used to calculate the difference 46 | #' @param met distance metric needed to calculate the distance as a character 47 | #' @param pos position of the true plot in the lineup 48 | #' @param dist.arg a list or vector of inputs for the distance metric met; NULL by default 49 | #' @param m number of plots in the lineup, by default m = 20 50 | #' @return difference between the mean distance of the true plot and 51 | #' the maximum mean distance of the null plots 52 | #' @importFrom dplyr summarise group_by 53 | #' @export 54 | #' @examples 55 | #' if(require('dplyr')){ 56 | #' lineup.dat <- lineup(null_permute('mpg'), mtcars, pos = 1) 57 | #' calc_diff(lineup.dat, var = c('mpg', 'wt'), met = 'bin_dist', 58 | #' dist.arg = list(lineup.dat = lineup.dat, X.bin = 5, Y.bin = 5), pos = 1, m = 8)} 59 | #' 60 | #' if(require('dplyr')){ 61 | #' calc_diff(lineup(null_permute('mpg'), mtcars, pos = 1), var = c('mpg', 'wt'), met = 'reg_dist', 62 | #' dist.arg = NULL, pos = 1, m = 8)} 63 | calc_diff <- function(lineup.dat, var, met, pos, dist.arg = NULL, m = 20){ 64 | dist.mean <- calc_mean_dist(lineup.dat, var, met, pos, dist.arg, m) 65 | with(dist.mean, mean.dist[plotno == pos] - max(mean.dist[plotno != pos])) 66 | } 67 | #' Finds the number of bins in x and y direction which gives the maximum binned distance. 68 | #' 69 | #' This function finds the optimal number of bins in both x and y direction which should 70 | #' be used to calculate the binned distance. The binned distance is calculated for each 71 | #' combination of provided choices of number of bins in x and y direction and finds the 72 | #' difference using \code{calc_diff} for each combination. The combination for which the 73 | #' difference is maximum should be used. 
74 | #' 75 | #' @param lineup.dat lineup data to get the lineup 76 | #' @param var a list of names of the variables to be used to calculate the difference 77 | #' @param xlow the lowest value of number of bins on the x-direction 78 | #' @param xhigh the highest value of number of bins on the x-direction 79 | #' @param ylow the lowest value of number of bins on the y-direction 80 | #' @param yhigh the highest value of number of bins on the y-direction 81 | #' @param pos position of the true plot in the lineup 82 | #' @param plot LOGICAL; if true, returns a tile plot for the combinations 83 | #' of number of bins with the differences as weights 84 | #' @param m number of plots in the lineup, by default m = 20 85 | #' @return a dataframe with the number of bins and differences 86 | #' the maximum mean distance of the null plots 87 | #' @importFrom dplyr summarise group_by 88 | #' @importFrom ggplot2 ggplot aes geom_tile scale_fill_gradient xlab ylab 89 | #' @export 90 | #' @examples 91 | #' if(require('dplyr')){ 92 | #' opt_bin_diff(lineup(null_permute('mpg'), mtcars, pos = 1), var = c('mpg', 'wt'), 93 | #' 2, 5, 4, 8, pos = 1, plot = TRUE, m = 8) 94 | #' } 95 | opt_bin_diff <- function(lineup.dat, var, xlow, xhigh, ylow, yhigh, pos, plot = FALSE, m = 20) { 96 | Diff <- xbins <- ybins <- NULL 97 | bins <- expand.grid(xbins = xlow:xhigh, ybins = ylow:yhigh) 98 | diff.bins <- summarise(group_by(bins, xbins, ybins), Diff = calc_diff(lineup.dat, var, met = 'bin_dist', pos, dist.arg = list(lineup.dat = lineup.dat, X.bin = xbins, Y.bin = ybins), m)) 99 | if (plot) { 100 | p <- ggplot(diff.bins, aes(x = factor(xbins), y = factor(ybins))) + 101 | geom_tile(aes(fill = Diff)) + 102 | scale_fill_gradient(high = "blue", low = "white") + 103 | xlab("xbins") + ylab("ybins") 104 | return(list(dat = diff.bins, p = p)) 105 | } else { 106 | return(dat = diff.bins) 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /R/power.r: -------------------------------------------------------------------------------- 1 | #' Power calculations. 2 | #' 3 | #' This function simply counts the proportion of people who selected the data plot, 4 | #' in a set of lineups. It adjusts for multiple picks by the same individual, by weighting 5 | #' by the total number of choices. 6 | #' @param data summary of the results, containing columns id, pic_id, response, detected 7 | #' @param m size of the lineup 8 | #' @return vector of powers for each pic_id 9 | #' @export 10 | #' @importFrom dplyr mutate 11 | #' @examples 12 | #' data(turk_results) 13 | #' visual_power(turk_results) 14 | 15 | visual_power <- function(data, m=20) { 16 | pic_id <- NULL 17 | detected <- NULL 18 | nchoices_wgt <- NULL 19 | 20 | data <- data %>% mutate( 21 | nchoices_wgt = (m-lengths(strsplit(as.character(data$response), ",")))/(m-1)) 22 | visual_p <- data %>% group_by(pic_id) %>% 23 | summarise(power = sum(detected*nchoices_wgt)/length(detected), n=length(detected)) 24 | return(visual_p) 25 | } 26 | -------------------------------------------------------------------------------- /R/protocols.r: -------------------------------------------------------------------------------- 1 | #' The Rorschach protocol. 2 | #' 3 | #' This protocol is used to calibrate the eyes for variation due to sampling. 4 | #' All plots are typically null data sets, data that is consistent with a null 5 | #' hypothesis. 
The protocol is described in Buja, Cook, Hofmann, Lawrence, 6 | #' Lee, Swayne, Wickham (2009) Statistical inference for exploratory data 7 | #' analysis and model diagnostics, Phil. Trans. R. Soc. A, 367, 4361-4383. 8 | #' 9 | #' @export 10 | #' @param method method for generating null data sets 11 | #' @param true true data set. If \code{NULL}, \code{\link{find_plot_data}} 12 | #' will attempt to extract it from the current ggplot2 plot. 13 | #' @param n total number of samples to generate (including true data) 14 | #' @param p probability of including true data with null data. 15 | #' @importFrom purrr rerun 16 | #' @importFrom tidyr unnest 17 | #' @importFrom tibble tibble 18 | #' @importFrom stats rbinom 19 | rorschach <- function(method, true = NULL, n = 20, p = 0) { 20 | data <- NULL 21 | true <- find_plot_data(true) 22 | show_true <- stats::rbinom(1, 1, p) == 1 23 | 24 | if (show_true) { 25 | n <- n - 1 26 | } 27 | samples <- tibble( 28 | .n = seq_len(n), 29 | data = purrr::rerun(n, method(true))) 30 | samples <- data.frame(tidyr::unnest(samples, data)) 31 | # samples <- plyr::rdply(n, method(true)) 32 | 33 | if (show_true) { 34 | pos <- sample(n + 1, 1) 35 | message(encrypt("True data in position ", pos+10)) 36 | samples <- add_true(samples, true, pos) 37 | } else { 38 | samples$.sample <- samples$.n 39 | samples$.n <- NULL 40 | } 41 | 42 | samples 43 | } 44 | 45 | #' The line-up protocol. 46 | #' 47 | #' In this protocol the plot of the real data is embedded amongst a field of 48 | #' plots of data generated to be consistent with some null hypothesis. 49 | #' If the observer can pick the real data as different from the others, this 50 | #' lends weight to the statistical significance of the structure in the plot. 51 | #' The protocol is described in Buja, Cook, Hofmann, Lawrence, 52 | #' Lee, Swayne, Wickham (2009) Statistical inference for exploratory data 53 | #' analysis and model diagnostics, Phil. Trans. R. Soc. A, 367, 4361-4383. 54 | #' 55 | #' Generate n - 1 null datasets and randomly position the true data. If you 56 | #' pick the real data as being noticeably different, then you have formally 57 | #' established that it is different, with p-value 1/n. 58 | #' 59 | #' @param method method for generating null data sets 60 | #' @param true true data set. If \code{NULL}, \code{\link{find_plot_data}} 61 | #' will attempt to extract it from the current ggplot2 plot. 62 | #' @param n total number of samples to generate (including true data) 63 | #' @param pos position of true data. Leave missing to pick position at 64 | #' random. The encrypted position will be printed on the command line; 65 | #' use \code{\link{decrypt}} to reveal it. 66 | #' @param samples samples generated under the null hypothesis. Only specify 67 | #' this if you don't want lineup to generate the data for you.
68 | #' @importFrom tibble tibble 69 | #' @export 70 | #' @examples 71 | #' library(ggplot2) 72 | #' ggplot(lineup(null_permute('mpg'), mtcars), aes(mpg, wt)) + 73 | #' geom_point() + 74 | #' facet_wrap(~ .sample) 75 | #' ggplot(lineup(null_permute('cyl'), mtcars), 76 | #' aes(mpg, .sample, colour = factor(cyl))) + 77 | #' geom_point() 78 | lineup <- function(method, true = NULL, n = 20, pos = sample(n, 1), samples = NULL) { 79 | data <- NULL 80 | true <- find_plot_data(true) 81 | 82 | if (is.null(samples)) { 83 | samples <- tibble( 84 | .n = seq_len(n-1), 85 | data = purrr::rerun(n-1, method(true))) 86 | samples <- data.frame(tidyr::unnest(samples, data)) 87 | # samples <- plyr::rdply(n - 1, method(true)) 88 | } 89 | if (missing(pos)) { 90 | message("decrypt(\"", encrypt("True data in position ", pos+10), "\")") 91 | } 92 | add_true(samples, true, pos) 93 | } 94 | 95 | #' Add true data into data frame containing null data sets. 96 | #' @keywords internal 97 | add_true <- function(samples, true, pos) { 98 | samples$.sample <- with(samples, ifelse(.n >= pos, .n + 1, .n)) 99 | samples$.n <- NULL 100 | true$.sample <- pos 101 | 102 | all <- dplyr::bind_rows(samples, true) 103 | attr(all, "pos") <- pos 104 | all[order(all$.sample), ] 105 | } 106 | 107 | #' Find plot data. 108 | #' If data is not specified, this function will attempt to find the data 109 | #' corresponding to the last ggplot2 created or displayed. This will work 110 | #' in most situations where you are creating the plot and immediately 111 | #' displaying it, but may not work in other situations. In those cases, 112 | #' please specify the data explicitly. 113 | #' 114 | #' @keywords internal 115 | #' @importFrom ggplot2 last_plot 116 | find_plot_data <- function(data) { 117 | if (!is.null(data)) 118 | return(data) 119 | 120 | if (exists("last_plot") && !is.null(last_plot())) { 121 | last_plot()$data 122 | } else { 123 | stop("Missing true dataset") 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /R/pvalues.r: -------------------------------------------------------------------------------- 1 | #' P-value calculations. 2 | #' 3 | #' This set of functions allows the user to calculate a p-value from the lineup after 4 | #' it has been evaluated by K independent observers. The different functions 5 | #' accommodate different ways of constructing lineups and showing them to observers. 6 | #' Details are in the papers Majumder et al (2012) JASA, and Hofmann et al (2015). 7 | #' We distinguish between three different scenarios: 8 | #' \itemize{ 9 | #' \item Scenario I: in each of K evaluations a different data set and a different set of (m-1) null plots is shown. 10 | #' \item Scenario II: in each of K evaluations the same data set but a different set of (m-1) null plots is shown. 11 | #' \item Scenario III: the same lineup, i.e. same data and same set of null plots, is shown to K different observers. 12 | #' } 13 | #' @param x number of observed picks of the data plot 14 | #' @param K number of evaluations 15 | #' @param m size of the lineup 16 | #' @param N MC parameter: number of replicates on which MC probabilities are based. Higher number of replicates will decrease MC variability. 17 | #' @param type type of simulation used: scenario 3 assumes that the same lineup is shown in all K evaluations 18 | #' @param xp exponent used, defaults to 1 19 | #' @param target integer value identifying the location of the data plot 20 | #' @param upper.tail compute probabilities P(X >= x).
Be aware that the use of this parameter is not consistent with the other distribution functions in base. There, a value of P(X > x) is computed for upper.tail=TRUE. 21 | #' @return Vector/data frame. For comparison a p value based on a binomial distribution is provided as well. 22 | #' @importFrom stats pbinom runif 23 | #' @export 24 | #' @examples 25 | #' pvisual(15, 20, m=3) # triangle test 26 | pvisual <- function(x, K, m=20, N=10000, type="scenario3", xp=1, target=1, upper.tail=TRUE) { 27 | freq <- get(type)(N=N, K=K, m=m, xp=xp, target=target) 28 | if (upper.tail) { 29 | sim <- vapply(x, function(y) sum(freq[as.numeric(names(freq)) >= y]), numeric(1)) 30 | return(cbind(x=x, "simulated"=sim, "binom"=1-pbinom(x-1, size=K, prob=1/m))) 31 | } else { 32 | sim <- vapply(x, function(y) sum(freq[as.numeric(names(freq)) < y]), numeric(1)) 33 | return(cbind(x=x, "simulated"=sim, "binom"= pbinom(x-1, size=K, prob=1/m))) 34 | } 35 | } 36 | 37 | pickData <- function(m, xp=1, dataprob=NULL, nulls=NULL) { 38 | probs <- runif(m) 39 | n.targets = length(dataprob) 40 | if (!is.null(dataprob)) { 41 | probs[seq_len(n.targets)] <- dataprob 42 | } 43 | if (!is.null(nulls)) probs[(n.targets+1):m] <- nulls 44 | # sample(m, size=1, prob=1-probs) 45 | # rbinom(1, size=1, prob=f(probs)) 46 | ps <- (1-probs)^xp 47 | if (all (ps==0)) ps <- rep(1, length(probs)) 48 | rbinom(1, size=1, prob=sum(ps[seq_len(n.targets)])/sum(ps)) 49 | } 50 | 51 | scenario1 <- function(N, K, m = 20, xp=1, target=1) { 52 | # new lineup in each evaluation: new data, new sample of nulls 53 | table(replicate(N, { 54 | individual <- sum(replicate(K, pickData(m, dataprob=NULL, xp=xp)) %in% target) 55 | individual 56 | }))/N 57 | } 58 | 59 | 60 | scenario2 <- function(N, K, m = 20, xp=1, target=1) { 61 | # each data evaluated K times, always with different nulls 62 | table(replicate(N, { 63 | n.targets = length(target) 64 | dataprob <- runif(n.targets) 65 | individual <- sum(replicate(K, pickData(m, dataprob=dataprob, xp=xp) %in% target)) 66 | individual 67 | }))/N 68 | } 69 | 70 | 71 | scenario3 <- function(N, K, m=20, xp=1, target=1) { 72 | # each data evaluated K times, with the same nulls 73 | table(replicate(N/100, 74 | replicate(100, { 75 | n.targets = length(target) 76 | dataprob <- runif(n.targets) 77 | nulls <- runif(m-n.targets) 78 | 79 | individual <- sum(replicate(K, pickData(m, dataprob=dataprob, nulls=nulls, xp=xp)) %in% target) 80 | individual 81 | })))/N 82 | } 83 | 84 | scenario4 <- function(N, K, m=20, xp=1, target=1) { 85 | # K is vector: length(K) lineups are shown K[i] times to observers 86 | # all length(K) lineups show the same data, but have different nulls 87 | 88 | res <- replicate(N, { 89 | n.targets = length(target) 90 | dataprob <- runif(n.targets) 91 | 92 | individual <- vapply(seq_along(K), 93 | function(i) sum(replicate(K[i], pickData(m, dataprob=dataprob, nulls=runif(m-n.targets), xp=xp)) %in% target), 94 | numeric(1)) 95 | sum(individual) 96 | }) 97 | table(res)/N 98 | } 99 | -------------------------------------------------------------------------------- /R/sample_size.r: -------------------------------------------------------------------------------- 1 | #' Sample size calculator 2 | #' 3 | #' This function calculates a table of sample sizes for 4 | #' with an experiment, given a lineup size, and 5 | #' estimates of the detection rate. 
6 | #' @param n range of sample sizes to check, default is 53:64 7 | #' @param m linup size, default 20 8 | #' @param pA range of estimated detection rates to consider, 9 | #' default is seq(1/20, 1/3, 0.01) 10 | #' @param conf confidence level to use to simulate from binomial 11 | #' @importFrom stats qbinom 12 | #' @examples 13 | #' pow <- sample_size() 14 | #' pow 15 | #' library(ggplot2) 16 | #' library(viridis) 17 | #' ggplot(pow, aes(x=n, y=pA, fill=prob, group=pA)) + 18 | #' geom_tile() + 19 | #' scale_fill_viridis_c("power") + 20 | #' ylab("detect rate (pA)") + xlab("sample size (n)") + 21 | #' theme_bw() 22 | #' @export 23 | sample_size <- function(n=53:64, m=20, pA=seq(1/20, 1/3, 0.01), conf=0.95) { 24 | g <- expand.grid(n, pA) 25 | k <- qbinom(1-conf, g[,1], 1/m, lower.tail=FALSE) 26 | pow <- tibble(n=g[,1], k=k, pA=g[,2]) 27 | pow <- pow %>% 28 | mutate(prob = pbinom(k-1, n, pA, lower.tail=FALSE)) 29 | pow 30 | } 31 | -------------------------------------------------------------------------------- /R/theme.R: -------------------------------------------------------------------------------- 1 | 2 | 3 | #' A theme to minimally strip away the context 4 | #' 5 | #' Note this is not a complete theme hence why there are no arguments. 6 | #' 7 | #' @examples 8 | #' library(ggplot2) 9 | #' ggplot(cars, aes(dist, speed)) + theme_strip() 10 | #' 11 | #' @export 12 | theme_strip <- function() { 13 | ggplot2::theme(axis.title = ggplot2::element_blank(), 14 | axis.text = ggplot2::element_blank(), 15 | axis.ticks.length = grid::unit(0, "mm")) 16 | 17 | } 18 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "nullabor: lineup and Rorschach plots for visual inference" 3 | author: "Di Cook, Hadley Wickham, Niladri Roy Chowdhury, Heike Hofmann, Mans Thulin" 4 | date: "`r format(Sys.time(), '%B %d, %Y')`" 5 | output: github_document 6 | --- 7 | 8 | 9 | 10 | ```{r, echo = FALSE} 11 | knitr::opts_chunk$set( 12 | collapse = TRUE, 13 | comment = "#>", 14 | fig.path = "README-" 15 | ) 16 | ``` 17 | 18 | # nullabor 19 | 20 | Tools for graphical inference: prevent fooling yourself with the Rorschach 21 | protocol and check the surprising features in your data with the lineup 22 | protocol! 23 | 24 | ## Installation 25 | 26 | The nullabor package can be installed from CRAN 27 | 28 | ```{r cran-installation, eval = FALSE} 29 | install.packages("nullabor") 30 | ``` 31 | 32 | You can install the development version of nullabor from github with: 33 | 34 | ```{r gh-installation, eval = FALSE} 35 | # install.packages("devtools") 36 | devtools::install_github("dicook/nullabor") 37 | ``` 38 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | nullabor: lineup and Rorschach plots for visual inference 2 | ================ 3 | Di Cook, Hadley Wickham, Niladri Roy Chowdhury, Heike Hofmann, Mans 4 | Thulin 5 | November 29, 2024 6 | 7 | 8 | 9 | # nullabor 10 | 11 | Tools for graphical inference: prevent fooling yourself with the 12 | Rorschach protocol and check the surprising features in your data with 13 | the lineup protocol! 
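
A minimal sketch of the lineup workflow, adapted from the examples in `?lineup` (the choice of the built-in `mtcars` data and the `mpg` variable here is just for illustration):

``` r
library(nullabor)
library(ggplot2)

# Embed the plot of the real data among 19 permutation nulls
d <- lineup(null_permute("mpg"), mtcars)
ggplot(d, aes(mpg, wt)) +
  geom_point() +
  facet_wrap(~ .sample)

# lineup() prints an encrypted string such as decrypt("...");
# run it in the console to reveal which panel shows the real data
```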
14 | 15 | ## Installation 16 | 17 | The nullabor package can be installed from CRAN 18 | 19 | ``` r 20 | install.packages("nullabor") 21 | ``` 22 | 23 | You can install the development version of nullabor from github with: 24 | 25 | ``` r 26 | # install.packages("devtools") 27 | devtools::install_github("dicook/nullabor") 28 | ``` 29 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Overview 2 | 3 | These are small changes that fix several bugs. 4 | 5 | Also fixed https://win-builder.r-project.org/incoming_pretest/nullabor_0.3.14_20250210_040443/Debian/00check.log where 6 | the package failed automatic checks on linux because lineup_histograms() and lineup_residuals() took 5.669s and 5.574s to 7 | complete on linux, by removing one example in each. 8 | 9 | - Using devtools::check() 10 | 11 | ── R CMD check results ───────────────────────────────── nullabor 0.3.15 ──── 12 | Duration: 2m 29s 13 | 14 | 0 errors ✔ | 0 warnings ✔ | 0 notes ✔ 15 | 16 | - Using R CMD CHECK ../nullabor_0.3.15.tar.gz 17 | 18 | * DONE 19 | 20 | Status: OK 21 | 22 | 23 | ## Test environment 24 | 25 | * R version 4.4.2 (2024-10-31) -- "Pile of Leaves" 26 | 27 | Checks made using R-CMD-check.yaml GitHub Actions on the repo for environments: 28 | linux, macos, windows. It fails on the vignettes due to rmarkdown 29 | not being available on GitHub, beyond my control, but all other checks pass. 30 | 31 | ## Reverse dependencies 32 | 33 | All are ok 34 | 35 | > revdep_check() 36 | ── INIT ──────────────────────────────────────────────── Computing revdeps ── 37 | ── INSTALL ──────────────────────────────────────────────────── 2 versions ── 38 | Installing DEV version of nullabor 39 | ── CHECK ────────────────────────────────────────────────────── 3 packages ── 40 | ✔ agridat 1.24 ── E: 0 | W: 0 | N: 0 41 | ✔ metaviz 0.3.1 ── E: 0 | W: 0 | N: 0 42 | ✔ regressinator 0.2.0 ── E: 0 | W: 0 | N: 0 43 | OK: 3 44 | BROKEN: 0 45 | Total time: 9 min 46 | -------------------------------------------------------------------------------- /data/aud.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/data/aud.rda -------------------------------------------------------------------------------- /data/electoral.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/data/electoral.rda -------------------------------------------------------------------------------- /data/lakers.rdata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/data/lakers.rdata -------------------------------------------------------------------------------- /data/tips.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/data/tips.rda -------------------------------------------------------------------------------- /data/turk_results.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/data/turk_results.rda 
-------------------------------------------------------------------------------- /data/wasps.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/data/wasps.rda -------------------------------------------------------------------------------- /demo/00Index: -------------------------------------------------------------------------------- 1 | lakers Where do the Lakers take basketball shots from? 2 | -------------------------------------------------------------------------------- /demo/lakers.r: -------------------------------------------------------------------------------- 1 | library(nullabor) 2 | 3 | threept <- subset(lal, type == "3pt" & !is.na(x) & !is.na(y)) 4 | threept <- threept[c(".id", "period", "time", "team", "etype", "player", "points", "result", "x", "y")] 5 | 6 | threept <- transform(threept, 7 | x = x + runif(length(x), -0.5, 0.5), 8 | y = y + runif(length(y), -0.5, 0.5)) 9 | threept <- transform(threept, 10 | r = sqrt((x - 25) ^ 2 + y ^ 2), 11 | angle = atan2(y, x - 25)) 12 | 13 | # Focus in on shots in the typical range 14 | threept <- subset(threept, r > 20 & r < 39) 15 | 16 | qplot(x, y, data = threept) + coord_equal() 17 | 18 | angle_scale <- scale_x_continuous("Angle (degrees)", 19 | breaks = c(0, 45, 90, 135, 180), limits = c(0, 180)) 20 | 21 | qplot(angle * 180 / pi, r, data = threept) + 22 | angle_scale 23 | 24 | last_plot() %+% lineup(null_lm(r ~ poly(angle, 2)), threept, n = 9) + 25 | facet_wrap(~ .sample) 26 | 27 | segment <- function(x, br) (x - br) * (x > br) 28 | qplot(angle * 180 / pi, r, data = threept) + angle_scale + 29 | geom_smooth(method = lm, formula = y ~ x + segment(x, 90)) 30 | 31 | last_plot() %+% lineup(null_lm(r ~ angle + segment(angle, pi / 2)), n = 9) + facet_wrap(~ .sample) 32 | 33 | # Look at model residuals directly 34 | #mod <- lm(r ~ poly(angle, 2), data = threept) 35 | #inrange <- threept 36 | #inrange$resid <- resid(mod) 37 | #qplot(angle, resid, data = inrange) 38 | #last_plot() %+% lineup(has_dist("resid", "normal", list(mean = 0, sd = 1))) + facet_wrap(~ .sample) -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • nullabor 9 | 10 | 11 | 12 | 13 | 14 | 15 | 19 | 20 | 21 | 22 | 23 |
[pkgdown-generated 404 page; navigation and footer markup omitted. Body text: "Content not found. Please use links in the navbar."]
120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /docs/articles/distances_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/distances_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/distances_files/figure-html/unnamed-chunk-15-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/distances_files/figure-html/unnamed-chunk-15-1.png -------------------------------------------------------------------------------- /docs/articles/distances_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/distances_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • nullabor 6 | 7 | 8 |
[pkgdown-generated articles index page; navigation and footer markup omitted.]
99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-10-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-10-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-2-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-9-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-distributions_files/figure-html/unnamed-chunk-9-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-examples_files/figure-html/unnamed-chunk-6-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-examples_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor-regression_files/figure-html/unnamed-chunk-7-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor_files/figure-html/unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor_files/figure-html/unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor_files/figure-html/unnamed-chunk-4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor_files/figure-html/unnamed-chunk-4-1.png -------------------------------------------------------------------------------- /docs/articles/nullabor_files/figure-html/unnamed-chunk-8-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/articles/nullabor_files/figure-html/unnamed-chunk-8-1.png -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: 
-------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 
2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

<h1>`, then `<h2>`, etc.) that has more than one element. Defaults to 1 (for `<h1>

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. 
"?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | http://leafo.net 3 | */ 4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
    "))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 8 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 9 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
<li>, and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: '3.4' 2 | pkgdown: 2.0.9 3 | pkgdown_sha: ~ 4 | articles: 5 | distances: distances.html 6 | nullabor-distributions: nullabor-distributions.html 7 | nullabor-examples: nullabor-examples.html 8 | nullabor-regression: nullabor-regression.html 9 | nullabor: nullabor.html 10 | last_built: 2025-02-10T04:39Z 11 | 12 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/Rplot002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/Rplot002.png -------------------------------------------------------------------------------- /docs/reference/add_true.html: -------------------------------------------------------------------------------- 1 | 2 | Add true data into data frame containing null data sets. 
— add_true • nullabor
[pkgdown-generated reference page; HTML markup omitted. Usage: add_true(samples, true, pos). Description: Add true data into data frame containing null data sets.]
    98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /docs/reference/aud.html: -------------------------------------------------------------------------------- 1 | 2 | Conversion rate of 1 Australian Doller (AUD) to 1 US Dollar — aud • nullabor 6 | 7 | 8 |
[pkgdown-generated reference page; HTML markup omitted. Description: The dataset consists of the daily exchange rates of 1 Australian Dollar to 1 US Dollar between Jan 9 2018 and Feb 21 2018.]
    95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /docs/reference/distmet-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/distmet-2.png -------------------------------------------------------------------------------- /docs/reference/distmet-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/distmet-3.png -------------------------------------------------------------------------------- /docs/reference/distplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/distplot-1.png -------------------------------------------------------------------------------- /docs/reference/figures/nullabor_hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/figures/nullabor_hex.png -------------------------------------------------------------------------------- /docs/reference/lal.html: -------------------------------------------------------------------------------- 1 | 2 | Los Angeles Lakers play-by-play data. — lal • nullabor 7 | 8 | 9 |
[pkgdown-generated reference page; HTML markup omitted. Description: Play-by-play data from all games played by the Los Angeles Lakers in the 2008/2009 season.]
    97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /docs/reference/lineup-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup-1.png -------------------------------------------------------------------------------- /docs/reference/lineup-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup-2.png -------------------------------------------------------------------------------- /docs/reference/lineup_histograms-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_histograms-1.png -------------------------------------------------------------------------------- /docs/reference/lineup_histograms-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_histograms-2.png -------------------------------------------------------------------------------- /docs/reference/lineup_histograms-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_histograms-3.png -------------------------------------------------------------------------------- /docs/reference/lineup_histograms-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_histograms-4.png -------------------------------------------------------------------------------- /docs/reference/lineup_qq-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_qq-1.png -------------------------------------------------------------------------------- /docs/reference/lineup_qq-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_qq-2.png -------------------------------------------------------------------------------- /docs/reference/lineup_qq-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_qq-3.png -------------------------------------------------------------------------------- /docs/reference/lineup_qq-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_qq-4.png -------------------------------------------------------------------------------- /docs/reference/lineup_residuals-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_residuals-1.png -------------------------------------------------------------------------------- /docs/reference/lineup_residuals-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_residuals-2.png -------------------------------------------------------------------------------- /docs/reference/lineup_residuals-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_residuals-3.png -------------------------------------------------------------------------------- /docs/reference/lineup_residuals-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_residuals-4.png -------------------------------------------------------------------------------- /docs/reference/lineup_residuals-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/lineup_residuals-5.png -------------------------------------------------------------------------------- /docs/reference/null_dist-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/null_dist-1.png -------------------------------------------------------------------------------- /docs/reference/null_dist-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/null_dist-2.png -------------------------------------------------------------------------------- /docs/reference/null_gen.html: -------------------------------------------------------------------------------- 1 | 2 | Computing th distance for the null plots — null_gen • nullabor 6 | 7 | 8 |
[pkgdown-generated reference page; HTML markup omitted. Description: Computing the distance for the null plots. Usage: null_gen(lineup.dat, null, met, method, m, dist.arg)]
    98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /docs/reference/null_lm-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/null_lm-1.png -------------------------------------------------------------------------------- /docs/reference/null_permute-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/null_permute-1.png -------------------------------------------------------------------------------- /docs/reference/null_ts-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/null_ts-1.png -------------------------------------------------------------------------------- /docs/reference/opt_bin_diff-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/opt_bin_diff-1.png -------------------------------------------------------------------------------- /docs/reference/resid_boot.html: -------------------------------------------------------------------------------- 1 | 2 | Bootstrap residuals. — resid_boot • nullabor 6 | 7 | 8 |
[pkgdown-generated reference page; HTML markup omitted. Description: For use with null_lm. Usage: resid_boot(model, data). Arguments: model (to extract residuals from), data (used to fit model).]
    108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /docs/reference/resid_perm.html: -------------------------------------------------------------------------------- 1 | 2 | Permutation residuals. — resid_perm • nullabor 6 | 7 | 8 |
[pkgdown-generated reference page; HTML markup omitted. Description: For use with null_lm. Usage: resid_perm(model, data). Arguments: model (to extract residuals from), data (used to fit model).]
    108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /docs/reference/sample_size-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/sample_size-1.png -------------------------------------------------------------------------------- /docs/reference/theme_strip-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/docs/reference/theme_strip-1.png -------------------------------------------------------------------------------- /docs/reference/turk_results.html: -------------------------------------------------------------------------------- 1 | 2 | Sample turk results — turk_results • nullabor 6 | 7 | 8 |
[pkgdown-generated reference page; HTML markup omitted. Description: Subset of data from a Turk experiment, used to show how to compute the power of a lineup.]
    95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /docs/reference/wasps.html: -------------------------------------------------------------------------------- 1 | 2 | Wasp gene expression data. — wasps • nullabor 6 | 7 | 8 |
[pkgdown-generated reference page; HTML markup omitted. Description: Data from Toth et al (2010) used in Niladri Roy et al (2015).]
    95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /articles/distances.html 8 | 9 | 10 | /articles/index.html 11 | 12 | 13 | /articles/nullabor-distributions.html 14 | 15 | 16 | /articles/nullabor-examples.html 17 | 18 | 19 | /articles/nullabor-regression.html 20 | 21 | 22 | /articles/nullabor.html 23 | 24 | 25 | /authors.html 26 | 27 | 28 | /index.html 29 | 30 | 31 | /news/index.html 32 | 33 | 34 | /reference/add_true.html 35 | 36 | 37 | /reference/aud.html 38 | 39 | 40 | /reference/bin_dist.html 41 | 42 | 43 | /reference/box_dist.html 44 | 45 | 46 | /reference/calc_diff.html 47 | 48 | 49 | /reference/calc_mean_dist.html 50 | 51 | 52 | /reference/decrypt.html 53 | 54 | 55 | /reference/distmet.html 56 | 57 | 58 | /reference/distplot.html 59 | 60 | 61 | /reference/electoral.html 62 | 63 | 64 | /reference/find_plot_data.html 65 | 66 | 67 | /reference/index.html 68 | 69 | 70 | /reference/lal.html 71 | 72 | 73 | /reference/lineup.html 74 | 75 | 76 | /reference/lineup_histograms.html 77 | 78 | 79 | /reference/lineup_qq.html 80 | 81 | 82 | /reference/lineup_residuals.html 83 | 84 | 85 | /reference/null_dist.html 86 | 87 | 88 | /reference/null_gen.html 89 | 90 | 91 | /reference/null_lm.html 92 | 93 | 94 | /reference/null_permute.html 95 | 96 | 97 | /reference/null_ts.html 98 | 99 | 100 | /reference/opt_bin_diff.html 101 | 102 | 103 | /reference/pvisual.html 104 | 105 | 106 | /reference/reg_dist.html 107 | 108 | 109 | /reference/resid_boot.html 110 | 111 | 112 | /reference/resid_pboot.html 113 | 114 | 115 | /reference/resid_perm.html 116 | 117 | 118 | /reference/resid_rotate.html 119 | 120 | 121 | /reference/resid_sigma.html 122 | 123 | 124 | /reference/rorschach.html 125 | 126 | 127 | /reference/sample_size.html 128 | 129 | 130 | /reference/sep_dist.html 131 | 132 | 133 | /reference/theme_strip.html 134 | 135 | 136 | /reference/tips.html 137 | 138 | 139 | /reference/turk_results.html 140 | 141 | 142 | /reference/uni_dist.html 143 | 144 | 145 | /reference/visual_power.html 146 | 147 | 148 | /reference/wasps.html 149 | 150 | 151 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Article", 2 | title = "Statistical Inference for Exploratory Data Analysis and Model Diagnostics", 3 | author = "Andreas Buja, Dianne Cook, Heike Hofmann, Michael Lawrence, Eun-kyung Lee, Deborah F. Swayne, Hadley Wickham", 4 | journal = "Royal Society Philosophical Transactions A", 5 | year = "2009", 6 | volume = "367", 7 | number = "1906", 8 | pages = "4361--4383", 9 | doi = "10.1098/rsta.2009.0120" 10 | ) 11 | -------------------------------------------------------------------------------- /man/add_true.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/protocols.r 3 | \name{add_true} 4 | \alias{add_true} 5 | \title{Add true data into data frame containing null data sets.} 6 | \usage{ 7 | add_true(samples, true, pos) 8 | } 9 | \description{ 10 | Add true data into data frame containing null data sets. 
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/aud.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.r 3 | \docType{data} 4 | \name{aud} 5 | \alias{aud} 6 | \title{Conversion rate of 1 Australian Doller (AUD) to 1 US Dollar} 7 | \description{ 8 | The dataset consists of the daily exchange rates of 1 Australian Dollar to 1 US Dollar between Jan 9 2018 and Feb 21 2018. 9 | } 10 | -------------------------------------------------------------------------------- /man/bin_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distances.r 3 | \name{bin_dist} 4 | \alias{bin_dist} 5 | \title{Binned Distance} 6 | \usage{ 7 | bin_dist(X, PX, lineup.dat = lineup.dat, X.bin = 5, Y.bin = 5) 8 | } 9 | \arguments{ 10 | \item{X}{a data.frame with two variables, the first two columns 11 | are used} 12 | 13 | \item{PX}{another data.frame with two variables, the first two columns 14 | are used} 15 | 16 | \item{lineup.dat}{lineup data so that the binning is done based on the lineup data and not 17 | the individual plots, by default lineup.dat = lineup.dat ; if one wishes to calculate the 18 | binned distance between two plots, one should use lineup.dat = NULL} 19 | 20 | \item{X.bin}{number of bins on the x-direction, by default X.bin = 5} 21 | 22 | \item{Y.bin}{number of bins on the y-direction, by default Y.bin = 5} 23 | } 24 | \value{ 25 | distance between X and PX 26 | } 27 | \description{ 28 | Data X is binned into X.bin bins in x-direction and Y.bins in y-direction. The number 29 | of points in each cell is then counted. Same is done for data PX. An euclidean 30 | distance is calculated between the number of points in each cell between X and PX. 31 | } 32 | \examples{ 33 | with(mtcars, bin_dist(data.frame(wt, mpg), data.frame(sample(wt), mpg), 34 | lineup.dat = NULL)) 35 | } 36 | -------------------------------------------------------------------------------- /man/box_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distances.r 3 | \name{box_dist} 4 | \alias{box_dist} 5 | \title{Distance based on side by side Boxplots} 6 | \usage{ 7 | box_dist(X, PX) 8 | } 9 | \arguments{ 10 | \item{X}{a data.frame with one factor variable and one continuous 11 | variable} 12 | 13 | \item{PX}{a data.frame with one factor variable and one continuous 14 | variable} 15 | } 16 | \value{ 17 | distance between X and PX 18 | } 19 | \description{ 20 | Assuming that data set X consists of a categorical group variable a numeric value, 21 | a summary of the first quartile, median and third quartile of this value is calculated 22 | for each group. 23 | The extent (as absolute difference) of the minimum and maximum value across groups is computed for 24 | first quartile, median and third quartile. Same is done for data PX. 25 | Finally an euclidean distance is calculated between the absolute differences of 26 | X and PX. 
27 | } 28 | \examples{ 29 | if(require('dplyr')) { 30 | with(mtcars, 31 | box_dist(data.frame(as.factor(am), mpg), 32 | data.frame(as.factor(sample(am)), mpg)) 33 | ) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/calc_diff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/opt_diff.r 3 | \name{calc_diff} 4 | \alias{calc_diff} 5 | \title{Calculating the difference between true plot and the null plot with the maximum distance.} 6 | \usage{ 7 | calc_diff(lineup.dat, var, met, pos, dist.arg = NULL, m = 20) 8 | } 9 | \arguments{ 10 | \item{lineup.dat}{lineup data to get the lineup} 11 | 12 | \item{var}{a vector of names of the variables to be used to calculate the difference} 13 | 14 | \item{met}{distance metric needed to calculate the distance as a character} 15 | 16 | \item{pos}{position of the true plot in the lineup} 17 | 18 | \item{dist.arg}{a list or vector of inputs for the distance metric met; NULL by default} 19 | 20 | \item{m}{number of plots in the lineup, by default m = 20} 21 | } 22 | \value{ 23 | difference between the mean distance of the true plot and 24 | the maximum mean distance of the null plots 25 | } 26 | \description{ 27 | Distance metric is used to calculate the mean distance between the true plot 28 | and all the null plots in a lineup. The difference between the mean 29 | distance of the true plot and the maximum mean distance of the null plots is 30 | calculated. 31 | } 32 | \examples{ 33 | if(require('dplyr')){ 34 | lineup.dat <- lineup(null_permute('mpg'), mtcars, pos = 1) 35 | calc_diff(lineup.dat, var = c('mpg', 'wt'), met = 'bin_dist', 36 | dist.arg = list(lineup.dat = lineup.dat, X.bin = 5, Y.bin = 5), pos = 1, m = 8)} 37 | 38 | if(require('dplyr')){ 39 | calc_diff(lineup(null_permute('mpg'), mtcars, pos = 1), var = c('mpg', 'wt'), met = 'reg_dist', 40 | dist.arg = NULL, pos = 1, m = 8)} 41 | } 42 | -------------------------------------------------------------------------------- /man/calc_mean_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/opt_diff.r 3 | \name{calc_mean_dist} 4 | \alias{calc_mean_dist} 5 | \title{Calculating the mean distances of each plot in the lineup.} 6 | \usage{ 7 | calc_mean_dist(lineup.dat, var, met, pos, dist.arg = NULL, m = 20) 8 | } 9 | \arguments{ 10 | \item{lineup.dat}{lineup data of the lineup} 11 | 12 | \item{var}{a vector of names of the variables to be used to calculate the mean distances} 13 | 14 | \item{met}{distance metric needed to calculate the distance as a character} 15 | 16 | \item{pos}{position of the true plot in the lineup} 17 | 18 | \item{dist.arg}{a list or vector of inputs for the distance metric met; NULL by default} 19 | 20 | \item{m}{number of plots in the lineup, by default m = 20} 21 | } 22 | \value{ 23 | the mean distances of each plot in the lineup 24 | } 25 | \description{ 26 | Distance metric is used to calculate the mean distance between the true plot 27 | and all the null plots in a lineup. The mean distances of each null plot to all 28 | the other null plots are calculated. The mean distances are returned for all the plots 29 | in the lineup. 
30 | } 31 | \examples{ 32 | if(require('dplyr')){ 33 | calc_mean_dist(lineup(null_permute('mpg'), mtcars, pos = 1), var = c('mpg', 'wt'), 34 | met = 'reg_dist', pos = 1, m = 10)} 35 | } 36 | -------------------------------------------------------------------------------- /man/decrypt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/encryption.r 3 | \name{decrypt} 4 | \alias{decrypt} 5 | \title{Use decrypt to reveal the position of the real data.} 6 | \usage{ 7 | decrypt(...) 8 | } 9 | \arguments{ 10 | \item{...}{character vector to decrypt} 11 | } 12 | \description{ 13 | The real data position is encrypted by the lineup function, and 14 | writes this out as a text string. Decrypt, decrypts this text 15 | string to reveal which where the real data is. 16 | } 17 | \examples{ 18 | decrypt('0uXR2p rut L2O2') 19 | } 20 | -------------------------------------------------------------------------------- /man/distmet.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distmet.r 3 | \name{distmet} 4 | \alias{distmet} 5 | \title{Empirical distribution of the distance} 6 | \usage{ 7 | distmet( 8 | lineup.dat, 9 | var, 10 | met, 11 | method, 12 | pos, 13 | repl = 1000, 14 | dist.arg = NULL, 15 | m = 20 16 | ) 17 | } 18 | \arguments{ 19 | \item{lineup.dat}{lineup data} 20 | 21 | \item{var}{a vector of names of the variables to be used} 22 | 23 | \item{met}{distance metric needed to calculate the distance as a character} 24 | 25 | \item{method}{method for generating null data sets} 26 | 27 | \item{pos}{position of the observed data in the lineup} 28 | 29 | \item{repl}{number of sets of null plots selected to obtain the distribution; 1000 by 30 | default} 31 | 32 | \item{dist.arg}{a list or vector of inputs for the distance metric met; NULL by default} 33 | 34 | \item{m}{the number of plots in the lineup; m = 20 by default} 35 | } 36 | \value{ 37 | lineup has the data used for the calculations 38 | 39 | null_values contains new null samples from which to compare nulls in lineup 40 | 41 | diff difference in distance between nulls and actual data and that of the null 42 | that is most different from other nulls. A negative value means that the actual data 43 | plot is similar to the null plots. 44 | 45 | closest list of the five closest nulls to the actual data plot 46 | 47 | pos position of the actual data plot in the lineup 48 | } 49 | \description{ 50 | The empirical distribution of the distance measures is calculated based on the mean 51 | distance of each of the null plots from the other null plots in a lineup. At this moment 52 | this method works only for \code{\link{null_permute}} method. This function helps get some 53 | assessment of whether the actual data plot is very different from the null plots. 54 | } 55 | \examples{ 56 | # Each of these examples uses a small number of nulls (m=8), and a small number of 57 | # repeated sampling from the null distribution (repl=100), to make it faster to run. 58 | # In your own examples you should think about increasing each of these, at least to the defaults. 
59 | \dontrun{ 60 | if (require('dplyr')) { 61 | d <- lineup(null_permute('mpg'), mtcars, pos = 1) 62 | dd <- distmet(d, var = c('mpg', 'wt'), 63 | 'reg_dist', null_permute('mpg'), pos = 1, repl = 100, m = 8) 64 | distplot(dd, m=8) 65 | } 66 | } 67 | 68 | \dontrun{ 69 | d <- lineup(null_permute('mpg'), mtcars, pos=4, n=8) 70 | library(ggplot2) 71 | ggplot(d, aes(mpg, wt)) + geom_point() + facet_wrap(~ .sample, ncol=4) 72 | if (require('dplyr')) { 73 | dd <- distmet(d, var = c('mpg', 'wt'), 'bin_dist', null_permute('mpg'), 74 | pos = 4, repl = 100, dist.arg = list(lineup.dat = d, X.bin = 5, 75 | Y.bin = 5), m = 8) 76 | distplot(dd, m=8) 77 | } 78 | } 79 | 80 | # Example using bin_dist 81 | \dontrun{ 82 | if (require('dplyr')) { 83 | d <- lineup(null_permute('mpg'), mtcars, pos = 1) 84 | library(ggplot2) 85 | ggplot(d, aes(mpg, wt)) + geom_point() + facet_wrap(~ .sample, ncol=5) 86 | dd <- distmet(d, var = c('mpg', 'wt'), 87 | 'bin_dist', null_permute('mpg'), pos = 1, repl = 500, 88 | dist.arg = list(lineup.dat = d, X.bin = 5, Y.bin = 5)) 89 | distplot(dd) 90 | } 91 | } 92 | 93 | # Example using uni_dist 94 | \dontrun{ 95 | mod <- lm(wt ~ mpg, data = mtcars) 96 | resid.dat <- data.frame(residual = mod$resid) 97 | d <- lineup(null_dist('residual', dist = 'normal'), resid.dat, pos=19) 98 | ggplot(d, aes(residual)) + geom_histogram(binwidth = 0.25) + facet_wrap(~ .sample, ncol=5) 99 | if (require('dplyr')) { 100 | dd <- distmet(d, var = 'residual', 'uni_dist', null_dist('residual', 101 | dist = 'normal'), pos = 19, repl = 500) 102 | distplot(dd) 103 | } 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /man/distplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distmet.r 3 | \name{distplot} 4 | \alias{distplot} 5 | \title{Plotting the distribution of the distance measure} 6 | \usage{ 7 | distplot(dat, m = 20) 8 | } 9 | \arguments{ 10 | \item{dat}{output from \code{\link{distmet}}} 11 | 12 | \item{m}{the number of plots in the lineup; m = 20 by default} 13 | } 14 | \description{ 15 | The permutation distribution of the distance measure is plotted with the distances for 16 | the null plots. Distance measure values for the null plots and the true plot are overlaid. 17 | } 18 | \examples{ 19 | \dontrun{ 20 | if (require('dplyr')) { 21 | d <- lineup(null_permute('mpg'), mtcars, pos = 1) 22 | library(ggplot2) 23 | ggplot(d, aes(mpg, wt)) + geom_point() + facet_wrap(~.sample) 24 | distplot(distmet(d, var = c('mpg', 'wt'), 'reg_dist', null_permute('mpg'), 25 | pos = 1, repl = 100, m = 8), m = 8) 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /man/electoral.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.r 3 | \docType{data} 4 | \name{electoral} 5 | \alias{electoral} 6 | \title{Polls and election results from the 2012 US Election} 7 | \format{ 8 | A list with two data frames: 9 | polls is a data frame of 51 rows and 4 variables 10 | \describe{ 11 | \item{State}{State name} 12 | \item{Electoral.vote}{Number of electoral votes in the 2012 election} 13 | \item{Margin}{Margin between the parties with the highest number of votes and second highest number of votes. 
14 | These margins are based on polls.} 15 | \item{Democrat}{logical vector True, if the democratic party is the majority party in this state. } 16 | } 17 | \code{election} is a data frame of 51 rows and 5 variables 18 | \describe{ 19 | \item{State}{State name} 20 | \item{Candidate}{character string of the winner: Romney or Obama} 21 | \item{Electoral.vote}{Number of electoral votes in the 2012 election} 22 | \item{Margin}{Margin between the parties with the highest number of votes and second highest number of votes. 23 | These margins are based on the actual election outcome} 24 | \item{Democrat}{logical vector True, if the democratic party is the majority party in this state. } 25 | } 26 | } 27 | \description{ 28 | Polls and election results from the 2012 US Election 29 | } 30 | -------------------------------------------------------------------------------- /man/figures/nullabor_hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dicook/nullabor/bfea7086614558ee11f19a9022c18d8e13d4e39f/man/figures/nullabor_hex.png -------------------------------------------------------------------------------- /man/find_plot_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/protocols.r 3 | \name{find_plot_data} 4 | \alias{find_plot_data} 5 | \title{Find plot data. 6 | If data is not specified, this function will attempt to find the data 7 | corresponding to the last ggplot2 created or displayed. This will work 8 | in most situations where you are creating the plot and immediately 9 | displaying it, but may not work in other situations. In those cases, 10 | please specify the data explicitly.} 11 | \usage{ 12 | find_plot_data(data) 13 | } 14 | \description{ 15 | Find plot data. 16 | If data is not specified, this function will attempt to find the data 17 | corresponding to the last ggplot2 created or displayed. This will work 18 | in most situations where you are creating the plot and immediately 19 | displaying it, but may not work in other situations. In those cases, 20 | please specify the data explicitly. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/lal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.r 3 | \docType{data} 4 | \name{lal} 5 | \alias{lal} 6 | \title{Los Angeles Lakers play-by-play data.} 7 | \description{ 8 | Play by play data from all games played by the Los Angeles lakers in the 9 | 2008/2009 season. 10 | } 11 | -------------------------------------------------------------------------------- /man/lineup.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/protocols.r 3 | \name{lineup} 4 | \alias{lineup} 5 | \title{The line-up protocol.} 6 | \usage{ 7 | lineup(method, true = NULL, n = 20, pos = sample(n, 1), samples = NULL) 8 | } 9 | \arguments{ 10 | \item{method}{method for generating null data sets} 11 | 12 | \item{true}{true data set. If \code{NULL}, \code{\link{find_plot_data}} 13 | will attempt to extract it from the current ggplot2 plot.} 14 | 15 | \item{n}{total number of samples to generate (including true data)} 16 | 17 | \item{pos}{position of true data. 
Leave missing to pick position at 18 | random. The encrypted position will be printed on the command line; use 19 | \code{\link{decrypt}} to reveal it.} 20 | 21 | \item{samples}{samples generated under the null hypothesis. Only specify 22 | this if you don't want lineup to generate the data for you.} 23 | } 24 | \description{ 25 | In this protocol the plot of the real data is embedded amongst a field of 26 | plots of data generated to be consistent with some null hypothesis. 27 | If the observer can pick the real data as different from the others, this 28 | lends weight to the statistical significance of the structure in the plot. 29 | The protocol is described in Buja, Cook, Hofmann, Lawrence, 30 | Lee, Swayne, Wickham (2009) Statistical inference for exploratory data 31 | analysis and model diagnostics, Phil. Trans. R. Soc. A, 367, 4361-4383. 32 | } 33 | \details{ 34 | Generate n - 1 null datasets and randomly position the true data. If you 35 | pick the real data as being noticeably different, then you have formally 36 | established that it is different, with p-value 1/n. 37 | } 38 | \examples{ 39 | library(ggplot2) 40 | ggplot(lineup(null_permute('mpg'), mtcars), aes(mpg, wt)) + 41 | geom_point() + 42 | facet_wrap(~ .sample) 43 | ggplot(lineup(null_permute('cyl'), mtcars), 44 | aes(mpg, .sample, colour = factor(cyl))) + 45 | geom_point() 46 | } 47 | -------------------------------------------------------------------------------- /man/lineup_histograms.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quick_plots.R 3 | \name{lineup_histograms} 4 | \alias{lineup_histograms} 5 | \title{Check distributional assumptions using histograms and the lineup protocol.} 6 | \usage{ 7 | lineup_histograms( 8 | data, 9 | variable, 10 | dist = NULL, 11 | params = NULL, 12 | color_bars = "black", 13 | fill_bars = "grey", 14 | color_lines = "brown3" 15 | ) 16 | } 17 | \arguments{ 18 | \item{data}{a data frame.} 19 | 20 | \item{variable}{the name of the variable that should be plotted.} 21 | 22 | \item{dist}{the null distribution name. One of: "beta", "cauchy", 23 | "chi-squared", "exponential", "f", "gamma", "geometric", "log-normal", 24 | "lognormal", "logistic", "negative binomial", "binomial", "normal", 25 | "poisson", "t", "uniform", "weibull"} 26 | 27 | \item{params}{list of parameters of distribution. If \code{NULL}, will 28 | use \code{\link[MASS]{fitdistr}} to estimate them if possible. For 29 | uniform, beta, and binomial distributions, the parameters must be specified. 30 | See \code{?dunif}, \code{?dbeta}, and \code{?dbinom} for parameter names.} 31 | 32 | \item{color_bars}{the color used for the borders of the bars. Can be a name 33 | or a color HEX code.} 34 | 35 | \item{fill_bars}{the color used to fill the bars.} 36 | 37 | \item{color_lines}{the color used for the density curves.} 38 | } 39 | \value{ 40 | a \code{ggplot} 41 | } 42 | \description{ 43 | This function is used to quickly create lineup plots to check 44 | distributional assumptions using histograms with kernel density estimates. 45 | The null hypothesis is that the data follows the distribution specified by the 46 | \code{dist} argument. 47 | In the lineup protocol the plot of the real data is embedded amongst a field of 48 | plots of data generated to be consistent with some null hypothesis.
49 | If the observer can pick the real data as different from the others, this 50 | lends weight to the statistical significance of the structure in the plot. 51 | The protocol is described in Buja et al. (2009). 52 | } 53 | \details{ 54 | 19 null datasets are plotted together with the true data (randomly 55 | positioned). If you pick the real data as being noticeably different, then 56 | you have formally established that it is different, with p-value 0.05. 57 | 58 | Run the \code{decrypt} message printed in the R Console to see which 59 | plot represents the true data. 60 | } 61 | \examples{ 62 | data(tips) 63 | lineup_histograms(tips, "total_bill", dist = "normal") # Normal distribution 64 | 65 | # Some distributions require that the parameters be specified: 66 | lineup_histograms(tips, "size", dist = "binomial", params = list(size = 6, p = 0.3)) 67 | 68 | # Style the plot using color settings and ggplot2 functions: 69 | lineup_histograms(tips, "total_bill", 70 | dist = "gamma", 71 | color_bars = "steelblue", 72 | color_lines = "magenta") + 73 | ggplot2::theme_minimal() 74 | } 75 | \references{ 76 | Buja, Cook, Hofmann, Lawrence, Lee, Swayne, Wickham. (2009). 77 | Statistical inference for exploratory data analysis and model diagnostics, 78 | \emph{Phil. Trans. R. Soc. A}, 367, 4361-4383. 79 | } 80 | \seealso{ 81 | null_dist 82 | } 83 | -------------------------------------------------------------------------------- /man/lineup_qq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quick_plots.R 3 | \name{lineup_qq} 4 | \alias{lineup_qq} 5 | \title{Check distributional assumptions using Q-Q plots and the lineup protocol.} 6 | \usage{ 7 | lineup_qq( 8 | data, 9 | variable, 10 | dist = NULL, 11 | params = NULL, 12 | color_points = "black", 13 | color_lines = "brown3", 14 | alpha_points = 0.5 15 | ) 16 | } 17 | \arguments{ 18 | \item{data}{a data frame.} 19 | 20 | \item{variable}{the name of the variable that should be plotted.} 21 | 22 | \item{dist}{the null distribution name. One of: "beta", "cauchy", 23 | "chi-squared", "exponential", "f", "gamma", "geometric", "log-normal", 24 | "lognormal", "logistic", "negative binomial", "normal", 25 | "poisson", "t", "uniform", "weibull"} 26 | 27 | \item{params}{list of parameters of distribution. If \code{NULL}, will 28 | use \code{\link[MASS]{fitdistr}} to estimate them if possible. For 29 | uniform and beta distributions, the parameters must be specified. 30 | See \code{?dunif} and \code{?dbeta} for parameter names.} 31 | 32 | \item{color_points}{the color used for points. Can be a name 33 | or a color HEX code.} 34 | 35 | \item{color_lines}{the color used for reference lines.} 36 | 37 | \item{alpha_points}{the alpha (opacity) used for points (between 38 | 0 and 1, where 1 is opaque).} 39 | } 40 | \value{ 41 | a \code{ggplot} 42 | } 43 | \description{ 44 | This function is used to quickly create lineup plots to check 45 | distributional assumptions using Q-Q plots. The null hypothesis is that the 46 | data follows the distribution specified by the \code{dist} argument. 47 | In the lineup protocol the plot of the real data is embedded amongst a field of 48 | plots of data generated to be consistent with some null hypothesis. 49 | If the observer can pick the real data as different from the others, this 50 | lends weight to the statistical significance of the structure in the plot. 51 | The protocol is described in Buja et al.
(2009). 52 | } 53 | \details{ 54 | 19 null datasets are plotted together with the true data (randomly 55 | positioned). If you pick the real data as being noticeably different, then 56 | you have formally established that it is different, with p-value 0.05. 57 | 58 | Run the \code{decrypt} message printed in the R Console to see which 59 | plot represents the true data. 60 | } 61 | \examples{ 62 | data(tips) 63 | lineup_qq(tips, "total_bill", dist = "normal") # Normal distribution 64 | lineup_qq(tips, "total_bill", dist = "gamma") # Gamma distribution 65 | 66 | # Some distributions require that the parameters be specified: 67 | tips$proportion_tips <- tips$tip/(tips$total_bill+tips$tip) 68 | lineup_qq(tips, "proportion_tips", dist = "beta", params = list(shape1 = 0.1, shape2 = 0.2)) 69 | 70 | # Style the plot using color settings and ggplot2 functions: 71 | lineup_qq(tips, "total_bill", 72 | dist = "gamma", 73 | color_points = "chocolate", 74 | color_lines = "cyan", 75 | alpha_points = 0.25) + 76 | ggplot2::theme_minimal() 77 | } 78 | \references{ 79 | Buja, Cook, Hofmann, Lawrence, Lee, Swayne, Wickham. (2009). 80 | Statistical inference for exploratory data analysis and model diagnostics, 81 | \emph{Phil. Trans. R. Soc. A}, 367, 4361-4383. 82 | } 83 | \seealso{ 84 | null_dist 85 | } 86 | -------------------------------------------------------------------------------- /man/lineup_residuals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quick_plots.R 3 | \name{lineup_residuals} 4 | \alias{lineup_residuals} 5 | \title{Compare residual plots of a fitted model to plots of null residuals.} 6 | \usage{ 7 | lineup_residuals( 8 | model, 9 | type = 1, 10 | method = "rotate", 11 | color_points = "black", 12 | color_trends = "blue", 13 | color_lines = "brown3", 14 | alpha_points = 0.5, 15 | ... 16 | ) 17 | } 18 | \arguments{ 19 | \item{model}{a model object fitted using \code{\link{lm}}.} 20 | 21 | \item{type}{type of plot: 1 = residuals vs fitted, 2 = normal Q-Q, 22 | 3 = scale-location, 4 = residuals vs leverage.} 23 | 24 | \item{method}{method for generating null residuals. Built in methods 25 | 'rotate', 'perm', 'pboot' and 'boot' are defined by \code{\link{resid_rotate}}, 26 | \code{\link{resid_perm}}, \code{\link{resid_pboot}} and \code{\link{resid_boot}} 27 | respectively. 'pboot' is always used for plots of type 2.} 28 | 29 | \item{color_points}{the color used for points in the plot. Can be a name 30 | or a color HEX code.} 31 | 32 | \item{color_trends}{the color used for trend curves in the plot.} 33 | 34 | \item{color_lines}{the color used for reference lines in the plot.} 35 | 36 | \item{alpha_points}{the alpha (opacity) used for points in the plot (between 37 | 0 and 1, where 1 is opaque).} 38 | 39 | \item{...}{other arguments passed onto \code{method}.} 40 | } 41 | \value{ 42 | a \code{ggplot} 43 | } 44 | \description{ 45 | This function is used to quickly create lineup versions of the residual 46 | plots created by \code{plot.lm} and \code{ggfortify::autoplot.lm}; see Details for 47 | descriptions of these plots. 48 | In the lineup protocol the plot of the real data is embedded amongst a field of 49 | plots of data generated to be consistent with some null hypothesis. 50 | If the observer can pick the real data as different from the others, this 51 | lends weight to the statistical significance of the structure in the plot. 52 | The protocol is described in Buja et al.
(2009). 53 | } 54 | \details{ 55 | Four types of plots are available: 56 | \enumerate{ 57 | \item Residual vs fitted. Null hypothesis: variable is linear combination 58 | of predictors. 59 | \item Normal Q-Q plot. Null hypothesis: errors are normal. Always uses 60 | \code{method = "pboot"} to generate residuals under the null hypothesis. 61 | \item Scale-location. Null hypothesis: errors are homoscedastic. 62 | \item Residuals vs leverage. Used to identify points with high residuals 63 | and high leverage, which are likely to have a strong influence on 64 | the model fit. 65 | } 66 | 67 | 19 null datasets are plotted together with the true data (randomly 68 | positioned). If you pick the real data as being noticeably different, then 69 | you have formally established that it is different, with p-value 0.05. 70 | Run the \code{decrypt} message printed in the R Console to see which 71 | plot represents the true data. 72 | 73 | If the null hypothesis in the type 1 plot is violated, consider using 74 | a different model. If the null hypotheses in the type 2 or 3 plots 75 | are violated, consider using bootstrap p-values; see 76 | \href{https://www.modernstatisticswithr.com/regression.html#bootreg1}{Section 8.1.5} 77 | of Thulin (2024) for details and recommendations. 78 | } 79 | \examples{ 80 | data(tips) 81 | x <- lm(tip ~ total_bill, data = tips) 82 | lineup_residuals(x, type = 1) # Residuals vs Fitted 83 | lineup_residuals(x, type = 2, method = "pboot") # Normal Q-Q plot 84 | lineup_residuals(x, type = 4) # Residuals vs Leverage 85 | 86 | # Style the plot using color settings and ggplot2 functions: 87 | lineup_residuals(x, type = 3, 88 | color_points = "skyblue", 89 | color_trends = "darkorange") + 90 | ggplot2::theme_minimal() 91 | } 92 | \references{ 93 | Buja, Cook, Hofmann, Lawrence, Lee, Swayne, Wickham. (2009). 94 | Statistical inference for exploratory data analysis and model diagnostics, 95 | \emph{Phil. Trans. R. Soc. A}, 367, 4361-4383. 96 | 97 | Thulin, M. (2024) \emph{Modern Statistics with R}. Boca Raton: CRC Press. 98 | ISBN 9781032512440. \url{https://www.modernstatisticswithr.com/} 99 | } 100 | \seealso{ 101 | null_lm 102 | } 103 | -------------------------------------------------------------------------------- /man/null_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-dist.r 3 | \name{null_dist} 4 | \alias{null_dist} 5 | \title{Generate null data with a specific distribution.} 6 | \usage{ 7 | null_dist(var, dist, params = NULL) 8 | } 9 | \arguments{ 10 | \item{var}{variable name} 11 | 12 | \item{dist}{distribution name. One of: beta, cauchy, chisq, 13 | exp, f, gamma, geom, lnorm, logis, 14 | nbinom, binom, norm, pois, t, unif, weibull} 15 | 16 | \item{params}{list of parameters of distribution. If \code{NULL}, will 17 | use \code{\link[MASS]{fitdistr}} to estimate them.} 18 | } 19 | \value{ 20 | a function that given \code{data} generates a null data set.
21 | For use with \code{\link{lineup}} or \code{\link{rorschach}} 22 | } 23 | \description{ 24 | Null hypothesis: variable has specified distribution 25 | } 26 | \examples{ 27 | dframe <- data.frame(x = rnorm(150)) 28 | library(ggplot2) 29 | # three histograms of normally distributed values 30 | ggplot( 31 | data=rorschach(method=null_dist("x", "norm"), n = 3, true=dframe) 32 | ) + 33 | geom_histogram(aes(x=x, y=..density..), binwidth=0.25) + 34 | facet_grid(.~.sample) + 35 | geom_density(aes(x=x), colour="steelblue", size=1) 36 | 37 | # uniform distributions are not as easy to recognize as such 38 | dframe$x = runif(150) 39 | ggplot( 40 | data=rorschach(method=null_dist("x", "uniform", 41 | params=list(min=0, max=1)), 42 | n = 3, true=dframe)) + 43 | geom_histogram(aes(x=x, y=..density..), binwidth=0.1) + 44 | facet_grid(.~.sample) + 45 | geom_density(aes(x=x), colour="steelblue", size=1) 46 | } 47 | \seealso{ 48 | null_permute, null_lm 49 | } 50 | -------------------------------------------------------------------------------- /man/null_gen.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distmet.r 3 | \name{null_gen} 4 | \alias{null_gen} 5 | \title{Computing the distance for the null plots} 6 | \usage{ 7 | null_gen(lineup.dat, null, met, method, m, dist.arg) 8 | } 9 | \description{ 10 | Computing the distance for the null plots 11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/null_lm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-model.r 3 | \name{null_lm} 4 | \alias{null_lm} 5 | \title{Generate null data with null residuals from a model.} 6 | \usage{ 7 | null_lm(f, method = "rotate", additional = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{f}{model specification formula, as defined by \code{\link{lm}}} 11 | 12 | \item{method}{method for generating null residuals. Built in methods 13 | 'rotate', 'perm', 'pboot' and 'boot' are defined by \code{\link{resid_rotate}}, 14 | \code{\link{resid_perm}}, \code{\link{resid_pboot}} and \code{\link{resid_boot}} 15 | respectively} 16 | 17 | \item{additional}{whether to compute additional measures: standardized 18 | residuals and leverage} 19 | 20 | \item{...}{other arguments passed onto \code{method}.} 21 | } 22 | \value{ 23 | a function that given \code{data} generates a null data set.
24 | For use with \code{\link{lineup}} or \code{\link{rorschach}} 25 | } 26 | \description{ 27 | Null hypothesis: variable is linear combination of predictors 28 | } 29 | \examples{ 30 | data(tips) 31 | x <- lm(tip ~ total_bill, data = tips) 32 | tips.reg <- data.frame(tips, .resid = residuals(x), .fitted = fitted(x)) 33 | library(ggplot2) 34 | ggplot(lineup(null_lm(tip ~ total_bill, method = 'rotate'), tips.reg)) + 35 | geom_point(aes(x = total_bill, y = .resid)) + 36 | facet_wrap(~ .sample) 37 | } 38 | \seealso{ 39 | null_permute, null_dist 40 | } 41 | -------------------------------------------------------------------------------- /man/null_permute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-permute.r 3 | \name{null_permute} 4 | \alias{null_permute} 5 | \title{Generate null data by permuting a variable.} 6 | \usage{ 7 | null_permute(var) 8 | } 9 | \arguments{ 10 | \item{var}{name of variable to permute} 11 | } 12 | \value{ 13 | a function that given \code{data} generates a null data set. 14 | For use with \code{\link{lineup}} or \code{\link{rorschach}} 15 | } 16 | \description{ 17 | Null hypothesis: variable is independent of others 18 | } 19 | \examples{ 20 | data(mtcars) 21 | library(ggplot2) 22 | ggplot(data=rorschach(method=null_permute("mpg"), n = 3, true=mtcars)) + 23 | geom_boxplot(aes(x=factor(cyl), y=mpg, fill=factor(cyl))) +facet_grid(.~.sample) + 24 | theme(legend.position="none", aspect.ratio=1) 25 | } 26 | \seealso{ 27 | null_lm, null_dist 28 | } 29 | -------------------------------------------------------------------------------- /man/null_ts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-ts.r 3 | \name{null_ts} 4 | \alias{null_ts} 5 | \title{Generate null data by simulating from a time series model.} 6 | \usage{ 7 | null_ts(var, modelfn) 8 | } 9 | \arguments{ 10 | \item{var}{variable to model as a time series} 11 | 12 | \item{modelfn}{method for simulating from ts model.} 13 | } 14 | \value{ 15 | a function that given \code{data} generates a null data set. 
16 | For use with \code{\link{lineup}} or \code{\link{rorschach}} 17 | } 18 | \description{ 19 | Null hypothesis: data follows a time series model using auto.arima from the forecast package 20 | } 21 | \examples{ 22 | require(forecast) 23 | require(ggplot2) 24 | require(dplyr) 25 | data(aud) 26 | l <- lineup(null_ts("rate", auto.arima), aud) 27 | ggplot(l, aes(x=date, y=rate)) + geom_line() + 28 | facet_wrap(~.sample, scales="free_y") + 29 | theme(axis.text = element_blank()) + 30 | xlab("") + ylab("") 31 | l_dif <- l \%>\% 32 | group_by(.sample) \%>\% 33 | mutate(d=c(NA,diff(rate))) \%>\% 34 | ggplot(aes(x=d)) + geom_density() + 35 | facet_wrap(~.sample) 36 | } 37 | \seealso{ 38 | null_model 39 | } 40 | -------------------------------------------------------------------------------- /man/opt_bin_diff.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/opt_diff.r 3 | \name{opt_bin_diff} 4 | \alias{opt_bin_diff} 5 | \title{Finds the number of bins in x and y direction which gives the maximum binned distance.} 6 | \usage{ 7 | opt_bin_diff( 8 | lineup.dat, 9 | var, 10 | xlow, 11 | xhigh, 12 | ylow, 13 | yhigh, 14 | pos, 15 | plot = FALSE, 16 | m = 20 17 | ) 18 | } 19 | \arguments{ 20 | \item{lineup.dat}{lineup data to get the lineup} 21 | 22 | \item{var}{a list of names of the variables to be used to calculate the difference} 23 | 24 | \item{xlow}{the lowest value of number of bins on the x-direction} 25 | 26 | \item{xhigh}{the highest value of number of bins on the x-direction} 27 | 28 | \item{ylow}{the lowest value of number of bins on the y-direction} 29 | 30 | \item{yhigh}{the highest value of number of bins on the y-direction} 31 | 32 | \item{pos}{position of the true plot in the lineup} 33 | 34 | \item{plot}{LOGICAL; if true, returns a tile plot for the combinations 35 | of number of bins with the differences as weights} 36 | 37 | \item{m}{number of plots in the lineup, by default m = 20} 38 | } 39 | \value{ 40 | a dataframe with the number of bins and differences 41 | the maximum mean distance of the null plots 42 | } 43 | \description{ 44 | This function finds the optimal number of bins in both x and y direction which should 45 | be used to calculate the binned distance. The binned distance is calculated for each 46 | combination of provided choices of number of bins in x and y direction and finds the 47 | difference using \code{calc_diff} for each combination. The combination for which the 48 | difference is maximum should be used. 
49 | } 50 | \examples{ 51 | if(require('dplyr')){ 52 | opt_bin_diff(lineup(null_permute('mpg'), mtcars, pos = 1), var = c('mpg', 'wt'), 53 | 2, 5, 4, 8, pos = 1, plot = TRUE, m = 8) 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /man/pvisual.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pvalues.r 3 | \name{pvisual} 4 | \alias{pvisual} 5 | \title{P-value calculations.} 6 | \usage{ 7 | pvisual( 8 | x, 9 | K, 10 | m = 20, 11 | N = 10000, 12 | type = "scenario3", 13 | xp = 1, 14 | target = 1, 15 | upper.tail = TRUE 16 | ) 17 | } 18 | \arguments{ 19 | \item{x}{number of observed picks of the data plot} 20 | 21 | \item{K}{number of evaluations} 22 | 23 | \item{m}{size of the lineup} 24 | 25 | \item{N}{MC parameter: number of replicates on which MC probabilities are based. Higher number of replicates will decrease MC variability.} 26 | 27 | \item{type}{type of simulation used: scenario 3 assumes that the same lineup is shown in all K evaluations} 28 | 29 | \item{xp}{exponent used, defaults to 1} 30 | 31 | \item{target}{integer value identifying the location of the data plot} 32 | 33 | \item{upper.tail}{compute probabilities P(X >= x). Be aware that the use of this parameter is not consistent with the other distribution functions in base. There, a value of P(X > x) is computed for upper.tail=TRUE.} 34 | } 35 | \value{ 36 | Vector/data frame. For comparison a p value based on a binomial distribution is provided as well. 37 | } 38 | \description{ 39 | This set of functions allows the user to calculate a p-value from a lineup after 40 | it has been evaluated by K independent observers. The different functions 41 | accommodate different ways of constructing lineups and showing them to observers. 42 | Details are in the papers Majumder et al (2012) JASA, and Hofmann et al (2015). 43 | We distinguish between three different scenarios: 44 | \itemize{ 45 | \item Scenario I: in each of K evaluations a different data set and a different set of (m-1) null plots is shown. 46 | \item Scenario II: in each of K evaluations the same data set but a different set of (m-1) null plots is shown. 47 | \item Scenario III: the same lineup, i.e. same data and same set of null plots, is shown to K different observers.
48 | } 49 | } 50 | \examples{ 51 | pvisual(15, 20, m=3) # triangle test 52 | } 53 | -------------------------------------------------------------------------------- /man/reg_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distances.r 3 | \name{reg_dist} 4 | \alias{reg_dist} 5 | \title{Distance based on the regression parameters} 6 | \usage{ 7 | reg_dist(X, PX, nbins = 1, intercept = TRUE, scale = TRUE) 8 | } 9 | \arguments{ 10 | \item{X}{a data.frame with two variables, the first column giving 11 | the explanatory variable and the second column giving the response 12 | variable} 13 | 14 | \item{PX}{another data.frame with two variables, the first column giving 15 | the explanatory variable and the second column giving the response 16 | variable} 17 | 18 | \item{nbins}{number of bins in the x-direction, by default nbins = 1} 19 | 20 | \item{intercept}{include the distances between intercepts?} 21 | 22 | \item{scale}{logical value: should the variables be scaled before computing regression coefficients?} 23 | } 24 | \value{ 25 | distance between X and PX 26 | } 27 | \description{ 28 | Dataset X is binned into \code{nbins} bins in the x-direction. A regression line is fitted to the 29 | data in each bin and the regression coefficients are noted. The same is done for 30 | dataset PX. A Euclidean distance is calculated between the two sets of regression 31 | parameters. If the relationship between X and PX looks linear, the number of bins should 32 | be equal to 1. 33 | } 34 | \examples{ 35 | with(mtcars, reg_dist(data.frame(wt, mpg), data.frame(sample(wt), mpg))) 36 | } 37 | -------------------------------------------------------------------------------- /man/resid_boot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-model.r 3 | \name{resid_boot} 4 | \alias{resid_boot} 5 | \title{Bootstrap residuals.} 6 | \usage{ 7 | resid_boot(model, data) 8 | } 9 | \arguments{ 10 | \item{model}{to extract residuals from} 11 | 12 | \item{data}{used to fit model} 13 | } 14 | \description{ 15 | For use with \code{\link{null_lm}} 16 | } 17 | -------------------------------------------------------------------------------- /man/resid_pboot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-model.r 3 | \name{resid_pboot} 4 | \alias{resid_pboot} 5 | \title{Parametric bootstrap residuals.} 6 | \usage{ 7 | resid_pboot(model, data) 8 | } 9 | \arguments{ 10 | \item{model}{to extract residuals from} 11 | 12 | \item{data}{used to fit model} 13 | } 14 | \description{ 15 | For use with \code{\link{null_lm}} 16 | } 17 | -------------------------------------------------------------------------------- /man/resid_perm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-model.r 3 | \name{resid_perm} 4 | \alias{resid_perm} 5 | \title{Permutation residuals.} 6 | \usage{ 7 | resid_perm(model, data) 8 | } 9 | \arguments{ 10 | \item{model}{to extract residuals from} 11 | 12 | \item{data}{used to fit model} 13 | } 14 | \description{ 15 | For use with \code{\link{null_lm}} 16 | } 17 |
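The resid_boot, resid_pboot, resid_perm and resid_rotate helpers are documented only as "For use with null_lm". A minimal sketch of how they are typically reached in practice, through the `method` argument of `null_lm()`; it mirrors the `null_lm()` example above, assumes the `tips` data and `ggplot2` are available, and is an illustrative aside rather than part of the package's man pages:

```r
library(ggplot2)
library(nullabor)

# Fit the model and keep its residuals and fitted values next to the data,
# as in the null_lm() example
data(tips)
x <- lm(tip ~ total_bill, data = tips)
tips.reg <- data.frame(tips, .resid = residuals(x), .fitted = fitted(x))

# method = "boot", "pboot", "perm" or "rotate" selects resid_boot,
# resid_pboot, resid_perm or resid_rotate respectively
ggplot(lineup(null_lm(tip ~ total_bill, method = "boot"), tips.reg)) +
  geom_point(aes(x = total_bill, y = .resid)) +
  facet_wrap(~ .sample)
```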
-------------------------------------------------------------------------------- /man/resid_rotate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-model.r 3 | \name{resid_rotate} 4 | \alias{resid_rotate} 5 | \title{Rotation residuals.} 6 | \usage{ 7 | resid_rotate(model, data) 8 | } 9 | \arguments{ 10 | \item{model}{to extract residuals from} 11 | 12 | \item{data}{used to fit model} 13 | } 14 | \description{ 15 | For use with \code{\link{null_lm}} 16 | } 17 | -------------------------------------------------------------------------------- /man/resid_sigma.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/method-model.r 3 | \name{resid_sigma} 4 | \alias{resid_sigma} 5 | \title{Residuals simulated by a normal model, with specified sigma} 6 | \usage{ 7 | resid_sigma(model, data, sigma = 1) 8 | } 9 | \arguments{ 10 | \item{model}{to extract residuals from} 11 | 12 | \item{data}{used to fit model} 13 | 14 | \item{sigma}{a specific sigma to model} 15 | } 16 | \description{ 17 | For use with \code{\link{null_lm}} 18 | } 19 | -------------------------------------------------------------------------------- /man/rorschach.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/protocols.r 3 | \name{rorschach} 4 | \alias{rorschach} 5 | \title{The Rorschach protocol.} 6 | \usage{ 7 | rorschach(method, true = NULL, n = 20, p = 0) 8 | } 9 | \arguments{ 10 | \item{method}{method for generating null data sets} 11 | 12 | \item{true}{true data set. If \code{NULL}, \code{\link{find_plot_data}} 13 | will attempt to extract it from the current ggplot2 plot.} 14 | 15 | \item{n}{total number of samples to generate (including true data)} 16 | 17 | \item{p}{probability of including true data with null data.} 18 | } 19 | \description{ 20 | This protocol is used to calibrate the eyes for variation due to sampling. 21 | All plots are typically null data sets, data that is consistent with a null 22 | hypothesis. The protocol is described in Buja, Cook, Hofmann, Lawrence, 23 | Lee, Swayne, Wickham (2009) Statistical inference for exploratory data 24 | analysis and model diagnostics, Phil. Trans. R. Soc. A, 367, 4361-4383. 25 | } 26 | -------------------------------------------------------------------------------- /man/sample_size.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sample_size.r 3 | \name{sample_size} 4 | \alias{sample_size} 5 | \title{Sample size calculator} 6 | \usage{ 7 | sample_size(n = 53:64, m = 20, pA = seq(1/20, 1/3, 0.01), conf = 0.95) 8 | } 9 | \arguments{ 10 | \item{n}{range of sample sizes to check, default is 53:64} 11 | 12 | \item{m}{lineup size, default 20} 13 | 14 | \item{pA}{range of estimated detection rates to consider, 15 | default is seq(1/20, 1/3, 0.01)} 16 | 17 | \item{conf}{confidence level to use to simulate from binomial} 18 | } 19 | \description{ 20 | This function calculates a table of sample sizes for 21 | an experiment, given a lineup size and 22 | estimates of the detection rate.
23 | } 24 | \examples{ 25 | pow <- sample_size() 26 | pow 27 | library(ggplot2) 28 | library(viridis) 29 | ggplot(pow, aes(x=n, y=pA, fill=prob, group=pA)) + 30 | geom_tile() + 31 | scale_fill_viridis_c("power") + 32 | ylab("detect rate (pA)") + xlab("sample size (n)") + 33 | theme_bw() 34 | } 35 | -------------------------------------------------------------------------------- /man/sep_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distances.r 3 | \name{sep_dist} 4 | \alias{sep_dist} 5 | \title{Distance based on separation of clusters} 6 | \usage{ 7 | sep_dist(X, PX, clustering = FALSE, nclust = 3, type = "separation") 8 | } 9 | \arguments{ 10 | \item{X}{a data.frame with two or three columns, the first two columns 11 | providing the dataset} 12 | 13 | \item{PX}{a data.frame with two or three columns, the first two columns 14 | providing the dataset} 15 | 16 | \item{clustering}{LOGICAL; if TRUE, the third column is used as the 17 | clustering variable, by default FALSE} 18 | 19 | \item{nclust}{the number of clusters to be obtained by hierarchical 20 | clustering, by default nclust = 3} 21 | 22 | \item{type}{character string to specify which measure to use for distance, see ?cluster.stats for details} 23 | } 24 | \value{ 25 | distance between X and PX 26 | } 27 | \description{ 28 | The separation between clusters is defined by the minimum distance from a point in 29 | one cluster to a point in another cluster. The number of clusters may be provided; 30 | if not, hierarchical clustering is used to obtain the clusters. The 31 | separation between the clusters for dataset X is calculated. The same is done for 32 | dataset PX. A Euclidean distance is then calculated between these separations for 33 | X and PX. 34 | } 35 | \examples{ 36 | if(require('fpc')) { 37 | with(mtcars, sep_dist(data.frame(wt, mpg, as.numeric(as.factor(mtcars$cyl))), 38 | data.frame(sample(wt), mpg, as.numeric(as.factor(mtcars$cyl))), 39 | clustering = TRUE)) 40 | } 41 | 42 | if (require('fpc')) { 43 | with(mtcars, sep_dist(data.frame(wt, mpg, as.numeric(as.factor(mtcars$cyl))), 44 | data.frame(sample(wt), mpg, as.numeric(as.factor(mtcars$cyl))), 45 | nclust = 3)) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /man/theme_strip.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/theme.R 3 | \name{theme_strip} 4 | \alias{theme_strip} 5 | \title{A theme to minimally strip away the context} 6 | \usage{ 7 | theme_strip() 8 | } 9 | \description{ 10 | Note that this is not a complete theme, hence there are no arguments. 11 | } 12 | \examples{ 13 | library(ggplot2) 14 | ggplot(cars, aes(dist, speed)) + theme_strip() 15 | 16 | } 17 | -------------------------------------------------------------------------------- /man/tips.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.r 3 | \docType{data} 4 | \name{tips} 5 | \alias{tips} 6 | \title{Tipping data} 7 | \format{ 8 | A data frame with 244 rows and 7 variables 9 | } 10 | \usage{ 11 | tips 12 | } 13 | \description{ 14 | One waiter recorded information about each tip he received over a 15 | period of a few months working in one restaurant.
He collected several 16 | variables: 17 | } 18 | \details{ 19 | \itemize{ 20 | \item tip in dollars, 21 | \item bill in dollars, 22 | \item sex of the bill payer, 23 | \item whether there were smokers in the party, 24 | \item day of the week, 25 | \item time of day, 26 | \item size of the party. 27 | } 28 | 29 | In all he recorded 244 tips. The data was reported in a collection of 30 | case studies for business statistics (Bryant & Smith 1995). 31 | } 32 | \references{ 33 | Bryant, P. G. and Smith, M. (1995) \emph{Practical Data 34 | Analysis: Case Studies in Business Statistics}. Homewood, IL: Richard D. 35 | Irwin Publishing. 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /man/turk_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.r 3 | \docType{data} 4 | \name{turk_results} 5 | \alias{turk_results} 6 | \title{Sample turk results} 7 | \description{ 8 | Subset of data from a Turk experiment, used to show how to compute the power of a lineup 9 | } 10 | -------------------------------------------------------------------------------- /man/uni_dist.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distances.r 3 | \name{uni_dist} 4 | \alias{uni_dist} 5 | \title{Distance for univariate data} 6 | \usage{ 7 | uni_dist(X, PX) 8 | } 9 | \arguments{ 10 | \item{X}{a data.frame; only the first column is used} 11 | 12 | \item{PX}{another data.frame; only the first column is used} 13 | } 14 | \value{ 15 | distance between X and PX 16 | } 17 | \description{ 18 | The first four moments are calculated for data X and data PX. A Euclidean distance 19 | is calculated between these moments for X and PX. 20 | } 21 | \examples{ 22 | if(require('moments')){uni_dist(rnorm(100), rpois(100, 2))} 23 | } 24 | -------------------------------------------------------------------------------- /man/visual_power.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/power.r 3 | \name{visual_power} 4 | \alias{visual_power} 5 | \title{Power calculations.} 6 | \usage{ 7 | visual_power(data, m = 20) 8 | } 9 | \arguments{ 10 | \item{data}{summary of the results, containing columns id, pic_id, response, detected} 11 | 12 | \item{m}{size of the lineup} 13 | } 14 | \value{ 15 | vector of powers for each pic_id 16 | } 17 | \description{ 18 | This function simply counts the proportion of people who selected the data plot 19 | in a set of lineups. It adjusts for multiple picks by the same individual, by weighting 20 | by the total number of choices.
21 | } 22 | \examples{ 23 | data(turk_results) 24 | visual_power(turk_results) 25 | } 26 | -------------------------------------------------------------------------------- /man/wasps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.r 3 | \docType{data} 4 | \name{wasps} 5 | \alias{wasps} 6 | \title{Wasp gene expression data.} 7 | \description{ 8 | Data from Toth et al (2010) used in Niladri Roy et al (2015) 9 | } 10 | -------------------------------------------------------------------------------- /nullabor.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: bb07e632-3a6d-45c3-b42e-0e789ad86ed7 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageBuildArgs: --no-build-vignettes 23 | PackageRoxygenize: rd,collate,namespace 24 | -------------------------------------------------------------------------------- /vignettes/nullabor-distributions.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Assessing distributional assumptions using the nullabor package" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{Assessing distributional assumptions using the nullabor package} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | 11 | Assessing distributional assumptions using the **nullabor** package 12 | ======================================= 13 | 14 | 15 | ```{r setup, include=FALSE} 16 | library(knitr) 17 | opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") 18 | ``` 19 | 20 | The **nullabor** package provides functions to visually assess distributional assumptions. 21 | 22 | ```{r message=FALSE} 23 | library(nullabor) 24 | ``` 25 | 26 | Start by specifying the distribution family under the null hypothesis. The options available are: 27 | 28 | * Beta distribution: `beta` 29 | * Cauchy distribution: `cauchy` 30 | * $\chi^2$ distribution: `chi-squared` 31 | * Gamma distribution: `gamma` 32 | * Geometric distribution: `geometric` 33 | * Lognormal distribution: `lognormal` 34 | * Logistic distribution: `logistic` 35 | * Negative binomial distribution: `negative binomial` 36 | * Binomial distribution: `binomial` 37 | * Normal distribution: `normal` 38 | * Poisson distribution: `poisson` 39 | * Student's t distribution: `t` 40 | * Uniform distribution: `uniform` 41 | * Weibull distribution: `weibull` 42 | 43 | You can also specify the parameters of the distribution. This is required for uniform, beta, and binomial distributions. 44 | 45 | ## Using histograms 46 | The first option is to use histograms with kernel density estimates. 
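The quick plotting functions follow the same lineup protocol as the lower-level `lineup()` and `null_dist()` functions. As a rough, hand-built sketch of the same idea (the bin count here is an arbitrary choice; `lineup_histograms`, shown next, is the convenient wrapper and also adds density curves):

```{r message=FALSE}
# Hand-built sketch: 19 nulls simulated from a fitted normal, plus the real data
library(ggplot2)
data(tips)
d <- lineup(null_dist("total_bill", dist = "normal"), tips)
ggplot(d, aes(total_bill)) +
  geom_histogram(bins = 30) +
  facet_wrap(~ .sample)
```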
47 | 48 | To test the hypothesis that the variable `total_bill` in the `tips` dataset follows a normal distribution, we draw a histogram lineup plot using `lineup_histograms` as follows: 49 | 50 | ```{r message=FALSE} 51 | data(tips) 52 | lineup_histograms(tips, "total_bill", dist = "normal") 53 | ``` 54 | 55 | Run the `decrypt` code printed in the Console to see which plot belongs to the `tips` data. 56 | 57 | To instead test the hypothesis that the data follow a gamma distribution, we use `dist = "gamma"`: 58 | 59 | ```{r message=FALSE} 60 | lineup_histograms(tips, "total_bill", dist = "gamma") 61 | ``` 62 | 63 | ### Specifying distribution parameters 64 | In some cases, we need (or want) to specify the entire distribution, and not just the family. We then provide the distribution parameters, using the standard format for the distribution (i.e. the same used by `r*`, `d*`, `p*`, and `q*` functions, where `*` is the distribution name). 65 | 66 | As an example, let's say that we want to test whether a dataset comes from a uniform $U(0,1)$ distribution. First, we generate two example variables. `x1` is $U(0,1)$, but `x2` is not. 67 | 68 | ```{r message=FALSE} 69 | example_data <- data.frame(x1 = runif(100, 0, 1), 70 | x2 = rbeta(100, 1/2, 1/2)) 71 | ``` 72 | 73 | For the uniform distribution, the parameters are `min` and `max` (see `?dunif`). To test whether the `x1` data come from a $U(0,1)$ distribution, we specify the distribution parameters as follows: 74 | 75 | ```{r message=FALSE} 76 | lineup_histograms(example_data, "x1", dist = "uniform", params = list(min = 0, max = 1)) 77 | ``` 78 | 79 | And for `x2`: 80 | 81 | ```{r message=FALSE} 82 | lineup_histograms(example_data, "x2", dist = "uniform", params = list(min = 0, max = 1)) 83 | ``` 84 | 85 | 86 | ## Using Q-Q plots 87 | An alternative to histograms is to use quantile-quantile plots, in which the theoretical quantiles of the distribution are compared to the empirical quantiles from the (standardized) data. Under the null hypothesis, the points should lie along the reference line. However, some deviations in the tails are usually expected. A lineup plot is useful to see how much points can deviate from the reference line under the null hypothesis. 88 | 89 | To create a Q-Q lineup plot using the normal distribution as the null distribution, use `lineup_qq` as follows: 90 | 91 | ```{r message=FALSE} 92 | lineup_qq(tips, "total_bill", dist = "normal") 93 | ``` 94 | 95 | Again, some distributions require parameters to be specified. This is done analogously to how we did it for histograms: 96 | 97 | ```{r message=FALSE} 98 | lineup_qq(example_data, "x1", dist = "uniform", params = list(min = 0, max = 1)) 99 | ``` 100 | 101 | ## Changing plot appearance 102 | For both histograms and Q-Q plots, you can style the plot using arguments for color and opacity, as well as using `ggplot2` functions like `theme`. 
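Both functions return `ggplot` objects, so you can also store a lineup and add layers to it afterwards; a small sketch (the title text is arbitrary):

```{r message=FALSE}
data(tips)
p <- lineup_qq(tips, "total_bill", dist = "normal")
p + ggplot2::labs(title = "Can you spot the real total_bill panel?")
```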
103 | 104 | Histograms: 105 | 106 | ```{r message=FALSE} 107 | library(ggplot2) 108 | lineup_histograms(example_data, "x1", 109 | dist = "uniform", 110 | params = list(min = 0, max = 1), 111 | color_bars = "white", 112 | fill_bars = "#416B4B", 113 | color_lines = "#7D5AAD" 114 | ) + theme_minimal() 115 | ``` 116 | 117 | Q-Q plots: 118 | 119 | ```{r message=FALSE} 120 | lineup_qq(tips, "total_bill", 121 | dist = "gamma", 122 | color_points = "cyan", 123 | color_lines = "#ED11B7", 124 | alpha_points = 0.25) + 125 | theme_minimal() + 126 | theme(panel.background = element_rect(fill = "navy"), 127 | axis.title = element_text(family = "mono", size = 14)) 128 | ``` 129 | 130 | 131 | 132 | References 133 | ---------- 134 | 135 | Buja, A., Cook, D., Hofmann, H., Lawrence, M., Lee, E.-K., Swayne, D. F, Wickham, H. (2009) Statistical Inference for Exploratory Data Analysis and Model Diagnostics, Royal Society Philosophical Transactions A, 367:4361--4383, DOI: 10.1098/rsta.2009.0120. 136 | -------------------------------------------------------------------------------- /vignettes/nullabor-examples.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Example lineups" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{Example lineups} 7 | %\VignetteEncoding[utf8]{inputenc} 8 | --- 9 | 10 | 11 | Example lineups in nullabor 12 | ======================================= 13 | 14 | 15 | ```{r setup, include=FALSE} 16 | library(knitr) 17 | opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") 18 | library(nullabor) 19 | ``` 20 | 21 | # Electoral Building 22 | 23 | 24 | The idea in this example is to take the margins for each state as reported by a pollster and sample for each state from a normal distribution to get a vector of values representing the margins of a potential election day outcome. The polls here are loosely based on the 2012 US Election polls by \url{http://freedomslighthouse.net/2012-presidential-election-electoral-vote-map/}. 25 | 26 | ```{r} 27 | simPoll <- function(trueData) { 28 | simMargin <- rnorm(nrow(trueData), mean=trueData$Margin, sd=2.5) 29 | simDemocrat <- ((simMargin>0) & trueData$Democrat) | ((simMargin<0) & !trueData$Democrat) 30 | 31 | simMargin <- abs(simMargin) 32 | res <- trueData 33 | res$Democrat <- simDemocrat 34 | res$Margin <- simMargin 35 | res 36 | } 37 | ``` 38 | 39 | `simPoll` is a relatively specialized function that takes polling results for each state and produces a random value from a normal distribution using the polling results as the mean. For now we assume a standard deviation (or 'accuracy') for each state poll of 2.5. 40 | `sim1` is a first instance of the simulation - based on this simulation, we can compute for example the number of Electoral Votes for the Democratic party based on this simulated election day result. 41 | ```{r} 42 | data(electoral, package="nullabor") 43 | margins <- electoral$polls 44 | 45 | sim1 <- simPoll(margins) 46 | sum(sim1$Electoral.Votes[sim1$Democrat]) 47 | ``` 48 | Because the `simPoll` function returns a data set of exactly the same form as the original data, we can use this function as a method in the `lineup` call to get a set of simulations together with the polling results. 49 | Because we want to keep track of the position of the real data, we set the position ourselves (but keep it secret for now by using a random position). 
50 | 51 | ```{r} 52 | pos <- sample(20,1) 53 | lpdata <- nullabor::lineup(method = simPoll, true=margins, n=20, pos=pos) 54 | dim(lpdata) 55 | summary(lpdata) 56 | ``` 57 | We need to exchange the polling results for the actual election results. 58 | 59 | ```{r} 60 | election <- electoral$election 61 | idx <- which(lpdata$.sample==pos) 62 | lpdata$Margin[idx] <- election$Margin 63 | ``` 64 | 65 | ... and now we have to build the actual plot. That requires a bit of restructuring of the data: 66 | 67 | ```{r, warning=FALSE, message=FALSE} 68 | library(dplyr) 69 | lpdata <- lpdata %>% arrange(desc(Margin)) 70 | lpdata <- lpdata %>% group_by(.sample, Democrat) %>% mutate( 71 | tower=cumsum(Electoral.Votes[order(Margin, decreasing=TRUE)]) 72 | ) 73 | lpdata$diff <- with(lpdata, Margin*c(1,-1)[as.numeric(Democrat)+1]) 74 | ``` 75 | 76 | And now we can plot the rectangles: 77 | ```{r, fig.height=7, fig.width=6, warning=FALSE, message=FALSE} 78 | library(ggplot2) 79 | dframe <- lpdata 80 | dframe$diff <- with(dframe, diff+sign(diff)*0.075) 81 | dframe$diff <- pmin(50, dframe$diff) 82 | ggplot(aes(x=diff, y=tower, colour = factor(Democrat)), data=dframe) + 83 | scale_colour_manual(values=c("red", "blue"), guide="none") + 84 | scale_fill_manual(values=c("red", "blue"), guide="none") + 85 | scale_x_continuous(breaks=c(-25,0,25), labels=c("25", "0", "25"), 86 | limits=c(-50,50)) + 87 | geom_rect(aes(xmin=pmin(0, diff), xmax=pmax(0,diff), ymin=0, 88 | ymax=tower, fill=Democrat), size=0) + 89 | geom_vline(xintercept=0, colour="white") + 90 | facet_wrap(~.sample) + 91 | theme(axis.text=element_blank(), axis.ticks=element_blank(), 92 | axis.title=element_blank(), 93 | plot.margin=unit(c(0.1,0.1,0,0), "cm")) + 94 | ggtitle("Which of these panels looks the most different?") 95 | ``` 96 | Try to decide for yourself! Which plot looks the most different in this lineup? 97 | Once you have chosen, you can compare it to the number below: 98 | ```{r} 99 | pos 100 | ``` 101 | -------------------------------------------------------------------------------- /vignettes/nullabor-regression.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Residual plots for linear models" 3 | output: rmarkdown::html_vignette 4 | vignette: > 5 | %\VignetteEngine{knitr::rmarkdown} 6 | %\VignetteIndexEntry{Residual plots for linear models} 7 | %\VignetteEncoding{UTF-8} 8 | --- 9 | 10 | 11 | Residual plots using the **nullabor** package 12 | ======================================= 13 | 14 | 15 | ```{r setup, include=FALSE} 16 | library(knitr) 17 | opts_chunk$set(out.extra='style="display:block; margin: auto"', fig.align="center") 18 | ``` 19 | 20 | The **nullabor** package provides functions to draw residual plots for linear regression models using the lineup protocol. 21 | 22 | ```{r message=FALSE} 23 | library(nullabor) 24 | ``` 25 | 26 | First, fit a linear model: 27 | ```{r message=FALSE} 28 | data(tips) 29 | x <- lm(tip ~ total_bill, data = tips) 30 | ``` 31 | 32 | The `lineup_residuals` function can now be used to generate four types of residual lineup plots. 33 | 34 | The first residual plot shows the residuals versus the fitted values. It is used to test the hypothesis that the response variable is a linear combination of the predictors. If you can spot the true data in the plot, you can formally reject the null hypothesis with p-value 0.05 (Buja et al., 2009; Li et al., 2024). After running the code below, run the `decrypt` message (e.g.
`decrypt("XSKz 5xQx Vd Z3jVQV3d ww")`) printed in the R Console to see which dataset is the true data. 35 | 36 | ```{r message=FALSE} 37 | lineup_residuals(x, type = 1) 38 | ``` 39 | 40 | The second plot is a normal Q-Q plot for the residuals, used to test the hypothesis that the errors are normal: 41 | 42 | ```{r message=FALSE} 43 | lineup_residuals(x, type = 2) 44 | ``` 45 | 46 | The third plot is a scale-location plot used to test the hypothesis that the errors are homoscedastic: 47 | 48 | ```{r message=FALSE} 49 | lineup_residuals(x, type = 3) 50 | ``` 51 | 52 | The fourth plot shows leverage, and is used to identify points with high residuals and high leverage, which are likely to have a strong influence on the model fit: 53 | 54 | ```{r message=FALSE} 55 | lineup_residuals(x, type = 4) 56 | ``` 57 | 58 | The plots are created using `ggplot2` and can be modified in the same way as other ggplots. In addition, `lineup_residuals` has arguments for changing the colors used: 59 | 60 | ```{r message=FALSE} 61 | library(ggplot2) 62 | lineup_residuals(x, type = 3, 63 | color_points = "skyblue", 64 | color_trends = "darkorange") + 65 | theme_minimal() 66 | ``` 67 | 68 | If the null hypothesis in the type 1 plot is violated, consider using a different model. If the null hypotheses in the type 2 or 3 plots 69 | are violated, consider using bootstrap p-values; see [Section 8.1.5](https://www.modernstatisticswithr.com/regression.html#bootreg1) of Thulin (2024) for details and recommendations. 70 | 71 | 72 | References 73 | ---------- 74 | 75 | Buja, A., Cook, D., Hofmann, H., Lawrence, M., Lee, E.-K., Swayne, D. F, Wickham, H. (2009) Statistical Inference for Exploratory Data Analysis and Model Diagnostics, Royal Society Philosophical Transactions A, 367:4361--4383, DOI: 10.1098/rsta.2009.0120. 76 | 77 | Li, W., Cook, D., Tanaka, E., & VanderPlas, S. (2024). A plot is worth a thousand tests: Assessing residual diagnostics with the lineup protocol. Journal of Computational and Graphical Statistics, 1-19. 78 | 79 | Thulin, M. (2024) _Modern Statistics with R_. Boca Raton: CRC Press. ISBN 9781032512440. [https://www.modernstatisticswithr.com/](https://www.modernstatisticswithr.com/) 80 | --------------------------------------------------------------------------------