├── .Rbuildignore ├── .gitattributes ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── benchmarks.R ├── binned_es_plot.R ├── ecdf_plot.R ├── es_calcs.R ├── esvis-package.R ├── pp_plot.R ├── seda.R ├── star.R └── utils.R ├── README-binned_plot1-1.png ├── README-binned_plot2-1.png ├── README-ecdf_plot-1.png ├── README-pp_plot1-1.png ├── README.Rmd ├── README.md ├── appveyor.yml ├── cran-comments.md ├── data ├── benchmarks.rda ├── seda.rda └── star.rda ├── docs ├── README-binned_plot1-1.png ├── README-binned_plot2-1.png ├── README-cleanup.R ├── README-ecdf_plot-1.png └── README-pp_plot1-1.png ├── esvis.Rproj ├── inst └── image │ ├── README-binned_plot-1.png │ ├── README-ecdf_plot-1.png │ ├── README-pp_plot-1.png │ ├── README-pp_plot1-1.png │ └── README-pp_plot2-1.png ├── man ├── auc.Rd ├── benchmarks.Rd ├── binned_es.Rd ├── binned_plot.Rd ├── coh.Rd ├── coh_d.Rd ├── descrip_stats.Rd ├── ecdf_fun.Rd ├── ecdf_plot.Rd ├── esvis-package.Rd ├── hedg.Rd ├── hedg_g.Rd ├── pac.Rd ├── pac_compare.Rd ├── paired_ecdf.Rd ├── pp_plot.Rd ├── psd.Rd ├── seda.Rd ├── star.Rd ├── tpac.Rd ├── tpac_compare.Rd └── v.Rd └── tests ├── .DS_Store ├── testthat.R └── testthat ├── test-auc.R ├── test-coh_d.R ├── test-ecdf_plot.R ├── test-hedge_g.R ├── test-pp_plot.R └── test-v.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^\.travis\.yml$ 2 | ^appveyor\.yml$ 3 | cran-comments.md 4 | ^README\.Rmd$ 5 | ^README-.*\.png$ 6 | ^.*\.Rproj$ 7 | ^\.Rproj\.user$ 8 | ^docs$ 9 | ^CRAN-RELEASE$ 10 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto 2 | data/* binary 3 | src/* text=lf 4 | R/* text=lf -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Mac garbage 36 | .DS_Store 37 | .Rproj.user 38 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | os: 4 | - linux 5 | - osx 6 | 7 | r: 8 | - release 9 | 10 | language: R 11 | sudo: required 12 | cache: packages 13 | 14 | r_packages: 15 | - covr 16 | 17 | r_github_packages: 18 | - tidyverse/tidyr 19 | 20 | after_success: 21 | - Rscript -e 'library(covr); codecov()' -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: esvis 2 | Type: Package 3 | Title: Visualization and Estimation of Effect Sizes 4 | Version: 0.3.1 5 | Authors@R: person("Daniel", "Anderson", email = "daniela@uoregon.edu", 6 | role = c("aut", "cre")) 7 | Description: A variety of methods are provided to estimate and visualize 8 | distributional differences in terms of effect sizes. 
Particular emphasis 9 | is upon evaluating differences between two or more distributions across 10 | the entire scale, rather than at a single point (e.g., differences in 11 | means). For example, Probability-Probability (PP) plots display the 12 | difference between two or more distributions, matched by their empirical 13 | CDFs (see Ho and Reardon, 2012; ), allowing 14 | for examinations of where on the scale distributional differences are 15 | largest or smallest. The area under the PP curve (AUC) is an effect-size 16 | metric, corresponding to the probability that a randomly selected 17 | observation from the x-axis distribution will have a higher value 18 | than a randomly selected observation from the y-axis distribution. 19 | Binned effect size plots are also available, in which the distributions 20 | are split into bins (set by the user) and separate effect sizes (Cohen's 21 | d) are produced for each bin - again providing a means to evaluate the 22 | consistency (or lack thereof) of the difference between two or more 23 | distributions at different points on the scale. Evaluation of empirical 24 | CDFs is also provided, with built-in arguments for providing annotations 25 | to help evaluate distributional differences at specific points (e.g., 26 | semi-transparent shading). All functions take a consistent argument 27 | structure. Calculation of specific effect sizes is also possible. The 28 | following effect sizes are estimable: (a) Cohen's d, (b) Hedges' g, 29 | (c) percentage above a cut, (d) transformed (normalized) percentage above 30 | a cut, (e) area under the PP curve, and (f) the V statistic (see Ho, 31 | 2009; ), which essentially transforms the 32 | area under the curve to standard deviation units. By default, effect sizes 33 | are calculated for all possible pairwise comparisons, but a reference 34 | group (distribution) can be specified. 
35 | Depends: 36 | R (>= 3.1) 37 | Imports: 38 | sfsmisc, 39 | ggplot2, 40 | magrittr, 41 | dplyr, 42 | rlang, 43 | tidyr (>= 1.0.0), 44 | purrr, 45 | Hmisc, 46 | tibble 47 | URL: https://github.com/datalorax/esvis 48 | BugReports: https://github.com/datalorax/esvis/issues 49 | License: MIT + file LICENSE 50 | LazyData: true 51 | RoxygenNote: 7.0.2 52 | Suggests: 53 | testthat, 54 | viridisLite 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2017 2 | COPYRIGHT HOLDER: Daniel Anderson -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(auc) 4 | export(binned_es) 5 | export(binned_plot) 6 | export(coh_d) 7 | export(ecdf_plot) 8 | export(hedg_g) 9 | export(pac) 10 | export(pac_compare) 11 | export(pp_plot) 12 | export(tpac) 13 | export(tpac_compare) 14 | export(v) 15 | importFrom(Hmisc,cut2) 16 | importFrom(dplyr,arrange) 17 | importFrom(dplyr,bind_cols) 18 | importFrom(dplyr,distinct) 19 | importFrom(dplyr,ends_with) 20 | importFrom(dplyr,everything) 21 | importFrom(dplyr,filter) 22 | importFrom(dplyr,funs) 23 | importFrom(dplyr,group_by) 24 | importFrom(dplyr,group_by_all) 25 | importFrom(dplyr,group_by_at) 26 | importFrom(dplyr,left_join) 27 | importFrom(dplyr,matches) 28 | importFrom(dplyr,mutate) 29 | importFrom(dplyr,mutate_at) 30 | importFrom(dplyr,mutate_if) 31 | importFrom(dplyr,n) 32 | importFrom(dplyr,rename) 33 | importFrom(dplyr,select) 34 | importFrom(dplyr,semi_join) 35 | importFrom(dplyr,summarize) 36 | importFrom(dplyr,summarize_at) 37 | importFrom(dplyr,tbl_df) 38 | importFrom(dplyr,ungroup) 39 | importFrom(dplyr,vars) 40 | importFrom(ggplot2,aes_) 41 | importFrom(ggplot2,facet_grid) 42 | importFrom(ggplot2,facet_wrap) 43 | 
importFrom(ggplot2,geom_abline) 44 | importFrom(ggplot2,geom_hline) 45 | importFrom(ggplot2,geom_label) 46 | importFrom(ggplot2,geom_line) 47 | importFrom(ggplot2,geom_point) 48 | importFrom(ggplot2,geom_rect) 49 | importFrom(ggplot2,geom_ribbon) 50 | importFrom(ggplot2,geom_segment) 51 | importFrom(ggplot2,geom_step) 52 | importFrom(ggplot2,geom_vline) 53 | importFrom(ggplot2,ggplot) 54 | importFrom(ggplot2,ggplot_build) 55 | importFrom(ggplot2,labs) 56 | importFrom(grDevices,adjustcolor) 57 | importFrom(graphics,layout) 58 | importFrom(graphics,lines) 59 | importFrom(graphics,par) 60 | importFrom(graphics,polygon) 61 | importFrom(graphics,rect) 62 | importFrom(graphics,segments) 63 | importFrom(magrittr,"%>%") 64 | importFrom(purrr,is_atomic) 65 | importFrom(purrr,map) 66 | importFrom(purrr,map2) 67 | importFrom(purrr,map2_df) 68 | importFrom(purrr,map2_lgl) 69 | importFrom(purrr,map_dbl) 70 | importFrom(purrr,map_lgl) 71 | importFrom(rlang,":=") 72 | importFrom(rlang,.data) 73 | importFrom(rlang,parse_quo) 74 | importFrom(rlang,quo) 75 | importFrom(rlang,quo_get_expr) 76 | importFrom(rlang,quos) 77 | importFrom(rlang,set_names) 78 | importFrom(rlang,sym) 79 | importFrom(rlang,syms) 80 | importFrom(sfsmisc,integrate.xy) 81 | importFrom(stats,as.formula) 82 | importFrom(stats,ecdf) 83 | importFrom(stats,na.omit) 84 | importFrom(stats,qnorm) 85 | importFrom(stats,setNames) 86 | importFrom(stats,terms) 87 | importFrom(stats,var) 88 | importFrom(tibble,lst) 89 | importFrom(tibble,tibble) 90 | importFrom(tidyr,crossing) 91 | importFrom(tidyr,fill) 92 | importFrom(tidyr,gather) 93 | importFrom(tidyr,nest) 94 | importFrom(tidyr,separate) 95 | importFrom(tidyr,spread) 96 | importFrom(tidyr,unnest) 97 | importFrom(utils,installed.packages) 98 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # esvis 0.3.1.0000 2 | Minor update to fix breaking 
changes with the release of dplyr 1.0 3 | 4 | # esvis 0.3.0.0000 5 | This is a major update that: 6 | 7 | * Moves all the plotting functionality to ggplot2. 8 | 9 | * Extends the plotting by allowing faceting with `+` in the 10 | formula. Up to two additional variables are possible. 11 | 12 | * Changed the API so the data argument comes first, followed 13 | by the formula. 14 | 15 | # esvis 0.2.1.0000 (not submitted to CRAN) 16 | This release fixes bugs introduced from dependency updates. 17 | 18 | # esvis 0.2.0.0000 19 | 20 | This release is mostly about reformatting code and minor bug fixes. A few changes: 21 | 22 | * The [viridisLite](https://CRAN.R-project.org/package=viridisLite) package is now listed as a `suggests`, and there are options for the plots to be produced with these color schemes, if the package is installed. 23 | 24 | * A few of the effect sizes were reversed in 0.1, relative to the focal/reference groups. Those have been fixed. 25 | 26 | * There is now a `theme` function that is extensible and allows for custom themes, rather than just the "standard" and "dark" themes. 27 | -------------------------------------------------------------------------------- /R/benchmarks.R: -------------------------------------------------------------------------------- 1 | #' Synthetic benchmark screening data 2 | #' 3 | #' Across the country many schools engage in seasonal benchmark screenings to 4 | #' monitor the progress of their students. These are relatively brief 5 | #' assessments administered to "check-in" on students' progress throughout 6 | #' the year. This dataset was simulated from a real dataset from one large 7 | #' school district using the terrific 8 | #' \href{https://CRAN.R-project.org/package=synthpop}{synthpop} 9 | #' R package. Overall characteristics of the synthetic data are remarkably 10 | #' similar to the real data. 11 | #' 12 | #' @format A data frame with 10240 rows and 9 columns. 13 | #' \describe{ 14 | #' \item{sid}{Integer. 
Student identifier.} 15 | #' \item{cohort}{Integer. Identifies the cohort from which the student was 16 | #' sampled (1-3).} 17 | #' \item{sped}{Character. Special Education status: "Non-Sped" or "Sped"} 18 | #' \item{ethnicity}{Character. The race/ethnicity to which the student 19 | #' identified. Takes on one of seven values: "Am. Indian", "Asian", 20 | #' "Black", "Hispanic", "Native Am.", "Two or More", and "White"} 21 | #' \item{frl}{Character. Student's eligibility for free or reduced price 22 | #' lunch. Takes on the values "FRL" and "Non-FRL".} 23 | #' \item{ell}{Character. Students' English language learner status. Takes 24 | #' on one of three values: "Active", "Monitor", and "Non-ELL". Students 25 | #' coded "Active" were actively receiving English language services 26 | #' at the time of testing. Students coded "Monitor" had previously 27 | #' received services, but not at the time of testing. Students coded 28 | #' "Non-ELL" did not receive services at any time.} 29 | #' \item{season}{Character. The season during which the assessment was 30 | #' administered: "Fall", "Winter", or "Spring"} 31 | #' \item{reading}{Integer. Reading scale score.} 32 | #' \item{math}{Integer. Mathematics scale score.} 33 | #' } 34 | 35 | "benchmarks" -------------------------------------------------------------------------------- /R/binned_es_plot.R: -------------------------------------------------------------------------------- 1 | #' Quantile-binned effect size plot 2 | #' 3 | #' Plots the effect size between focal and reference groups by matched (binned) 4 | #' quantiles (i.e., the results from \link{binned_es}), with the matched 5 | #' quantiles plotted along the x-axis and the effect size plotted along the 6 | #' y-axis. The intent is to examine how (if) the magnitude of the effect size 7 | #' varies at different points of the distributions. 
The mean differences within 8 | #' each quantile bin are divided by the overall pooled standard deviation for 9 | #' the two groups being compared. 10 | #' 11 | #' @inheritParams pp_plot 12 | #' @param qtile_groups The number of quantile bins to split the data by and 13 | #' calculate effect sizes. Defaults to 3 bins (lower, middle, upper). 14 | #' @param es The effect size to plot. Defaults to \code{"g"}, in which case 15 | #' Hedges' g is plotted, which is better for small samples. At present, the 16 | #' only other option is \code{"d"} for Cohen's d. 17 | #' @param points Logical. Should points be plotted for each 18 | #' quantile bin? Defaults to \code{TRUE}. 19 | #' @param shade Logical. Should the standard errors around the effect size point 20 | #' estimates be displayed? Defaults to \code{TRUE}, with the uncertainty 21 | #' displayed with shading. 22 | #' @param shade_alpha Transparency level of the standard error shading. 23 | #' Defaults to 0.40. 24 | #' @param refline Logical. Defaults to \code{TRUE}. Should a horizontal 25 | #' reference line at an effect size of zero be plotted? 26 | #' @param refline_col The color of the reference line. Defaults to 27 | #' \code{"gray40"} 28 | #' @param refline_lty Line type of the reference line. Defaults to 29 | #' \code{"solid"}. 30 | #' @param refline_lwd Line width of the reference line. Defaults to \code{1.1}. 31 | #' @param rects Logical. Should semi-transparent rectangles be plotted in the 32 | #' background to show the binning? Defaults to \code{TRUE}. 33 | #' @param rect_fill Color fill of rectangles to be plotted in the background, if 34 | #' \code{rects == TRUE}. Defaults to "gray20". 35 | #' @param rect_alpha Transparency level of the rectangles in the background when 36 | #' \code{rects == TRUE}. Defaults to 0.35. 
#' @export
#' @examples
#' # Binned Effect Size Plot: Defaults to Hedges' G
#' binned_plot(star, math ~ condition)
#'
#' # Same plot, separated by sex
#' binned_plot(star, math ~ condition + sex)
#'
#' # Same plot by sex and race
#' \dontrun{
#'   binned_plot(star, math ~ condition + sex + race)
#' }
#' ## Evaluate with simulated data: Plot is most interesting when variance
#' # in the distributions being compared differ.
#'
#' library(tidyr)
#' library(ggplot2)
#'
#' # simulate data with different variances
#' set.seed(100)
#' common_vars <- data.frame(low = rnorm(1000, 10, 1),
#'                           high = rnorm(1000, 12, 1),
#'                           vars = "common")
#' diff_vars <- data.frame(low = rnorm(1000, 10, 1),
#'                         high = rnorm(1000, 12, 2),
#'                         vars = "diff")
#' d <- rbind(common_vars, diff_vars)
#'
#' # Plot distributions
#' d <- d %>%
#'   gather(group, value, -vars)
#'
#' ggplot(d, aes(value, color = group)) +
#'   geom_density() +
#'   facet_wrap(~vars)
#'
#' # Note that the difference between the distributions depends on where you're
#' # evaluating from on the x-axis. The binned plot helps us visualize this.
#' # The below shows the binned plots when there is a common versus different
#' # variance
#'
#' binned_plot(d, value ~ group + vars)

binned_plot <- function(data, formula, ref_group = NULL, qtile_groups = 3,
                        es = "g", lines = TRUE, points = TRUE,
                        shade = TRUE, shade_alpha = 0.40,
                        rects = TRUE, rect_fill = "gray20", rect_alpha = 0.35,
                        refline = TRUE, refline_col = "gray40",
                        refline_lty = "solid", refline_lwd = 1.1) {
  rhs <- labels(terms(formula))
  lhs <- all.vars(formula)[1]

  # Convert a formula reference group (e.g. ~`Non-FRL`) to its label BEFORE
  # the length check: a one-sided formula has length 2, so checking length()
  # first would raise the warning below and `ref_group[1]` would reduce the
  # formula to a bare `~`.
  if (!is.null(ref_group) && is.formula(ref_group)) {
    ref_group <- gsub("~|`", "", as.character(ref_group))[2]
  }

  if (length(ref_group) > 1) {
    warning(paste0("Please only specify one reference group. Faceting ",
                   "will be used for other groups. Reference group supplied ",
                   "for first group will be used."))
    ref_group <- ref_group[1]
  }

  # Default reference group: the level of the first grouping variable with
  # the highest mean on the outcome.
  if (is.null(ref_group)) {
    group_means <- tapply(data[[lhs]], data[[rhs[1]]], mean, na.rm = TRUE)
    ref_group <- names(group_means)[which.max(group_means)]
  }

  # Name of the paired ("1"-suffixed) column of the first grouping variable;
  # it is used both to drop the reference group and as the plotting group.
  focal_col <- paste0(rhs[1], 1)

  d <- binned_es(data, formula, ref_group, qtile_groups = qtile_groups,
                 es = es, rename = FALSE) %>%
    filter(!!sym(focal_col) != ref_group)

  # Keep only comparisons made within the same level of each facet variable.
  if (length(rhs) == 2) {
    d <- filter(d, !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)))
  }
  if (length(rhs) == 3) {
    d <- filter(d,
                !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)),
                !!sym(rhs[3]) == !!sym(paste0(rhs[3], 1)))
  }

  # Normal-theory 95% band around each effect size estimate.
  if (shade) {
    d <- d %>%
      mutate(lb = .data$es + (qnorm(0.025) * .data$es_se),
             ub = .data$es + (qnorm(0.975) * .data$es_se))
  }

  # Points are drawn half of the first bin's upper bound to the left of each
  # bin's upper bound.
  p <- d %>%
    mutate(midpoint = .data$qtile_ub - (.data$qtile_ub[1] / 2)) %>%
    ggplot(aes_(~midpoint, ~es))

  if (rects) {
    # Shade every other bin (odd values of `q`) to show the binning.
    p <- p +
      geom_rect(aes_(xmin = ~qtile_lb,
                     xmax = ~qtile_ub,
                     ymin = -Inf,
                     ymax = Inf),
                filter(d, as.logical(q %% 2)),
                alpha = rect_alpha,
                fill = rect_fill,
                inherit.aes = FALSE)
  }
  if (shade) {
    p <- p + geom_ribbon(aes_(ymin = ~lb,
                              ymax = ~ub,
                              fill = as.name(focal_col)),
                         alpha = shade_alpha)
  }
  if (refline) {
    p <- p + geom_hline(yintercept = 0,
                        color = refline_col,
                        lty = refline_lty,
                        lwd = refline_lwd)
  }
  if (lines) {
    p <- p + geom_line(aes_(group = as.name(focal_col),
                            color = as.name(focal_col)))
  }
  if (points) {
    p <- p + geom_point(aes_(group = as.name(focal_col),
                             color = as.name(focal_col)))
  }

  if (length(rhs) == 2) {
    p <- p + facet_wrap(as.formula(paste0("~", rhs[2])))
  }
  if (length(rhs) == 3) {
    p <- p + facet_grid(as.formula(paste0(rhs[2], "~", rhs[3])))
  }
  p + labs(x = "Quantile Bin",
           y = "Effect Size Estimate")
}

# --------------------------------------------------------------------------------
# /R/ecdf_plot.R:
# --------------------------------------------------------------------------------
#' Empirical Cumulative Distribution Plot
#'
#' This is a wrapper function for the \link[ggplot2]{stat_ecdf} function and
#' helps make it easy to directly compare distributions at specific
#' locations along the scale.
#' @param data A tidy data frame containing the data to be plotted.
#' @param formula A formula of the type \code{out ~ group} where \code{out} is
#'   the outcome variable and \code{group} is the grouping variable. Note this
#'   variable can include any arbitrary number of groups. Additional variables
#'   can be included with \code{+} to produce separate plots by the secondary or
#'   tertiary variable (e.g., \code{out ~ group + characteristic1 +
#'   characteristic2}). No more than two additional characteristics can be
#'   supplied at this time.
#' @param cuts Optional numeric vector stating the location of reference
#'   line(s) and/or rectangle(s).
#' @param linewidth Width of ECDF lines. Note that the color of the lines can
#'   be controlled through additional functions (e.g., \code{scale_color_brewer,
#'   scale_color_manual}).
#' @param ref_line_cols Optional vector (or single value) of colors for
#'   \code{cuts} lines.
#' @param ref_linetype Optional vector (or single value) of line types for
#'   \code{cuts} lines. Takes any of the arguments supplied by
#'   \link[ggplot2]{linetype}.
#' @param center Logical. Should the functions be centered prior to plotting?
#'   Defaults to \code{FALSE}. Note that if paneled/faceted plots are produced,
#'   the centering occurs by group.
#' @param ref_rect Logical, defaults to \code{TRUE} when \code{cuts} takes
#'   any non-null value. Should semi-transparent rectangle(s) be plotted at the
#'   locations of \code{cuts}?
#' @param ref_rect_col Color of the fill for the reference rectangles. Defaults
#'   to a dark gray.
#' @param ref_rect_alpha Transparency of the fill for the reference rectangles.
#'   Defaults to 0.15.
#' @export
#' @examples
#' ecdf_plot(benchmarks, math ~ ell,
#'           cuts = c(190, 205, 210),
#'           ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA"))
#'
#' # Customize the plot with ggplot2 functions
#' library(ggplot2)
#' ecdf_plot(benchmarks, math ~ ell,
#'           cuts = c(190, 205, 210),
#'           ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA")) +
#'   theme_minimal() +
#'   theme(legend.position = "bottom")
#'
#' ecdf_plot(seda, mean ~ grade) +
#'   scale_fill_brewer(palette = "Set2") +
#'   theme_minimal()
#'
#' # Use within the dplyr pipeline
#' library(dplyr)
#' benchmarks %>%
#'   mutate(season = factor(season,
#'                          levels = c("Fall", "Winter", "Spring"))) %>%
#'   ecdf_plot(math ~ ell + season + frl)

ecdf_plot <- function(data, formula, cuts = NULL, linewidth = 1.2,
                      ref_line_cols = "gray40", ref_linetype = "solid",
                      center = FALSE, ref_rect = TRUE,
                      ref_rect_col = "gray40", ref_rect_alpha = 0.15) {

  lhs <- all.vars(formula)[1]
  rhs <- labels(terms(formula))

  # Mean-centre the outcome within each combination of grouping variables.
  if (center) {
    data <- data %>%
      select(lhs, rhs) %>%
      group_by_at(rhs) %>%
      mutate(!!sym(lhs) := scale(!!sym(lhs), scale = FALSE))
  }

  # One row per (group, evaluation point): `nd` is the x value and `ecdf`
  # the cumulative proportion at that value.
  d <- ecdf_fun(data, formula, cuts) %>%
    unnest(cols = c(.data$ecdf, .data$nd))

  p <- ggplot(d, aes_(~nd, ~ecdf))

  if (length(rhs) == 2) {
    p <- p + facet_wrap(as.formula(paste0("~", rhs[2])))
  }
  if (length(rhs) == 3) {
    p <- p + facet_grid(as.formula(paste0(rhs[3], "~", rhs[2])))
  }

  if (!is.null(cuts)) {
    p <- p + geom_vline(xintercept = cuts,
                        color = ref_line_cols,
                        linetype = ref_linetype)
    if (ref_rect) {
      # One rectangle per cut, running from the cut to the right edge.
      ref_cut_d <- as.data.frame(t(cuts)) %>%
        gather("dis", "nd")

      p <- p + geom_rect(aes_(xmin = ~nd,
                              xmax = Inf,
                              ymin = 0,
                              ymax = Inf),
                         ref_cut_d,
                         fill = ref_rect_col,
                         alpha = ref_rect_alpha,
                         inherit.aes = FALSE)
    }
  }
  p + geom_step(aes_(color = as.name(rhs[1])),
                size = linewidth) +
    labs(x = lhs,
         y = "Proportion")
}

# --------------------------------------------------------------------------------
# /R/es_calcs.R:
# --------------------------------------------------------------------------------
#' Pooled Standard Deviation
#'
#' The denominator for Cohen's d
#' @keywords internal
#' @param n1 The sample size for group 1
#' @param n2 The sample size for group 2
#' @param vr1 The variance for group 1
#' @param vr2 The variance for group 2
#'
psd <- function(n1, n2, vr1, vr2) {
  dnum1 <- (n1 - 1) * vr1
  dnum2 <- (n2 - 1) * vr2
  ddnom <- n1 + n2 - 2

  sqrt((dnum1 + dnum2) / ddnom)
}

#' Cohen's d
#'
#' Wraps the equation into a function
#' @keywords internal
#' @param n1 The sample size for group 1
#' @param n2 The sample size for group 2
#' @param mn1 The mean for group 1
#' @param mn2 The mean for group 2
#' @param vr1 The variance for group 1
#' @param vr2 The variance for group 2

coh <- function(n1, n2, mn1, mn2, vr1, vr2) {
  (mn1 - mn2) / psd(n1, n2, vr1, vr2)
}

# Standard error of a standardized mean difference `d` for groups of size
# `n1` and `n2`: sqrt((n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2))).
# The sizes are coerced to double first: they usually arrive as integer
# counts, and `n1 * n2` on integers overflows to NA (with a warning) once
# the product exceeds .Machine$integer.max.
coh_se <- function(n1, n2, d) {
  n1 <- as.numeric(n1)
  n2 <- as.numeric(n2)
  sqrt((n1 + n2) / (n1 * n2) + d^2 / (2 * (n1 + n2)))
}

#' Compute Cohen's \emph{d}
#'
#' This function calculates effect sizes in terms of Cohen's \emph{d}, also
#' called the uncorrected effect size.
#' See \code{\link{hedg_g}} for the sample
#' size corrected version. Also see
#' \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)}
#' for a discussion on different types of effect sizes and their
#' interpretation. Note that missing data are removed from the calculations of
#' the means and standard deviations.
#' @param data The data frame used for estimation - ideally structured in a tidy
#'   format.
#' @param formula A formula of the type \code{out ~ group} where \code{out} is
#'   the outcome variable and \code{group} is the grouping variable. Note this
#'   variable can include any arbitrary number of groups. Additional variables
#'   can be included with \code{+} to produce separate estimates by the secondary
#'   or tertiary variables of interest (e.g., \code{out ~ group + characteristic1
#'   + characteristic2}).
#' @param ref_group Optional. A character vector or formula listing the
#'   reference group levels for each variable on the right hand side of the
#'   formula, supplied in the same order as the formula. Note that if using the
#'   formula version, levels that are numbers, or include hyphens, spaces, etc.,
#'   should be wrapped in back ticks (e.g.,
#'   \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When
#'   in doubt, it is safest to use the back ticks, as they will not interfere
#'   with anything if they are not needed. See examples below for more details.
#' @param se Logical. Should the standard error of the effect size be
#'   estimated and returned in the resulting data frame? Defaults to
#'   \code{TRUE}.
#' @return By default the Cohen's \emph{d} for all possible pairings of
#'   the grouping factor(s) are returned.
#' @export
#' @examples
#'
#' # Calculate Cohen's d for all pairwise comparisons
#' coh_d(star, reading ~ condition)
#'
#' # Report only relative to regular-sized classrooms
#' coh_d(star,
#'       reading ~ condition,
#'       ref_group = "reg")
#'
#' # Report by ELL and FRL groups for each season, compare to non-ELL students
#' # who were not eligible for free or reduced price lunch in the fall (using
#' # the formula interface for reference group referencing).
#'
#' coh_d(benchmarks,
#'       math ~ ell + frl + season,
#'       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
#'
#' # Same thing but with character vector supplied, rather than a formula
#' coh_d(benchmarks,
#'       math ~ ell + frl + season,
#'       ref_group = c("Non-ELL", "Non-FRL", "Fall"))

coh_d <- function(data, formula, ref_group = NULL, se = TRUE) {
  # Paired descriptives for every group combination; integer counts are
  # converted to double before any arithmetic on them.
  stats <- descrip_cross(data, formula,
                         length = length, mean = mean, var = var) %>%
    mutate_if(is.integer, as.double)

  d <- stats %>%
    mutate(coh_d = coh(.data$length1,
                       .data$length,
                       .data$mean1,
                       .data$mean,
                       .data$var1,
                       .data$var),
           coh_se = coh_se(.data$length1,
                           .data$length,
                           .data$coh_d)) %>%
    select(-.data$length,
           -.data$length1,
           -.data$mean,
           -.data$mean1,
           -.data$var,
           -.data$var1) %>%
    ungroup()

  # Honour the documented `se` switch (it was previously accepted but
  # ignored, so the standard error column was always returned).
  if (!se) {
    d <- select(d, -.data$coh_se)
  }

  if (!is.null(ref_group)) {
    d <- ref_subset(d, formula, ref_group)
  }
  rename_ref_foc(d, formula)
}

#' Hedges' g
#'
#' Wraps the equation into a function: Cohen's d multiplied by the
#' small-sample correction \code{1 - 3 / (4 * (n1 + n2) - 9)}.
#' @keywords internal
#' @param n1 The sample size for group 1
#' @param n2 The sample size for group 2
#' @param d The value of Cohen's d
#'
hedg <- function(n1, n2, d) {
  d * (1 - (3 / (4 * (n1 + n2) - 9)))
}

#' Compute Hedges' \emph{g}
#'
#' This function
#' calculates effect sizes in terms of Hedges' \emph{g}, also
#' called the corrected (for sample size) effect size. See
#' \code{\link{coh_d}} for the uncorrected version. Also see
#' \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)}
#' for a discussion on different types of effect sizes and their
#' interpretation. Note that missing data are removed from the calculations of
#' the means and standard deviations.
#' @inheritParams coh_d
#' @param keep_d Logical. Should Cohen's \emph{d} be reported along with
#'   Hedges' \emph{g}? Defaults to \code{TRUE}.
#' @return By default the Hedges' \emph{g} for all possible pairings of
#'   the grouping factor are returned as a tidy data frame.
#' @export
#' @examples
#'
#' # Calculate Hedges' g for all pairwise comparisons
#' hedg_g(star, reading ~ condition)
#'
#' # Report only relative to regular-sized classrooms
#' hedg_g(star,
#'        reading ~ condition,
#'        ref_group = "reg")
#'
#' # Report by ELL and FRL groups for each season, compare to non-ELL students
#' # who were not eligible for free or reduced price lunch in the fall (using
#' # the formula interface for reference group referencing).
#'
#' hedg_g(benchmarks,
#'        math ~ ell + frl + season,
#'        ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
#'
#' # Same thing but with character vector supplied, rather than a formula
#' hedg_g(benchmarks,
#'        math ~ ell + frl + season,
#'        ref_group = c("Non-ELL", "Non-FRL", "Fall"))

hedg_g <- function(data, formula, ref_group = NULL,
                   keep_d = TRUE) {
  paired <- descrip_cross(data, formula,
                          length = length, mean = mean, var = var)

  # NOTE(review): coh_d() passes the "1"-suffixed columns to coh() first,
  # whereas the unsuffixed columns come first here, so the sign of `coh_d`
  # differs between the two functions - confirm which direction is intended.
  paired <- mutate(paired,
                   coh_d = coh(.data$length, .data$length1,
                               .data$mean, .data$mean1,
                               .data$var, .data$var1),
                   hedg_g = hedg(.data$length, .data$length1, .data$coh_d))

  # Drop the descriptives once the effect sizes have been derived.
  g <- select(paired,
              -.data$length, -.data$length1,
              -.data$mean, -.data$mean1,
              -.data$var, -.data$var1)

  if (!keep_d) {
    g <- select(g, -.data$coh_d)
  }

  if (is.null(ref_group)) {
    return(rename_ref_foc(g, formula))
  }
  rename_ref_foc(ref_subset(g, formula, ref_group), formula)
}

# Difference in means (`mean1 - mean`) for every paired group combination,
# optionally within quantile bins. `ref_group` is accepted but not used.
mean_diff <- function(data, formula, ref_group, qtile_groups = NULL) {
  paired <- descrip_cross(data, formula,
                          mean = mean, qtile_groups = qtile_groups)
  paired <- mutate(paired, mean_diff = .data$mean1 - .data$mean)
  select(paired, -.data$mean, -.data$mean1)
}

# Pooled standard deviation for every paired group combination. The group
# sizes are kept in the result only when `keep_n = TRUE`. `ref_group` is
# accepted but not used.
pooled_sd <- function(data, formula, ref_group, keep_n = FALSE) {
  paired <- descrip_cross(data, formula, length = length, var = var)
  paired <- mutate(paired,
                   psd = psd(.data$length, .data$length1,
                             .data$var, .data$var1))
  out <- select(paired, -.data$var, -.data$var1)

  if (keep_n) {
    return(out)
  }
  select(out, -.data$length, -.data$length1)
}

#' Calculate binned effect sizes
#' @inheritParams coh_d
#' @param qtile_groups The number of quantile bins to split the data by and
#'   calculate effect sizes. Defaults to 3 bins (lower, middle, upper).
#' @param es The effect size to calculate. Currently the only options are
#'   "d" or "g".
#' @param rename Logical. Should the column names be relabeled according to
#'   the reference and focal groups. Defaults to \code{TRUE}.
#' @return A data frame with the corresponding effect sizes.
#' @export

binned_es <- function(data, formula, ref_group = NULL, qtile_groups = 3,
                      es = "g", rename = TRUE) {
  # Validate `es` up front (as a scalar) so a bad value fails before any of
  # the summary statistics are computed.
  if (!is.character(es) || length(es) != 1 || !es %in% c("g", "d")) {
    stop("es must be one of `'g'` or `'d'`.")
  }

  # Mean differences are computed within quantile bins; the pooled SD call
  # is not given `qtile_groups`, so it is computed without binning.
  mn_diff <- mean_diff(data, formula, qtile_groups = qtile_groups)
  p_sd <- pooled_sd(data, formula, keep_n = TRUE)

  # Natural join on the shared columns; the "Joining, by = ..." message is
  # suppressed on purpose.
  d <- suppressMessages(left_join(mn_diff, p_sd)) %>%
    mutate(es = .data$mean_diff / .data$psd,
           es_se = coh_se(.data$length, .data$length1, .data$es))

  # Note that `es` below is the function argument, not the `es` column.
  if (es == "g") {
    d <- mutate(d, es = hedg(.data$length, .data$length1, .data$es))
  }

  if (!is.null(ref_group)) {
    d <- ref_subset(d, formula, ref_group)
  }
  if (rename) {
    d <- rename_ref_foc(d, formula)
  }
  d
}

#' Computes the empirical cumulative distribution function for all groups
#' supplied by the formula.
#' @inheritParams coh_d
#' @param cuts Optional vector of cut scores. If supplied, the ECDF will be
#'   guaranteed to include these points. Otherwise, there could be gaps in the
#'   ECDF at those particular points (used in plotting the cut scores).
#' @keywords internal

ecdf_fun <- function(data, formula, cuts = NULL) {
  # With no cuts supplied, 0 is added to the evaluation grid.
  if (is.null(cuts)) cuts <- 0
  rhs <- labels(terms(formula))
  lhs <- all.vars(formula)[1]

  data %>%
    mutate_at(vars(!!!syms(rhs)), list(as.character)) %>%
    group_by(!!!syms(rhs)) %>%
    nest() %>%
    # `ecdf` starts as the fitted step function for each group, `nd` is the
    # grid it is evaluated on (observed values, the cuts, and +/- Inf), and
    # `ecdf` is then overwritten with the evaluated proportions.
    mutate(ecdf = map(.data$data, ~ecdf(.[[lhs]])),
           nd = map(.data$data,
                    ~c(-Inf, sort(c(unique(.[[lhs]]), cuts)), Inf)),
           ecdf = map2(.data$ecdf, .data$nd, ~.x(.y))) %>%
    select(-.data$data)
}

#' Pairs empirical cumulative distribution functions for all groups
#' supplied by the formula.
#' @inheritParams ecdf_fun
#' @keywords internal

paired_ecdf <- function(data, formula, cuts = NULL) {
  # One row per group, with `nd` holding a data frame of (x, y) ECDF points.
  ecdf_ref <- ecdf_fun(data, formula, cuts) %>%
    mutate(nd = map2(.data$nd, .data$ecdf, ~data.frame(x = .x, y = .y))) %>%
    select(-.data$ecdf)

  # Same table with a "1" suffix on every column, used as the second group.
  ecdf_foc <- ecdf_ref
  names(ecdf_foc) <- paste0(names(ecdf_ref), "1")

  cross(ecdf_ref, ecdf_foc) %>%
    # Drop pairings whose ECDF tables are identical (e.g. a group with itself).
    filter(!map2_lgl(.data$nd, .data$nd1, ~identical(.x, .y))) %>%
    mutate(matched = map2(.data$nd, .data$nd1,
                          # Evaluate both ECDFs on the union of their x grids.
                          # The vectors must be combined with c() first:
                          # unique(a, b) treats `b` as `incomparables` and
                          # does not add its values to the result.
                          ~data.frame(x = sort(unique(c(.x$x, .y$x)))) %>%
                            left_join(.x, by = "x") %>%
                            left_join(.y,
                                      by = "x",
                                      suffix = c("_ref", "_foc")) %>%
                            # Carry the last known ECDF value forward at x
                            # values seen in only one of the two groups.
                            fill(names(.)))) %>%
    select(-.data$nd, -.data$nd1)
}

#' Compute the Area Under the \link{pp_plot} Curve
#'
#' Calculates the area under the \code{pp} curve. The area under the curve is
#' also a useful effect-size like statistic, representing the probability that
#' a randomly selected individual from the \code{x} distribution will have a
#' higher value than a randomly selected individual from the \code{y}
#' distribution.
#' @inheritParams coh_d
#' @param rename Used primarily for internal purposes. Should the column
#'   names be renamed to reference the focal and reference groups?
#'   Defaults to \code{TRUE}.
#' @return By default the area under the curve for all possible pairings of
#'   the grouping factor are returned.
#' @export
#' @examples
#'
#' # Calculate AUC for all pairwise comparisons
#' auc(star, reading ~ condition)
#'
#' # Report only relative to regular-sized classrooms
#' auc(star,
#'     reading ~ condition,
#'     ref_group = "reg")
#'
#' # Report by ELL and FRL groups for each season, compare to non-ELL students
#' # who were not eligible for free or reduced price lunch in the fall (using
#' # the formula interface for reference group referencing).
#' \dontrun{
#' auc(benchmarks,
#'       math ~ ell + frl + season,
#'       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
#'
#' # Same thing but with character vector supplied, rather than a formula
#' auc(benchmarks,
#'       math ~ ell + frl + season,
#'       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
#' }
#'

auc <- function(data, formula, ref_group = NULL, rename = TRUE) {
  # For each pairing, integrate the reference ECDF over the focal ECDF
  # (y_ref as a function of y_foc) without spline smoothing.
  d <- paired_ecdf(data, formula) %>%
    mutate(auc = map_dbl(.data$matched,
                         ~integrate.xy(.$y_foc, .$y_ref,
                                       use.spline = FALSE))) %>%
    select(-.data$matched)

  if (!is.null(ref_group)) {
    d <- ref_subset(d, formula, ref_group)
  }
  if (rename) {
    d <- rename_ref_foc(d, formula)
  }
  d
}

#' Calculate the V effect size statistic
#'
#' This function calculates the effect size V, as discussed by
#' \href{https://journals.sagepub.com/doi/abs/10.3102/1076998609332755}{Ho, 2009}. The V
#' statistic is a transformation of \code{\link{auc}}, interpreted as the
#' average difference between the distributions in standard deviation units.
#' @inheritParams coh_d
#' @return By default the V statistic for all possible pairings of
#'   the grouping factor are returned as a tidy data frame. Alternatively, a
#'   vector can be returned, and/or only the V corresponding to a specific
#'   reference group can be returned.
#' @export
#' @examples
#'
#' # Calculate V for all pairwise comparisons
#' v(star, reading ~ condition)
#'
#' # Report only relative to regular-sized classrooms
#' v(star,
#'     reading ~ condition,
#'     ref_group = "reg")
#'
#' # Report by ELL and FRL groups for each season, compare to non-ELL students
#' # who were not eligible for free or reduced price lunch in the fall (using
#' # the formula interface for reference group referencing).
#'
#' \dontrun{
#' v(benchmarks,
#'       math ~ ell + frl + season,
#'       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
#'
#' # Same thing but with character vector supplied, rather than a formula
#' v(benchmarks,
#'       math ~ ell + frl + season,
#'       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
#' }
#'

v <- function(data, formula, ref_group = NULL) {
  # AUC for every pairing, with the original (unrenamed) column names so that
  # reference-group subsetting below can match on them.
  areas <- auc(data, formula, rename = FALSE)

  # V = sqrt(2) * qnorm(AUC); the AUC column itself is dropped.
  out <- mutate(areas, v = sqrt(2) * qnorm(.data$auc))
  out <- select(out, -.data$auc)

  if (!is.null(ref_group)) {
    out <- ref_subset(out, formula, ref_group)
  }
  rename_ref_foc(out, formula)
}

#' Compute the proportion above a specific cut location
#'
#' Computes the proportion of the corresponding group, as specified by the
#' \code{formula}, scoring above the specified \code{cuts}.
#' @inheritParams ecdf_fun
#' @inheritParams coh_d
#' @return Tidy data frame of the proportion above the cutoff for
#'   each (or selected) groups.
#' @seealso \code{\link{pac_compare}}, \code{\link{tpac}},
#'   \code{\link{tpac_compare}}
#' @export
#' @examples
#' # Compute the proportion above each of three cuts for every group
#' pac(star,
#'     reading ~ condition,
#'     cuts = c(450, 500, 550))
#'
#' pac(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500))
#'
#' pac(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500),
#'     ref_group = ~small + no + white)

pac <- function(data, formula, cuts, ref_group = NULL) {
  if (!is.numeric(cuts) || length(cuts) == 0) {
    stop("`cuts` must be a numeric vector with at least one cut score.")
  }

  rhs <- labels(terms(formula))
  d <- ecdf_fun(data, formula, cuts)

  # One column per cut, repeated for every group row. data.frame() names
  # these X1, X2, ...; a single cut is named `cut` directly.
  cut_tbl <- data.frame(matrix(rep(cuts, each = nrow(d)), nrow = nrow(d)))
  if (length(cuts) == 1) {
    names(cut_tbl) <- "cut"
  }
  d <- dplyr::bind_cols(d, cut_tbl)

  # Several cuts: stack the X* columns into a single `cut` column.
  if (length(cuts) > 1) {
    d <- gather(d, "dis", "cut", matches("^X\\d"))
  }

  # Expand the ECDF points and keep only the point sitting on each cut. The
  # filter has to run for a single cut as well, otherwise every ECDF point
  # of every group is returned.
  d <- d %>%
    unnest(cols = c(.data$ecdf, .data$nd)) %>%
    filter(.data$nd == .data$cut)

  if (!is.null(ref_group)) {
    d <- ref_subset(d, formula, ref_group)
  }

  d %>%
    mutate(pac = 1 - .data$ecdf) %>%
    distinct() %>%
    select(!!!syms(rhs), .data$cut, .data$pac)
}

#' Compute the difference in the proportion above a specific cut location
#'
#' Computes the difference in the proportion above the specified \code{cuts}
#' for all possible pairwise comparisons of the groups specified by the
#' \code{formula}.
#' @inheritParams ecdf_fun
#' @inheritParams coh_d
#' @return Tidy data frame of the proportion above the cutoff for
#'   each (or selected) groups.
#' @seealso \code{\link{pac}}, \code{\link{tpac}}, \code{\link{tpac_compare}}
#' @export
#' @examples
#' # Compute differences for all pairwise comparisons for each of three cuts
#' pac_compare(star,
#'     reading ~ condition,
#'     cuts = c(450, 500, 550))
#'
#' pac_compare(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500))
#'
#' pac_compare(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500),
#'     ref_group = ~small + no + white)

pac_compare <- function(data, formula, cuts, ref_group = NULL) {
  rhs <- labels(terms(formula))

  # Proportion above the cut per group, plus a copy with "1"-suffixed names
  # that serves as the second group in each pairing.
  d1 <- d2 <- pac(data, formula, cuts)
  names(d2) <- paste0(names(d1), "1")

  d <- cross(d1, d2) %>%
    filter(.data$cut == .data$cut1) %>%
    mutate(pac_diff = .data$pac - .data$pac1)

  # Keep pairings that differ on at least one grouping variable. Filtering
  # once per variable and row-binding repeats any pairing that differs on
  # several variables, so the result is de-duplicated.
  d <- map2_df(rhs,
               paste0(rhs, 1),
               ~filter(d, !!sym(.x) != !!sym(.y))) %>%
    distinct()

  if (!is.null(ref_group)) {
    d <- ref_subset(d, formula, ref_group)
  }
  d <- rename_ref_foc(d, formula)

  d %>%
    ungroup() %>%
    rename("pac_ref" = "pac",
           "pac_foc" = "pac1") %>%
    select(.data$cut,
           ends_with("_ref"),
           ends_with("_foc"),
           .data$pac_diff,
           -.data$cut1)
}

#' Transformed proportion above the cut
#'
#' This function transforms calls to \link{pac} into standard deviation units.
#' Function assumes that each distribution is distributed normally with
#' common variances. See
#' \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho &
#' Reardon, 2012}
#' @inheritParams ecdf_fun
#' @inheritParams coh_d
#' @return Tidy data frame of the proportion above the cutoff for
#'   each (or selected) groups.
#' @seealso \code{\link{pac}}, \code{\link{pac_compare}},
#'   \code{\link{tpac_compare}}
#' @export
#' @examples
#' # Compute the transformed proportion above each of three cuts
#' tpac(star,
#'     reading ~ condition,
#'     cuts = c(450, 500, 550))
#'
#' tpac(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500))
#'
#' tpac(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500),
#'     ref_group = ~small + no + white)

tpac <- function(data, formula, cuts, ref_group = NULL) {
  prop_above <- pac(data, formula, cuts, ref_group)

  # Map each proportion to its standard-normal quantile, then relabel the
  # column to show that it has been transformed.
  prop_above <- mutate(prop_above, pac = qnorm(.data$pac))
  rename(prop_above, "tpac" = "pac")
}

#' Compare Transformed Proportion Above the Cut
#'
#' This function compares all possible pairwise comparisons, as supplied by
#' \code{formula}, in terms of the transformed proportion above the cut. This
#' is an effect-size like measure of the differences between two groups as the
#' cut point(s) in the distribution. See
#' \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho &
#' Reardon, 2012}
#' @inheritParams ecdf_fun
#' @inheritParams coh_d
#' @return Tidy data frame of the proportion above the cutoff for
#'   each (or selected) groups.
#' @seealso \code{\link{pac}}, \code{\link{pac_compare}}, \code{\link{tpac}}
#' @export
#' @examples
#' # Compute differences for all pairwise comparisons for each of three cuts
#' tpac_compare(star,
#'     reading ~ condition,
#'     cuts = c(450, 500, 550))
#'
#' tpac_compare(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500))
#'
#' tpac_compare(star,
#'     reading ~ condition + freelunch + race,
#'     cuts = c(450, 500),
#'     ref_group = ~small + no + white)

tpac_compare <- function(data, formula, cuts, ref_group = NULL) {
  props <- pac_compare(data, formula, cuts, ref_group)

  # Put both proportions on the normal-quantile scale first; the difference
  # is then taken between the transformed values (reference minus focal).
  props <- mutate(props,
                  pac_ref = qnorm(.data$pac_ref),
                  pac_foc = qnorm(.data$pac_foc),
                  tpac_diff = .data$pac_ref - .data$pac_foc)

  props <- rename(props,
                  "tpac_ref" = "pac_ref",
                  "tpac_foc" = "pac_foc")

  # The untransformed difference is dropped from the output.
  select(props, -.data$pac_diff)
}
--------------------------------------------------------------------------------
/R/esvis-package.R:
--------------------------------------------------------------------------------
#' @keywords internal
#' @importFrom ggplot2 ggplot aes_ facet_wrap facet_grid geom_step geom_point
#'   geom_vline ggplot_build geom_label geom_line geom_hline geom_ribbon
#'   geom_segment labs geom_abline geom_rect
#' @importFrom dplyr mutate mutate_at select group_by_at arrange distinct
#'   filter matches group_by_all group_by summarize summarize_at n left_join
#'   semi_join vars funs ends_with rename tbl_df ungroup everything mutate_if
#'   bind_cols
#' @importFrom tidyr spread fill gather separate crossing nest unnest
#' @importFrom tibble tibble lst
#' @importFrom purrr map map_dbl map_lgl map2 map2_lgl map2_df is_atomic
#' @importFrom rlang := sym syms quo quos .data parse_quo set_names
#'   quo_get_expr
#' @importFrom Hmisc cut2
#' @importFrom magrittr %>%
#' @importFrom graphics par layout lines segments
rect polygon 17 | #' @importFrom utils installed.packages 18 | #' @importFrom grDevices adjustcolor 19 | #' @importFrom sfsmisc integrate.xy 20 | #' @importFrom stats as.formula terms setNames ecdf qnorm na.omit var 21 | "_PACKAGE" 22 | 23 | if(getRversion() >= "2.15.1") utils::globalVariables(c(".")) 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /R/pp_plot.R: -------------------------------------------------------------------------------- 1 | #' Produces the paired probability plot for two groups 2 | #' 3 | #' The paired probability plot maps the probability of obtaining a specific 4 | #' score for each of two groups. The area under the curve 5 | #' (\code{\link{auc}}) corresponds to the probability that a randomly 6 | #' selected observation from the x-axis group will have a higher score than 7 | #' a randomly selected observation from the y-axis group. This function 8 | #' extends the basic pp-plot by allowing multiple curves and faceting to 9 | #' facilitate a variety of comparisons. Note that because the plotting is 10 | #' built on top of \link[ggplot2]{ggplot2}, additional customization can 11 | #' be made on top of the plots, as illustrated in the examples. 12 | #' 13 | #' @param data The data frame to be plotted 14 | #' @param formula A formula of the type \code{out ~ group} where \code{out} is 15 | #' the outcome variable and \code{group} is the grouping variable. Note this 16 | #' variable can include any arbitrary number of groups. Additional variables 17 | #' can be included with \code{+} to produce separate plots by the secondary or 18 | #' tertiary variable of interest (e.g., \code{out ~ group + characteristic1 + 19 | #' characteristic2}). No more than two additional characteristics can be 20 | #' supplied at this time. 21 | #' @param ref_group Optional character vector (of length 1) naming the 22 | #' reference group. Defaults to the group with the highest mean score. 
23 | #' @param cuts Integer. Optional vector (or single number) of scores used to 24 | #' annotate the plot. If supplied, line segments will extend from the 25 | #' corresponding x and y axes and meet at the PP curve. 26 | #' @param cut_labels Logical. Should the reference lines corresponding to 27 | #' \code{cuts} be labeled? Defaults to \code{TRUE}. 28 | #' @param cut_label_x The x-axis location of the cut labels. Defaults to 0.02. 29 | #' @param cut_label_size The size of the cut labels. Defaults to 3. 30 | #' @param lines Logical. Should the PP Lines be plotted? Defaults to 31 | #' \code{TRUE}. 32 | #' @param linetype The \link[ggplot2]{linetype} for the PP lines. Defaults to 33 | #' "solid". 34 | #' @param linewidth The width of the PP lines. Defaults to 1.1 (just 35 | #' marginally larger than the default ggplot2 lines). 36 | #' @param shade Logical. Should the area under the curve be shaded? Defaults to 37 | #' \code{TRUE}. 38 | #' @param shade_alpha Transparency of the shading. Defaults to 0.2. 39 | #' @param refline Logical. Should a diagonal reference line be plotted, 40 | #' representing the value at which no difference is observed between the 41 | #' reference and focal distributions? Defaults to \code{TRUE}. 42 | #' @param refline_col Color of the reference line. Defaults to a dark gray. 43 | #' @param refline_type The \link[ggplot2]{linetype} for the reference line. 44 | #' Defaults to "dashed". 45 | #' @param refline_width The width of the reference line. Defaults to 1.1, 46 | #' the same as the default width of the PP lines. 47 | #' @return A \link[ggplot2]{ggplot2} object displaying the specified PP plot. 48 | #' @export 49 | #' @examples 50 | #' # PP plot examining differences by condition 51 | #' pp_plot(star, math ~ condition) 52 | #' 53 | #' # The sample size gets very small in the above within cells (e.g., wild 54 | #' # changes within the "other" group in particular). Overall, the effect doesn't 55 | #' # seem to change much by condition.
#'
#' # Look at something a little more interesting
#' \dontrun{
#' pp_plot(benchmarks, math ~ ell + season + frl)
#' }
#' # Add some cut scores
#' pp_plot(benchmarks, math ~ ell, cuts = c(190, 210, 215))
#'
#' ## Make another interesting plot. Use ggplot to customize
#' \dontrun{
#' library(tidyr)
#' library(ggplot2)
#' benchmarks %>%
#'   gather(subject, score, reading, math) %>%
#'   pp_plot(score ~ ell + subject + season,
#'           ref_group = "Non-ELL") +
#'   scale_fill_brewer(name = "ELL Status", palette = "Pastel2") +
#'   scale_color_brewer(name = "ELL Status", palette = "Pastel2") +
#'   labs(title = "Differences among English Language Learning Groups",
#'        subtitle = "Note crossing of reference line") +
#'   theme_minimal()
#' }
#'
pp_plot <- function(data, formula, ref_group = NULL, cuts = NULL,
                    cut_labels = TRUE, cut_label_x = 0.02, cut_label_size = 3,
                    lines = TRUE, linetype = "solid", linewidth = 1.1,
                    shade = TRUE, shade_alpha = 0.2, refline = TRUE,
                    refline_col = "gray40", refline_type = "dashed",
                    refline_width = 1.1) {

  rhs <- labels(terms(formula))
  lhs <- all.vars(formula)[1]

  # Default reference group: the level of the first grouping variable with
  # the highest mean outcome.
  if (is.null(ref_group)) {
    group_means <- tapply(data[[lhs]], data[[rhs[1]]], mean, na.rm = TRUE)
    ref_group <- names(group_means)[which.max(group_means)]
  }

  # Paired ECDFs in long form, keeping only pairings whose first group is
  # the reference group.
  d <- paired_ecdf(data, formula, cuts) %>%
    unnest(cols = .data$matched) %>%
    filter(!!sym(rhs[1]) == ref_group)

  # Secondary/tertiary variables define facets, so only pairings within the
  # same level of those variables are kept.
  if (length(rhs) == 2) {
    d <- filter(d, !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)))
  }
  if (length(rhs) == 3) {
    d <- filter(d,
                !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)),
                !!sym(rhs[3]) == !!sym(paste0(rhs[3], 1)))
  }

  # The column named <first grouping variable>1 identifies the focal group
  # and drives the fill/colour aesthetics.
  focal_var <- as.name(paste0(rhs[1], 1))

  p <- ggplot(d, aes_(quote(y_foc), quote(y_ref)))

  if (shade) {
    p <- p +
      geom_ribbon(aes_(fill = focal_var,
                       ymin = -Inf,
                       ymax = quote(y_ref)),
                  alpha = shade_alpha)
  }
  if (refline) {
    p <- p + geom_abline(intercept = 0,
                         slope = 1,
                         color = refline_col,
                         linetype = refline_type,
                         size = refline_width)
  }
  if (lines) {
    p <- p + geom_line(aes_(color = focal_var),
                       linetype = linetype,
                       size = linewidth)
  }
  if (!is.null(cuts)) {
    cut_data <- d %>%
      filter(.data$x %in% cuts)

    # Vertical and horizontal segments running from the axes to the curve
    # at each cut score.
    p <- p +
      geom_segment(aes_(x = quote(y_foc),
                        xend = quote(y_foc),
                        y = -Inf,
                        yend = quote(y_ref),
                        color = focal_var),
                   cut_data) +
      geom_segment(aes_(x = -Inf,
                        xend = quote(y_foc),
                        y = quote(y_ref),
                        yend = quote(y_ref),
                        color = focal_var),
                   cut_data)
    if (cut_labels) {
      # Use the caller's label position and size; the defaults (0.02 and 3)
      # reproduce the previously hard-coded values.
      p <- p +
        geom_label(aes_(x = cut_label_x,
                        y = quote(y_ref),
                        label = quote(x)),
                   cut_data,
                   size = cut_label_size)
    }
  }
  if (length(rhs) == 2) {
    p <- p + facet_wrap(as.formula(paste0("~", rhs[2])))
  }
  if (length(rhs) == 3) {
    p <- p + facet_grid(as.formula(paste0(rhs[2], "~", rhs[3])))
  }
  p + labs(x = "Focal Group",
           y = ref_group)
}
--------------------------------------------------------------------------------
/R/seda.R:
--------------------------------------------------------------------------------
#' Portion of the Stanford Educational Data Archive (SEDA).
#'
#' The full SEDA dataset contains mean test scores on statewide testing data in
#' reading and math for every school district in the United States. See a
#' description of the data
#' \href{https://purl.stanford.edu/db586ns4974}{here}. The data
#' represented in this package represent a random sample of 10% of all the
#' cases in the full dataset. To access the full data, please visit the
#' data archive in the above link.
10 | #' 11 | #' 12 | #' @format A data frame with 32625 rows and 8 columns. 13 | #' \describe{ 14 | #' \item{leaid}{Integer. Local education authority identifier.} 15 | #' \item{leaname}{Character. Local education authority name.} 16 | #' \item{stateabb}{Character. State abbreviation.} 17 | #' \item{year}{Integer. Year the data were collected.} 18 | #' \item{grade}{Integer. Grade level the data were collected.} 19 | #' \item{subject}{Character. Whether the data were from reading or 20 | #' mathematics.} 21 | #' \item{mean}{Double. Mean test score for the LEA in the corresponding 22 | #' subject/grade/year.} 23 | #' \item{se}{Double. Standard error of the mean.} 24 | #' } 25 | #' 26 | #' @source 27 | #' Sean F. Reardon, Demetra Kalogrides, Andrew Ho, Ben Shear, Kenneth Shores, 28 | #' Erin Fahle. (2016). Stanford Education Data Archive. 29 | #' \href{http://purl.stanford.edu/db586ns4974}{ 30 | #' http://purl.stanford.edu/db586ns4974}. For more information, please visit 31 | #' \href{https://edopportunity.org}{https://edopportunity.org}. 32 | "seda" -------------------------------------------------------------------------------- /R/star.R: -------------------------------------------------------------------------------- 1 | #' Data from the Tennessee class size experiment 2 | #' 3 | #' These data come from the Ecdat package and represent a cross-section of 4 | #' data from Project STAR (Student/Teacher Achievement Ratio), where students 5 | #' were randomly assigned to classrooms. 6 | #' 7 | #' @format A data frame with 5748 rows and 9 columns. 8 | #' \describe{ 9 | #' \item{sid}{Integer. Student identifier.} 10 | #' \item{schid}{Integer. School identifier.} 11 | #' \item{condition}{Character. Classroom type the student was enrolled in 12 | #' (randomly assigned to).} 13 | #' \item{tch_experience}{Integer. Number of years of teaching experience 14 | #' for the teacher in the classroom in which the student was 15 | #' enrolled.} 16 | #' \item{sex}{Character. 
#'     Sex of student: "girl" or "boy".}
#'   \item{freelunch}{Character. Eligibility of the student for free or
#'     reduced price lunch: "no" or "yes"}
#'   \item{race}{Character. The identified race of the student: "white",
#'     "black", or "other"}
#'   \item{math}{Integer. Math scale score.}
#'   \item{reading}{Integer. Reading scale score.}
#' }

"star"
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
# TRUE when `x` inherits from class "formula".
is.formula <- function(x) {
  inherits(x, "formula")
}

# Relabel the grouping columns of a crossed result: columns named after the
# right-hand-side variables get a "_ref" suffix, and their "1"-suffixed
# counterparts get "_foc" in place of the trailing "1".
rename_ref_foc <- function(out, formula) {
  rhs <- labels(terms(formula))
  nms <- names(out)

  # Both masks are computed before any name is changed. Focal columns are
  # matched exactly so that unrelated columns which merely end in the same
  # text are left alone.
  is_ref <- nms %in% rhs
  is_foc <- nms %in% paste0(rhs, "1")

  nms[is_ref] <- paste0(nms[is_ref], "_ref")
  # Only the trailing "1" is replaced; digits that belong to the variable
  # name itself (e.g. "grade2") must be preserved.
  nms[is_foc] <- sub("1$", "_foc", nms[is_foc])

  names(out) <- nms
  out
}

# Keep only the rows of `out` whose grouping columns match `ref_group`.
# `ref_group` is a character vector or a one-sided formula; its values are
# matched, in order, against the right-hand-side variables of `formula`.
ref_subset <- function(out, formula, ref_group) {
  rhs <- labels(terms(formula))

  if (is.formula(ref_group)) {
    # Back-ticks are only quoting for non-syntactic level names.
    ref_group <- gsub("`", "", labels(terms(ref_group)))
  }
  if (length(ref_group) > length(rhs)) {
    stop("`ref_group` supplies more values than there are grouping ",
         "variables in `formula`.")
  }

  ref_join <- data.frame(as.list(as.character(ref_group)),
                         stringsAsFactors = FALSE)
  names(ref_join) <- rhs[seq_along(ref_group)]

  # The join key is given explicitly, so no "Joining, by" message is emitted.
  semi_join(out, ref_join, by = names(ref_join))
}


#' Report descriptive stats for all possible pairings on the rhs of the formula.
#' @keywords internal
#' @param formula A formula of the type \code{out ~ group} where \code{out} is
#'   the outcome variable and \code{group} is the grouping variable. Note this
#'   variable can include any arbitrary number of groups. Additional variables
#'   can be included with \code{+} to produce descriptive stats by the secondary
#'   or tertiary variable of interest (e.g., \code{out ~ group + characteristic1
#'   + characteristic2}).

descrip_stats <- function(data, formula, ..., qtile_groups = NULL) {
  rhs <- labels(terms(formula))
  lhs <- all.vars(formula)[1]

  if (missing(...)) {
    stop("No function supplied to ...")
  }

  # Keep only the columns involved, drop incomplete rows, and group by the
  # (character-coerced) grouping variables.
  d <- data %>%
    select(!!!syms(rhs), !!sym(lhs)) %>%
    na.omit() %>%
    mutate_at(vars(!!!syms(rhs)), list(as.character)) %>%
    group_by(!!!syms(rhs))

  if (!is.null(qtile_groups)) {
    # Assign each observation to a quantile bin within its group, then add
    # the bin to the grouping.
    d <- d %>%
      nest() %>%
      mutate(q = map(.data$data,
                     ~as.numeric(cut2(.[[lhs]], g = qtile_groups)))) %>%
      unnest(cols = c(.data$data, .data$q)) %>%
      group_by(!!!syms(rhs), .data$q)
  }

  # Apply every function supplied through `...` to the outcome.
  summarize_at(d, vars(!!!syms(lhs)), list(...))
}

# Summary statistics for every ordered pairing of groups: the per-group table
# from descrip_stats() is crossed with a "1"-suffixed copy of itself, and
# pairings of a group with itself are removed. With `qtile_groups`, only
# pairings within the same quantile bin are kept.
descrip_cross <- function(data, formula, ..., qtile_groups = NULL) {
  rhs <- labels(terms(formula))
  f <- quos(...)

  d1 <- d2 <- descrip_stats(data, formula, ..., qtile_groups = qtile_groups)
  names(d2) <- paste0(names(d1), "1")
  d <- cross(d1, d2)

  # Expression text that identifies a group paired with itself.
  zero_group <- paste(rhs, "==", paste0(rhs, 1), collapse = " & ")
  if (!is.null(qtile_groups)) zero_group <- paste0("q == q1 & ", zero_group)

  # Sanity check: for self-pairings the first statistic must be identical on
  # both sides. The output column carries the argument name, so prefer that
  # and fall back to the function expression when no name was given.
  self_pairs <- filter(d, !!parse_quo(zero_group, env = parent.frame()))
  stat_col <- names(f)[1]
  if (is.null(stat_col) || is.na(stat_col) || !nzchar(stat_col)) {
    stat_col <- as.character(quo_get_expr(f[[1]]))
  }
  stat_diff <- self_pairs[[stat_col]] - self_pairs[[paste0(stat_col, 1)]]

  if (any(stat_diff != 0)) {
    stop("Reference group filtering failed: a group paired with itself ",
         "produced differing summary statistics.")
  }

  filt_expr <- parse_quo(paste0("!(", zero_group, ")"),
                         env = parent.frame())
  d <- d %>%
    filter(!!filt_expr)

  if (!is.null(qtile_groups)) {
    d <- d %>%
      filter(.data$q == .data$q1) %>%
      mutate(qtile_ub = .data$q / max(.data$q),
             qtile_lb = .data$qtile_ub - min(.data$qtile_ub)) %>%
      ungroup() %>%
      select(.data$q,
             .data$qtile_lb,
             .data$qtile_ub,
             everything(),
             -.data$q1)
  }
  d
}



#### Old version of tidyr::crossing
# Drop zero-length elements of `x`; zero-length factors are kept when
# `factor = TRUE`.
drop_empty <- function(x, factor = TRUE) {
  empty <- map_lgl(x, function(el) {
    length(el) == 0 && (!factor || !is.factor(el))
  })
  x[!empty]
}

# Row indices 1..nrow(x); an empty integer vector for zero rows.
seq_nrow <- function(x) seq_len(nrow(x))

# Cartesian product of the rows of two data frames: each row of `x` is
# repeated for every row of `y`.
cross_df <- function(x, y) {
  x_idx <- rep(seq_nrow(x), each = nrow(y))
  y_idx <- rep(seq_nrow(y), nrow(x))
  bind_cols(x[x_idx, , drop = FALSE], y[y_idx, , drop = FALSE])
}

# Element-wise is.list() over `x`.
is_list <- function(x) map_lgl(x, is.list)

# Unique values of `x` in a stable order: factor levels (with NA appended if
# any values are missing), unique list elements, or sorted unique values with
# NA last.
ulevels <- function(x) {
  if (is.factor(x)) {
    orig_levs <- levels(x)
    x <- addNA(x, ifany = TRUE)
    levs <- levels(x)
    factor(levs, levels = orig_levs, ordered = is.ordered(x), exclude = NULL)
  } else if (is.list(x)) {
    unique(x)
  } else {
    sort(unique(x), na.last = TRUE)
  }
}

# Cross all inputs: atomic vectors become one-column tibbles of their unique
# values, and the resulting tables are combined pairwise with cross_df().
cross <- function(...) {
  x <- lst(...)
  stopifnot(is_list(x))

  x <- drop_empty(x)
  if (length(x) == 0) {
    return(data.frame())
  }

  # Named `atomic` (not `is_atomic`) so the imported purrr function is not
  # shadowed by a local variable.
  atomic <- map_lgl(x, is_atomic)

  # turn each atomic vector into single column data frame
  col_df <- map(x[atomic], function(x) tibble(x = ulevels(x)))
  col_df <- map2(col_df, names(x)[atomic], set_names)
  x[atomic] <- col_df

  Reduce(cross_df, x)
}
-------------------------------------------------------------------------------- /README-binned_plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-binned_plot1-1.png -------------------------------------------------------------------------------- /README-binned_plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-binned_plot2-1.png -------------------------------------------------------------------------------- /README-ecdf_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-ecdf_plot-1.png -------------------------------------------------------------------------------- /README-pp_plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-pp_plot1-1.png -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10
| comment = "#>", 11 | fig.path = "README-" 12 | ) 13 | ``` 14 | 15 | # esvis 16 | 17 | R Package for effect size visualization and estimation. 18 | 19 | [![Build Status](https://travis-ci.org/datalorax/esvis.svg?branch=master)](https://travis-ci.org/datalorax/esvis) 20 | [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/datalorax/esvis?branch=master&svg=true)](https://ci.appveyor.com/project/datalorax/esvis) 21 | [![codecov](https://codecov.io/gh/datalorax/esvis/branch/master/graph/badge.svg)](https://codecov.io/gh/datalorax/esvis) 22 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/esvis)](https://cran.r-project.org/package=esvis) 23 | 24 | This package is designed to help you very quickly estimate and visualize distributional differences by categorical factors (e.g., the effect of treatment by gender and income category). Emphasis is placed on evaluating distributional differences across the entirety of the scale, rather than only by measures of central tendency (e.g., means). 25 | 26 | ## Installation 27 | 28 | Install directly from CRAN with 29 | 30 | ```{r cran_install, eval = FALSE} 31 | install.packages("esvis") 32 | ``` 33 | 34 | Or the development version from GitHub with: 35 | 36 | ```{r gh-installation, eval = FALSE} 37 | # install.packages("devtools") 38 | devtools::install_github("datalorax/esvis") 39 | ``` 40 | 41 | ## Plotting methods 42 | 43 | There are three primary data visualizations: (a) binned effect size plots, (b) probability-probability plots, and (c) empirical cumulative distribution functions. All plots use the [ggplot2](http://ggplot2.tidyverse.org) package and are fully manipulable after creation using standard ggplot commands (e.g., changing the theme, labels, etc.). These plots were all produced by first running `library(ggplot2); theme_set(theme_minimal())` to produce the plots with the minimal theme, but no theme structure is imposed on any of the plots. 
44 | 45 | ### Binned ES Plot 46 | At present, the binned effect size plot can only be produced with Cohen's *d*, although future development will allow the user to select the type of effect size. The binned effect size plot splits the distribution into quantiles specified by the user (defaults to lower, middle, and upper thirds), calculates the mean difference between groups within each quantile bin, and produces an effect size for each bin by dividing by the overall pooled standard deviation (i.e., not by quantile). For example 47 | 48 | ```{r theme_set, include = FALSE} 49 | library(ggplot2) 50 | library(esvis) 51 | theme_set(theme_minimal()) 52 | ``` 53 | 54 | 55 | ```{r, binned_plot1, fig.width = 8} 56 | library(esvis) 57 | binned_plot(benchmarks, math ~ ell) 58 | ``` 59 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot1-1.png) 60 | Note that in this plot one can clearly see that the magnitude of the differences between the groups depends upon scale location, as evidenced by the reversal of the effect (negative to positive) for the Non-ELL (non-English Language Learners) group. We could also change the reference group, change the level of quantile binning, and evaluate the effect within other factors. For example, we can look by season and eligibility for free or reduced price lunch, with quintile binning, and non-ELL students as the reference group with 61 | 62 | ```{r, binned_plot2, fig.width = 8} 63 | binned_plot(benchmarks, 64 | math ~ ell + frl + season, 65 | ref_group = "Non-ELL", 66 | qtile_groups = 5) 67 | ``` 68 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot2-1.png) 69 | The `ref_group` argument can also be supplied as a formula. 70 | 71 | ### PP Plots 72 | Probability-probability plots can be produced with a call to `pp_plot` and an equivalent argument structure. In this case, we're visualizing the difference in reading achievement by race/ethnicity by season. 
73 | 74 | ```{r, pp_plot1, fig.width = 8} 75 | pp_plot(benchmarks, reading ~ ethnicity + season) 76 | ``` 77 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-pp_plot1-1.png) 78 | 79 | Essentially, the empirical cumulative distribution function (ECDF) for the reference group (by default, the highest performing group) is mapped against the ECDF for each corresponding group. The magnitude of the achievement gap is then displayed by the distance from the diagonal reference line, representing, essentially, the ECDF for the reference group. 80 | 81 | By default, the area under the curve is shaded, which itself is an effect-size-like measure, but this is also manipulable. 82 | 83 | ### ECDF Plot 84 | Finally, the `ecdf_plot` function essentially dresses up the base `plot.ecdf` function, but also adds some nice referencing features through additional, optional arguments. Below, I have included the optional `hor_ref = TRUE` argument such that horizontal reference lines appear, relative to the cuts provided. 85 | 86 | ```{r, ecdf_plot, fig.width = 8} 87 | ecdf_plot(benchmarks, math ~ season, 88 | cuts = c(190, 200, 215)) 89 | ``` 90 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-ecdf_plot-1.png) 91 | These are the curves that go into the PP-Plot, but occasionally can be useful on their own. 92 | 93 | ## Estimation Methods 94 | Compute effect sizes for all possible pairwise comparisons. 95 | 96 | ```{r, coh_d1} 97 | coh_d(benchmarks, math ~ season + frl) 98 | ``` 99 | 100 | Or specify a reference group. In this case, I've used the formula-based interface, but a string vector specifying the specific reference group could also be supplied. 101 | 102 | 103 | ```{r, coh_d2} 104 | coh_d(benchmarks, 105 | math ~ season + frl, 106 | ref_group = ~Fall + `Non-FRL`) 107 | ``` 108 | 109 | Notice that the reference to Non-FRL is wrapped in back-ticks, which should be used anytime there are spaces or other non-standard characters. 
110 | 111 | 112 | Other effect sizes are estimated equivalently. For example, *V* ([Ho, 2009](https://journals.sagepub.com/doi/10.3102/1076998609332755)) can be estimated with 113 | 114 | ```{r v} 115 | v(benchmarks, 116 | math ~ season + frl, 117 | ref_group = ~Fall + `Non-FRL`) 118 | ``` 119 | 120 | or *AUC* with 121 | 122 | ```{r auc} 123 | auc(benchmarks, 124 | math ~ season + frl, 125 | ref_group = ~Fall + `Non-FRL`) 126 | ``` 127 | 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # esvis 5 | 6 | R Package for effect size visualization and estimation. 7 | 8 | [![Build 9 | Status](https://travis-ci.org/datalorax/esvis.svg?branch=master)](https://travis-ci.org/datalorax/esvis) 10 | [![AppVeyor Build 11 | Status](https://ci.appveyor.com/api/projects/status/github/datalorax/esvis?branch=master&svg=true)](https://ci.appveyor.com/project/datalorax/esvis) 12 | [![codecov](https://codecov.io/gh/datalorax/esvis/branch/master/graph/badge.svg)](https://codecov.io/gh/datalorax/esvis) 13 | [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/esvis)](https://cran.r-project.org/package=esvis) 14 | 15 | This package is designed to help you very quickly estimate and visualize 16 | distributional differences by categorical factors (e.g., the effect of 17 | treatment by gender and income category). Emphasis is placed on 18 | evaluating distributional differences across the entirety of the scale, 19 | rather than only by measures of central tendency (e.g., means). 
20 | 21 | ## Installation 22 | 23 | Install directly from CRAN with 24 | 25 | ``` r 26 | install.packages("esvis") 27 | ``` 28 | 29 | Or the development version from GitHub with: 30 | 31 | ``` r 32 | # install.packages("devtools") 33 | devtools::install_github("datalorax/esvis") 34 | ``` 35 | 36 | ## Plotting methods 37 | 38 | There are three primary data visualizations: (a) binned effect size 39 | plots, (b) probability-probability plots, and (c) empirical cumulative 40 | distribution functions. All plots use the 41 | [ggplot2](http://ggplot2.tidyverse.org) package and are fully 42 | manipulable after creation using standard ggplot commands (e.g., 43 | changing the theme, labels, etc.). These plots were all produced by 44 | first running `library(ggplot2); theme_set(theme_minimal())` to produce 45 | the plots with the minimal theme, but no theme structure is imposed on 46 | any of the plots. 47 | 48 | ### Binned ES Plot 49 | 50 | At present, the binned effect size plot can only be produced with 51 | Cohen’s *d*, although future development will allow the user to select 52 | the type of effect size. The binned effect size plot splits the 53 | distribution into quantiles specified by the user (defaults to lower, 54 | middle, and upper thirds), calculates the mean difference between groups 55 | within each quantile bin, and produces an effect size for each bin by 56 | dividing by the overall pooled standard deviation (i.e., not by 57 | quantile). For example 58 | 59 | ``` r 60 | library(esvis) 61 | binned_plot(benchmarks, math ~ ell) 62 | #> Warning: `cols` is now required. 
63 | #> Please use `cols = c(data, q)` 64 | ``` 65 | 66 | ![](README-binned_plot1-1.png) 67 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot1-1.png) 68 | Note that in this plot one can clearly see that the magnitude of the 69 | differences between the groups depends upon scale location, as evidence 70 | by the reversal of the effect (negative to positive) for the Non-ELL 71 | (non-English Language Learners) group. We could also change the 72 | reference group, change the level of quantile binning, and evaluate the 73 | effect within other factors. For example, we can look by season 74 | eligibility for free or reduced price lunch, with quantiles binning, and 75 | non-ELL students as the reference group with 76 | 77 | ``` r 78 | binned_plot(benchmarks, 79 | math ~ ell + frl + season, 80 | ref_group = "Non-ELL", 81 | qtile_groups = 5) 82 | #> Warning: `cols` is now required. 83 | #> Please use `cols = c(data, q)` 84 | ``` 85 | 86 | ![](README-binned_plot2-1.png) 87 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot2-1.png) 88 | The `ref_group` argument can also supplied as a formula. 89 | 90 | ### PP Plots 91 | 92 | Probability-probability plot can be produced with a call to `pp_plot` 93 | and an equivalent argument structure. In this case, we’re visualizing 94 | the difference in reading achievement by race/ethnicity by season. 95 | 96 | ``` r 97 | pp_plot(benchmarks, reading ~ ethnicity + season) 98 | ``` 99 | 100 | ![](README-pp_plot1-1.png) 101 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-pp_plot1-1.png) 102 | 103 | Essentially, the empirical cummulative distribution function (ECDF) for 104 | the reference group (by default, the highest performing group) is mapped 105 | against the ECDF for each corresponding group. 
The magnitude of the 106 | achievement gap is then displayed by the distance from the diagonal 107 | reference line, representing, essentially, the ECDF for the reference 108 | group. 109 | 110 | By default, the area under the curve is shaded, which itself is an 111 | effect-size like measure, but this is also manipulable. 112 | 113 | ### ECDF Plot 114 | 115 | Finally, the `ecdf_plot` function essentially dresses up the base 116 | `plot.ecdf` function, but also adds some nice referencing features 117 | through additional, optional arguments. Below, I have included the 118 | optional `hor_ref = TRUE` argument such that horizontal reference lines 119 | appear, relative to the cuts provided. 120 | 121 | ``` r 122 | ecdf_plot(benchmarks, math ~ season, 123 | cuts = c(190, 200, 215)) 124 | ``` 125 | 126 | ![](README-ecdf_plot-1.png) 127 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-ecdf_plot-1.png) 128 | These are the curves that go into the PP-Plot, but occasionally can be 129 | useful on their own. 130 | 131 | ## Estimation Methods 132 | 133 | Compute effect sizes for all possible pairwise comparisons. 
134 | 135 | ``` r 136 | coh_d(benchmarks, math ~ season + frl) 137 | #> `mutate_if()` ignored the following grouping variables: 138 | #> Column `season` 139 | #> # A tibble: 30 x 6 140 | #> season_ref frl_ref season_foc frl_foc coh_d coh_se 141 | #> 142 | #> 1 Fall FRL Fall Non-FRL 0.7443868 0.07055679 143 | #> 2 Fall FRL Spring FRL 1.321191 0.04957348 144 | #> 3 Fall FRL Spring Non-FRL 2.008066 0.07873488 145 | #> 4 Fall FRL Winter FRL 0.6246112 0.04716189 146 | #> 5 Fall FRL Winter Non-FRL 1.300031 0.07326622 147 | #> 6 Fall Non-FRL Fall FRL -0.7443868 0.07055679 148 | #> 7 Fall Non-FRL Spring FRL 0.5498306 0.06939873 149 | #> 8 Fall Non-FRL Spring Non-FRL 1.140492 0.09189070 150 | #> 9 Fall Non-FRL Winter FRL -0.1269229 0.06934576 151 | #> 10 Fall Non-FRL Winter Non-FRL 0.5009081 0.08716735 152 | #> # … with 20 more rows 153 | ``` 154 | 155 | Or specify a reference group. In this case, I’ve used the formula-based 156 | interface, but a string vector specifying the specific reference group 157 | could also be supplied. 158 | 159 | ``` r 160 | coh_d(benchmarks, 161 | math ~ season + frl, 162 | ref_group = ~Fall + `Non-FRL`) 163 | #> `mutate_if()` ignored the following grouping variables: 164 | #> Column `season` 165 | #> # A tibble: 5 x 6 166 | #> season_ref frl_ref season_foc frl_foc coh_d coh_se 167 | #> 168 | #> 1 Fall Non-FRL Fall FRL -0.7443868 0.07055679 169 | #> 2 Fall Non-FRL Spring FRL 0.5498306 0.06939873 170 | #> 3 Fall Non-FRL Spring Non-FRL 1.140492 0.09189070 171 | #> 4 Fall Non-FRL Winter FRL -0.1269229 0.06934576 172 | #> 5 Fall Non-FRL Winter Non-FRL 0.5009081 0.08716735 173 | ``` 174 | 175 | Notice that the reference to Non-FRL is wrapped in back-ticks, which 176 | should be used anytime there are spaces or other non-standard 177 | characters. 178 | 179 | Other effect sizes are estimated equivalently. 
For example, compute *V* 180 | ([Ho, 2009](https://journals.sagepub.com/doi/10.3102/1076998609332755)) 181 | can be estimated with 182 | 183 | ``` r 184 | v(benchmarks, 185 | math ~ season + frl, 186 | ref_group = ~Fall + `Non-FRL`) 187 | #> # A tibble: 5 x 5 188 | #> # Groups: frl, season [1] 189 | #> frl_ref season_ref frl_foc season_foc v 190 | #> 191 | #> 1 Non-FRL Fall Non-FRL Winter 0.5070737 192 | #> 2 Non-FRL Fall FRL Spring 0.5454666 193 | #> 3 Non-FRL Fall FRL Winter -0.1117226 194 | #> 4 Non-FRL Fall Non-FRL Spring 1.139235 195 | #> 5 Non-FRL Fall FRL Fall -0.7051069 196 | ``` 197 | 198 | or *AUC* with 199 | 200 | ``` r 201 | auc(benchmarks, 202 | math ~ season + frl, 203 | ref_group = ~Fall + `Non-FRL`) 204 | #> # A tibble: 5 x 5 205 | #> # Groups: frl, season [1] 206 | #> frl_ref season_ref frl_foc season_foc auc 207 | #> 208 | #> 1 Non-FRL Fall Non-FRL Winter 0.6400361 209 | #> 2 Non-FRL Fall FRL Spring 0.6501417 210 | #> 3 Non-FRL Fall FRL Winter 0.4685164 211 | #> 4 Non-FRL Fall Non-FRL Spring 0.7897519 212 | #> 5 Non-FRL Fall FRL Fall 0.3090356 213 | ``` 214 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | # DO NOT CHANGE the "init" and "install" sections below 2 | 3 | # Download script file from GitHub 4 | init: 5 | ps: | 6 | $ErrorActionPreference = "Stop" 7 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 8 | Import-Module '..\appveyor-tool.ps1' 9 | 10 | install: 11 | - ps: Bootstrap 12 | 13 | # Adapt as necessary starting from here 14 | 15 | build_script: 16 | - travis-tool.sh install_deps 17 | 18 | test_script: 19 | - travis-tool.sh run_tests 20 | 21 | on_failure: 22 | - 7z a failure.zip *.Rcheck\* 23 | - appveyor PushArtifact failure.zip 24 | 25 | artifacts: 26 | - path: '*.Rcheck\**\*.log' 27 | name: Logs 28 | 29 | - path: 
'*.Rcheck\**\*.out' 30 | name: Logs 31 | 32 | - path: '*.Rcheck\**\*.fail' 33 | name: Logs 34 | 35 | - path: '*.Rcheck\**\*.Rout' 36 | name: Logs 37 | 38 | - path: '\*_*.tar.gz' 39 | name: Bits 40 | 41 | - path: '\*_*.zip' 42 | name: Bits 43 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Release Summary 2 | 3 | This is a minor release of esvis, version 0.3.1, an R package for visualizing and estimating effect sizes. This release implements changes to the codebase to accommodate the release of dplyr 1.0, one of the package dependencies. 4 | 5 | ## Test environments 6 | * Local OS X install, R 3.6.0 7 | * Ubuntu 16.04 (on Travis-CI), R 4.0.0 8 | * Win-builder (devel and release) 9 | 10 | ## R CMD check results 11 | 12 | 0 errors | 0 warnings | 0 notes 13 | 14 | ## Downstream dependencies 15 | 16 | There are currently no downstream dependencies for this package -------------------------------------------------------------------------------- /data/benchmarks.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/data/benchmarks.rda -------------------------------------------------------------------------------- /data/seda.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/data/seda.rda -------------------------------------------------------------------------------- /data/star.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/data/star.rda -------------------------------------------------------------------------------- /docs/README-binned_plot1-1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-binned_plot1-1.png -------------------------------------------------------------------------------- /docs/README-binned_plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-binned_plot2-1.png -------------------------------------------------------------------------------- /docs/README-cleanup.R: -------------------------------------------------------------------------------- 1 | files <- list.files(pattern = "README-") 2 | 3 | file.copy(files, file.path("docs", files), overwrite = TRUE) 4 | 5 | rm <- glue::glue("sed -i -e 's/!\\[\\]({file})//g' README.md", 6 | file = files) 7 | purrr::walk(rm, system) 8 | 9 | file.remove(c(files, "README.md-e")) 10 | -------------------------------------------------------------------------------- /docs/README-ecdf_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-ecdf_plot-1.png -------------------------------------------------------------------------------- /docs/README-pp_plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-pp_plot1-1.png -------------------------------------------------------------------------------- /esvis.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 
12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /inst/image/README-binned_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-binned_plot-1.png -------------------------------------------------------------------------------- /inst/image/README-ecdf_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-ecdf_plot-1.png -------------------------------------------------------------------------------- /inst/image/README-pp_plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-pp_plot-1.png -------------------------------------------------------------------------------- /inst/image/README-pp_plot1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-pp_plot1-1.png -------------------------------------------------------------------------------- /inst/image/README-pp_plot2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-pp_plot2-1.png -------------------------------------------------------------------------------- /man/auc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 
2 | % Please edit documentation in R/es_calcs.R 3 | \name{auc} 4 | \alias{auc} 5 | \title{Compute the Area Under the \link{pp_plot} Curve 6 | Calculates the area under the \code{pp} curve. The area under the curve is 7 | also a useful effect-size like statistic, representing the probability that 8 | a randomly selected individual from the \code{x} distribution will have a 9 | higher value than a randomly selected individual from the \code{y} 10 | distribution.} 11 | \usage{ 12 | auc(data, formula, ref_group = NULL, rename = TRUE) 13 | } 14 | \arguments{ 15 | \item{data}{The data frame used for estimation - ideally structured in a tidy 16 | format.} 17 | 18 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 19 | the outcome variable and \code{group} is the grouping variable. Note this 20 | variable can include any arbitrary number of groups. Additional variables 21 | can be included with \code{+} to produce separate estimates by the secondary 22 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 23 | + characteristic2}).} 24 | 25 | \item{ref_group}{Optional. A character vector or forumla listing the 26 | reference group levels for each variable on the right hand side of the 27 | formula, supplied in the same order as the formula. Note that if using the 28 | formula version, levels that are numbers, or include hyphens, spaces, etc., 29 | should be wrapped in back ticks (e.g., 30 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 31 | in doubt, it is safest to use the back ticks, as they will not interfere 32 | with anything if they are not needed. See examples below for more details.} 33 | 34 | \item{rename}{Used primarily for internal purposes. Should the column 35 | names be renamed to reference the focal and reference groups? Defaults to 36 | \code{TRUE}.} 37 | } 38 | \value{ 39 | By default the area under the curve for all possible pairings of 40 | the grouping factor are returned. 
41 | } 42 | \description{ 43 | Compute the Area Under the \link{pp_plot} Curve 44 | Calculates the area under the \code{pp} curve. The area under the curve is 45 | also a useful effect-size like statistic, representing the probability that 46 | a randomly selected individual from the \code{x} distribution will have a 47 | higher value than a randomly selected individual from the \code{y} 48 | distribution. 49 | } 50 | \examples{ 51 | 52 | # Calculate AUC for all pairwise comparisons 53 | auc(star, reading ~ condition) 54 | 55 | # Report only relative to regular-sized classrooms 56 | auc(star, 57 | reading ~ condition, 58 | ref_group = "reg") 59 | 60 | # Report by ELL and FRL groups for each season, compare to non-ELL students 61 | # who were not eligible for free or reduced price lunch in the fall (using 62 | # the formula interface for reference group referencing). 63 | \dontrun{ 64 | auc(benchmarks, 65 | math ~ ell + frl + season, 66 | ref_group = ~`Non-ELL` + `Non-FRL` + Fall) 67 | 68 | # Same thing but with character vector supplied, rather than a formula 69 | auc(benchmarks, 70 | math ~ ell + frl + season, 71 | ref_group = c("Non-ELL", "Non-FRL", "Fall")) 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /man/benchmarks.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/benchmarks.R 3 | \docType{data} 4 | \name{benchmarks} 5 | \alias{benchmarks} 6 | \title{Synthetic benchmark screening data} 7 | \format{A data frame with 10240 rows and 9 columns. 8 | \describe{ 9 | \item{sid}{Integer. Student identifier.} 10 | \item{cohort}{Integer. Identifies the cohort from which the student was 11 | sampled (1-3).} 12 | \item{sped}{Character. Special Education status: "Non-Sped" or "Sped"} 13 | \item{ethnicity}{Character. The race/ethnicity to which the student 14 | identified. 
Takes on one of seven values: "Am. Indian", "Asian", 15 | "Black", "Hispanic", "Native Am.", "Two or More", and "White"} 16 | \item{frl}{Character. Student's eligibility for free or reduced price 17 | lunch. Takes on the values "FRL" and "Non-FRL".} 18 | \item{ell}{Character. Students' English language learner status. Takes 19 | on one of values: "Active", "Monitor", and "Non-ELL". Students 20 | coded "Active" were actively receiving English language services 21 | at the time of testing. Students coded "Monitor" had previously 22 | received services, but not at the time of testing. Students coded 23 | "Non-ELL" did not receive services at any time.} 24 | \item{season}{Character. The season during which the assessment was 25 | administered: "Fall", "Winter", or "Spring"} 26 | \item{reading}{Integer. Reading scale score.} 27 | \item{math}{Integer. Mathematics scale score.} 28 | }} 29 | \usage{ 30 | benchmarks 31 | } 32 | \description{ 33 | Across the country many schools engage in seasonal benchmark screenings to 34 | monitor to progress of their students. These are relatively brief 35 | assessments administered to "check-in" on students' progress throughout 36 | the year. This dataset was simulated from a real dataset from one large 37 | school district using the terrific 38 | \href{https://CRAN.R-project.org/package=synthpop}{synthpop} 39 | R package. Overall characteristics of the synthetic data are remarkably 40 | similar to the real data. 
41 | } 42 | \keyword{datasets} 43 | -------------------------------------------------------------------------------- /man/binned_es.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{binned_es} 4 | \alias{binned_es} 5 | \title{Calculate binned effect sizes} 6 | \usage{ 7 | binned_es( 8 | data, 9 | formula, 10 | ref_group = NULL, 11 | qtile_groups = 3, 12 | es = "g", 13 | rename = TRUE 14 | ) 15 | } 16 | \arguments{ 17 | \item{data}{The data frame used for estimation - ideally structured in a tidy 18 | format.} 19 | 20 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 21 | the outcome variable and \code{group} is the grouping variable. Note this 22 | variable can include any arbitrary number of groups. Additional variables 23 | can be included with \code{+} to produce separate estimates by the secondary 24 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 25 | + characteristic2}).} 26 | 27 | \item{ref_group}{Optional. A character vector or forumla listing the 28 | reference group levels for each variable on the right hand side of the 29 | formula, supplied in the same order as the formula. Note that if using the 30 | formula version, levels that are numbers, or include hyphens, spaces, etc., 31 | should be wrapped in back ticks (e.g., 32 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 33 | in doubt, it is safest to use the back ticks, as they will not interfere 34 | with anything if they are not needed. See examples below for more details.} 35 | 36 | \item{qtile_groups}{The number of quantile bins to split the data by and 37 | calculate effect sizes. Defaults to 3 bins (lower, middle, upper).} 38 | 39 | \item{es}{The effect size to calculate. Currently the only options are 40 | "d" or "g".} 41 | 42 | \item{rename}{Logical. 
Should the column names be relabeled according to 43 | the reference and focal groups. Defaults to \code{TRUE}.} 44 | } 45 | \value{ 46 | A data frame with the corresponding effect sizes. 47 | } 48 | \description{ 49 | Calculate binned effect sizes 50 | } 51 | -------------------------------------------------------------------------------- /man/binned_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/binned_es_plot.R 3 | \name{binned_plot} 4 | \alias{binned_plot} 5 | \title{Quantile-binned effect size plot} 6 | \usage{ 7 | binned_plot( 8 | data, 9 | formula, 10 | ref_group = NULL, 11 | qtile_groups = 3, 12 | es = "g", 13 | lines = TRUE, 14 | points = TRUE, 15 | shade = TRUE, 16 | shade_alpha = 0.4, 17 | rects = TRUE, 18 | rect_fill = "gray20", 19 | rect_alpha = 0.35, 20 | refline = TRUE, 21 | refline_col = "gray40", 22 | refline_lty = "solid", 23 | refline_lwd = 1.1 24 | ) 25 | } 26 | \arguments{ 27 | \item{data}{The data frame to be plotted} 28 | 29 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 30 | the outcome variable and \code{group} is the grouping variable. Note this 31 | variable can include any arbitrary number of groups. Additional variables 32 | can be included with \code{+} to produce separate plots by the secondary or 33 | tertiary variable of interest (e.g., \code{out ~ group + characteristic1 + 34 | characteristic2}). No more than two additional characteristics can be 35 | supplied at this time.} 36 | 37 | \item{ref_group}{Optional character vector (of length 1) naming the 38 | reference group. Defaults to the group with the highest mean score.} 39 | 40 | \item{qtile_groups}{The number of quantile bins to split the data by and 41 | calculate effect sizes. Defaults to 3 bins (lower, middle, upper).} 42 | 43 | \item{es}{The effect size to plot. 
Defaults to \code{"g"}, in which case 44 | Hedge's g is plotted, which is better for small samples. At present, the 45 | only other option is \code{"d"} for Cohen's D.} 46 | 47 | \item{lines}{Logical. Should the PP Lines be plotted? Defaults to 48 | \code{TRUE}.} 49 | 50 | \item{points}{Logical. Should points be plotted for each \code{qtiles} be 51 | plotted? Defaults to \code{TRUE}.} 52 | 53 | \item{shade}{Logical. Should the standard errors around the effect size point 54 | estimates be displayed? Defaults to \code{TRUE}, with the uncertainty 55 | displayed with shading.} 56 | 57 | \item{shade_alpha}{Transparency level of the standard error shading. 58 | Defaults to 0.40.} 59 | 60 | \item{rects}{Logical. Should semi-transparent rectangles be plotted in the 61 | background to show the binning? Defaults to \code{TRUE}.} 62 | 63 | \item{rect_fill}{Color fill of rectangles to be plotted in the background, if 64 | \code{rects == TRUE}. Defaults to "gray20".} 65 | 66 | \item{rect_alpha}{Transparency level of the rectangles in the background when 67 | \code{rects == TRUE}. Defaults to 0.35.} 68 | 69 | \item{refline}{Logical. Defaults to \code{TRUE}. Should a diagonal 70 | reference line, representing the point of equal probabilities, be plotted?} 71 | 72 | \item{refline_col}{The color of the reference line. Defaults to 73 | \code{"gray40"}} 74 | 75 | \item{refline_lty}{Line type of the reference line. Defaults to 76 | \code{"solid"}.} 77 | 78 | \item{refline_lwd}{Line width of the reference line. Defaults to \code{1.1}.} 79 | } 80 | \description{ 81 | Plots the effect size between focal and reference groups by matched (binned) 82 | quantiles (i.e., the results from \link{binned_es}), with the matched 83 | quantiles plotted along the x-axis and the effect size plotted along the 84 | y-axis. The intent is to examine how (if) the magnitude of the effect size 85 | varies at different points of the distributions. 
The mean differences within 86 | each quantile bin are divided by the overall pooled standard deviation for 87 | the two groups being compared. 88 | } 89 | \examples{ 90 | # Binned Effect Size Plot: Defaults to Hedges' G 91 | binned_plot(star, math ~ condition) 92 | 93 | # Same plot, separated by sex 94 | binned_plot(star, math ~ condition + sex) 95 | 96 | # Same plot by sex and race 97 | \dontrun{ 98 | pp_plot(star, math ~ condition + sex + race) 99 | } 100 | ## Evaluate with simulated data: Plot is most interesting when variance 101 | # in the distributions being compared differ. 102 | 103 | library(tidyr) 104 | library(ggplot2) 105 | 106 | # simulate data with different variances 107 | set.seed(100) 108 | common_vars <- data.frame(low = rnorm(1000, 10, 1), 109 | high = rnorm(1000, 12, 1), 110 | vars = "common") 111 | diff_vars <- data.frame(low = rnorm(1000, 10, 1), 112 | high = rnorm(1000, 12, 2), 113 | vars = "diff") 114 | d <- rbind(common_vars, diff_vars) 115 | 116 | # Plot distributions 117 | d <- d \%>\% 118 | gather(group, value, -vars) 119 | 120 | ggplot(d, aes(value, color = group)) + 121 | geom_density() + 122 | facet_wrap(~vars) 123 | 124 | # Note that the difference between the distributions depends on where you're 125 | # evaluating from on the x-axis. The binned plot helps us visualize this. 
126 | # The below shows the binned plots when there is a common versus different 127 | # variance 128 | 129 | binned_plot(d, value ~ group + vars) 130 | } 131 | -------------------------------------------------------------------------------- /man/coh.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{coh} 4 | \alias{coh} 5 | \title{Cohen's d} 6 | \usage{ 7 | coh(n1, n2, mn1, mn2, vr1, vr2) 8 | } 9 | \arguments{ 10 | \item{n1}{The sample size for group 1} 11 | 12 | \item{n2}{The sample size for group 2} 13 | 14 | \item{mn1}{The mean for group 1} 15 | 16 | \item{mn2}{The mean for group 2} 17 | 18 | \item{vr1}{The variance for group 1} 19 | 20 | \item{vr2}{The variance for group 2} 21 | } 22 | \description{ 23 | Wraps the equation into a function 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/coh_d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{coh_d} 4 | \alias{coh_d} 5 | \title{Compute Cohen's \emph{d}} 6 | \usage{ 7 | coh_d(data, formula, ref_group = NULL, se = TRUE) 8 | } 9 | \arguments{ 10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 11 | format.} 12 | 13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 14 | the outcome variable and \code{group} is the grouping variable. Note this 15 | variable can include any arbitrary number of groups. Additional variables 16 | can be included with \code{+} to produce separate estimates by the secondary 17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 18 | + characteristic2}).} 19 | 20 | \item{ref_group}{Optional. 
A character vector or formula listing the 21 | reference group levels for each variable on the right hand side of the 22 | formula, supplied in the same order as the formula. Note that if using the 23 | formula version, levels that are numbers, or include hyphens, spaces, etc., 24 | should be wrapped in back ticks (e.g., 25 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 26 | in doubt, it is safest to use the back ticks, as they will not interfere 27 | with anything if they are not needed. See examples below for more details.} 28 | 29 | \item{se}{Logical. Should the standard error of the effect size be 30 | estimated and returned in the resulting data frame? Defaults to 31 | \code{TRUE}.} 32 | } 33 | \value{ 34 | By default the Cohen's \emph{d} for all possible pairings of 35 | the grouping factor(s) are returned. 36 | } 37 | \description{ 38 | This function calculates effect sizes in terms of Cohen's \emph{d}, also 39 | called the uncorrected effect size. See \code{\link{hedg_g}} for the sample 40 | size corrected version. Also see 41 | \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)} 42 | for a discussion on different types of effect sizes and their 43 | interpretation. Note that missing data are removed from the calculations of 44 | the means and standard deviations. 45 | } 46 | \examples{ 47 | 48 | # Calculate Cohen's d for all pairwise comparisons 49 | coh_d(star, reading ~ condition) 50 | 51 | # Report only relative to regular-sized classrooms 52 | coh_d(star, 53 | reading ~ condition, 54 | ref_group = "reg") 55 | 56 | # Report by ELL and FRL groups for each season, compare to non-ELL students 57 | # who were not eligible for free or reduced price lunch in the fall (using 58 | # the formula interface for reference group referencing).
59 | 60 | coh_d(benchmarks, 61 | math ~ ell + frl + season, 62 | ref_group = ~`Non-ELL` + `Non-FRL` + Fall) 63 | 64 | # Same thing but with character vector supplied, rather than a formula 65 | coh_d(benchmarks, 66 | math ~ ell + frl + season, 67 | ref_group = c("Non-ELL", "Non-FRL", "Fall")) 68 | } 69 | -------------------------------------------------------------------------------- /man/descrip_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{descrip_stats} 4 | \alias{descrip_stats} 5 | \title{Report descriptive stats for all possible pairings on the rhs of the formula.} 6 | \usage{ 7 | descrip_stats(data, formula, ..., qtile_groups = NULL) 8 | } 9 | \arguments{ 10 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 11 | the outcome variable and \code{group} is the grouping variable. Note this 12 | variable can include any arbitrary number of groups. Additional variables 13 | can be included with \code{+} to produce descriptive stats by the secondary 14 | or tertiary variable of interest (e.g., \code{out ~ group + characteristic1 15 | + characteristic2}).} 16 | } 17 | \description{ 18 | Report descriptive stats for all possible pairings on the rhs of the formula. 
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/ecdf_fun.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{ecdf_fun} 4 | \alias{ecdf_fun} 5 | \title{Computes the empirical cumulative distribution function for all groups 6 | supplied by the formula.} 7 | \usage{ 8 | ecdf_fun(data, formula, cuts = NULL) 9 | } 10 | \arguments{ 11 | \item{data}{The data frame used for estimation - ideally structured in a tidy 12 | format.} 13 | 14 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 15 | the outcome variable and \code{group} is the grouping variable. Note this 16 | variable can include any arbitrary number of groups. Additional variables 17 | can be included with \code{+} to produce separate estimates by the secondary 18 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 19 | + characteristic2}).} 20 | 21 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be 22 | guaranteed to include these points. Otherwise, there could be gaps in the 23 | ECDF at those particular points (used in plotting the cut scores).} 24 | } 25 | \description{ 26 | Computes the empirical cumulative distribution function for all groups 27 | supplied by the formula.
28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/ecdf_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ecdf_plot.R 3 | \name{ecdf_plot} 4 | \alias{ecdf_plot} 5 | \title{Empirical Cumulative Distribution Plot} 6 | \usage{ 7 | ecdf_plot( 8 | data, 9 | formula, 10 | cuts = NULL, 11 | linewidth = 1.2, 12 | ref_line_cols = "gray40", 13 | ref_linetype = "solid", 14 | center = FALSE, 15 | ref_rect = TRUE, 16 | ref_rect_col = "gray40", 17 | ref_rect_alpha = 0.15 18 | ) 19 | } 20 | \arguments{ 21 | \item{data}{A tidy data frame containing the data to be plotted.} 22 | 23 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 24 | the outcome variable and \code{group} is the grouping variable. Note this 25 | variable can include any arbitrary number of groups. Additional variables 26 | can be included with \code{+} to produce separate plots by the secondary or 27 | tertiary variable (e.g., \code{out ~ group + characteristic1 + 28 | characteristic2}). No more than two additional characteristics can be 29 | supplied at this time.} 30 | 31 | \item{cuts}{Optional numeric vector stating the location of reference 32 | line(s) and/or rectangle(s).} 33 | 34 | \item{linewidth}{Width of ECDF lines. Note that the color of the lines can 35 | be controlled through additional functions (e.g., \code{scale_color_brewer, 36 | scale_color_manual}).} 37 | 38 | \item{ref_line_cols}{Optional vector (or single value) of colors for 39 | \code{cuts} lines.} 40 | 41 | \item{ref_linetype}{Optional vector (or single value) of line types for 42 | \code{cuts} lines. Takes any of the arguments supplied by 43 | \link[ggplot2]{linetype}.} 44 | 45 | \item{center}{Logical. Should the functions be centered prior to plotting? 46 | Defaults to \code{FALSE}.
Note that if paneled/faceted plots are produced, 47 | the centering occurs by group.} 48 | 49 | \item{ref_rect}{Logical, defaults to \code{TRUE} when \code{cuts} takes 50 | any non-null value. Should semi-transparent rectangle(s) be plotted at the 51 | locations of \code{cuts}?} 52 | 53 | \item{ref_rect_col}{Color of the fill for the reference rectangles. Defaults 54 | to a dark gray.} 55 | 56 | \item{ref_rect_alpha}{Transparency of the fill for the reference rectangles. 57 | Defaults to 0.15.} 58 | } 59 | \description{ 60 | This is a wrapper function for the \link[ggplot2]{stat_ecdf} function and 61 | helps make it easy to directly compare distributions at specific 62 | locations along the scale. 63 | } 64 | \examples{ 65 | ecdf_plot(benchmarks, math ~ ell, 66 | cuts = c(190, 205, 210), 67 | ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA")) 68 | 69 | # Customize the plot with ggplot2 functions 70 | library(ggplot2) 71 | ecdf_plot(benchmarks, math ~ ell, 72 | cuts = c(190, 205, 210), 73 | ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA")) + 74 | theme_minimal() + 75 | theme(legend.position = "bottom") 76 | 77 | ecdf_plot(seda, mean ~ grade) + 78 | scale_fill_brewer(palette = "Set2") + 79 | theme_minimal() 80 | 81 | # Use within the dplyr pipeline 82 | library(dplyr) 83 | benchmarks \%>\% 84 | mutate(season = factor(season, 85 | levels = c("Fall", "Winter", "Spring"))) \%>\% 86 | ecdf_plot(math ~ ell + season + frl) 87 | } 88 | -------------------------------------------------------------------------------- /man/esvis-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/esvis-package.R 3 | \docType{package} 4 | \name{esvis-package} 5 | \alias{esvis} 6 | \alias{esvis-package} 7 | \title{esvis: Visualization and Estimation of Effect Sizes} 8 | \description{ 9 | A variety of methods are provided to estimate and visualize 10 | distributional
differences in terms of effect sizes. Particular emphasis 11 | is upon evaluating differences between two or more distributions across 12 | the entire scale, rather than at a single point (e.g., differences in 13 | means). For example, Probability-Probability (PP) plots display the 14 | difference between two or more distributions, matched by their empirical 15 | CDFs (see Ho and Reardon, 2012; \doi{10.3102/1076998611411918}), allowing 16 | for examinations of where on the scale distributional differences are 17 | largest or smallest. The area under the PP curve (AUC) is an effect-size 18 | metric, corresponding to the probability that a randomly selected 19 | observation from the x-axis distribution will have a higher value 20 | than a randomly selected observation from the y-axis distribution. 21 | Binned effect size plots are also available, in which the distributions 22 | are split into bins (set by the user) and separate effect sizes (Cohen's 23 | d) are produced for each bin - again providing a means to evaluate the 24 | consistency (or lack thereof) of the difference between two or more 25 | distributions at different points on the scale. Evaluation of empirical 26 | CDFs is also provided, with built-in arguments for providing annotations 27 | to help evaluate distributional differences at specific points (e.g., 28 | semi-transparent shading). All functions take a consistent argument 29 | structure. Calculation of specific effect sizes is also possible. The 30 | following effect sizes are estimable: (a) Cohen's d, (b) Hedges' g, 31 | (c) percentage above a cut, (d) transformed (normalized) percentage above 32 | a cut, (e) area under the PP curve, and (f) the V statistic (see Ho, 33 | 2009), which essentially transforms the 34 | area under the curve to standard deviation units. By default, effect sizes 35 | are calculated for all possible pairwise comparisons, but a reference 36 | group (distribution) can be specified.
37 | } 38 | \seealso{ 39 | Useful links: 40 | \itemize{ 41 | \item \url{https://github.com/datalorax/esvis} 42 | \item Report bugs at \url{https://github.com/datalorax/esvis/issues} 43 | } 44 | 45 | } 46 | \author{ 47 | \strong{Maintainer}: Daniel Anderson \email{daniela@uoregon.edu} 48 | 49 | } 50 | \keyword{internal} 51 | -------------------------------------------------------------------------------- /man/hedg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{hedg} 4 | \alias{hedg} 5 | \title{Hedge's g} 6 | \usage{ 7 | hedg(n1, n2, d) 8 | } 9 | \arguments{ 10 | \item{n1}{The sample size for group 1} 11 | 12 | \item{n2}{The sample size for group 2} 13 | 14 | \item{d}{The value of Cohen's d} 15 | } 16 | \description{ 17 | Wraps the equation into a function 18 | } 19 | \keyword{internal} 20 | -------------------------------------------------------------------------------- /man/hedg_g.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{hedg_g} 4 | \alias{hedg_g} 5 | \title{Compute Hedges' \emph{g} 6 | This function calculates effect sizes in terms of Hedges' \emph{g}, also 7 | called the corrected (for sample size) effect size. See 8 | \code{\link{coh_d}} for the uncorrected version. Also see 9 | \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)} 10 | for a discussion on different types of effect sizes and their 11 | interpretation. 
Note that missing data are removed from the calculations of 12 | the means and standard deviations.} 13 | \usage{ 14 | hedg_g(data, formula, ref_group = NULL, keep_d = TRUE) 15 | } 16 | \arguments{ 17 | \item{data}{The data frame used for estimation - ideally structured in a tidy 18 | format.} 19 | 20 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 21 | the outcome variable and \code{group} is the grouping variable. Note this 22 | variable can include any arbitrary number of groups. Additional variables 23 | can be included with \code{+} to produce separate estimates by the secondary 24 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 25 | + characteristic2}).} 26 | 27 | \item{ref_group}{Optional. A character vector or formula listing the 28 | reference group levels for each variable on the right hand side of the 29 | formula, supplied in the same order as the formula. Note that if using the 30 | formula version, levels that are numbers, or include hyphens, spaces, etc., 31 | should be wrapped in back ticks (e.g., 32 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 33 | in doubt, it is safest to use the back ticks, as they will not interfere 34 | with anything if they are not needed. See examples below for more details.} 35 | 36 | \item{keep_d}{Logical. Should Cohen's \emph{d} be reported along with 37 | Hedges' \emph{g}? Defaults to \code{TRUE}.} 38 | } 39 | \value{ 40 | By default the Hedges' \emph{g} for all possible pairings of 41 | the grouping factor are returned as a tidy data frame. 42 | } 43 | \description{ 44 | Compute Hedges' \emph{g} 45 | This function calculates effect sizes in terms of Hedges' \emph{g}, also 46 | called the corrected (for sample size) effect size. See 47 | \code{\link{coh_d}} for the uncorrected version.
Also see 48 | \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)} 49 | for a discussion on different types of effect sizes and their 50 | interpretation. Note that missing data are removed from the calculations of 51 | the means and standard deviations. 52 | } 53 | \examples{ 54 | 55 | # Calculate Hedges' g for all pairwise comparisons 56 | hedg_g(star, reading ~ condition) 57 | 58 | # Report only relative to regular-sized classrooms 59 | hedg_g(star, 60 | reading ~ condition, 61 | ref_group = "reg") 62 | 63 | # Report by ELL and FRL groups for each season, compare to non-ELL students 64 | # who were not eligible for free or reduced price lunch in the fall (using 65 | # the formula interface for reference group referencing). 66 | 67 | hedg_g(benchmarks, 68 | math ~ ell + frl + season, 69 | ref_group = ~`Non-ELL` + `Non-FRL` + Fall) 70 | 71 | # Same thing but with character vector supplied, rather than a formula 72 | hedg_g(benchmarks, 73 | math ~ ell + frl + season, 74 | ref_group = c("Non-ELL", "Non-FRL", "Fall")) 75 | } 76 | -------------------------------------------------------------------------------- /man/pac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{pac} 4 | \alias{pac} 5 | \title{Compute the proportion above a specific cut location} 6 | \usage{ 7 | pac(data, formula, cuts, ref_group = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 11 | format.} 12 | 13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 14 | the outcome variable and \code{group} is the grouping variable. Note this 15 | variable can include any arbitrary number of groups. 
Additional variables 16 | can be included with \code{+} to produce separate estimates by the secondary 17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 18 | + characteristic2}).} 19 | 20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be 21 | guaranteed to include these points. Otherwise, there could be gaps in the 22 | ECDF at those particular points (used in plotting the cut scores).} 23 | 24 | \item{ref_group}{Optional. A character vector or formula listing the 25 | reference group levels for each variable on the right hand side of the 26 | formula, supplied in the same order as the formula. Note that if using the 27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 28 | should be wrapped in back ticks (e.g., 29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 30 | in doubt, it is safest to use the back ticks, as they will not interfere 31 | with anything if they are not needed. See examples below for more details.} 32 | } 33 | \value{ 34 | Tidy data frame of the proportion above the cutoff for 35 | each (or selected) groups. 36 | } 37 | \description{ 38 | Computes the proportion of the corresponding group, as specified by the 39 | \code{formula}, scoring above the specified \code{cuts}.
40 | } 41 | \examples{ 42 | # Compute differences for all pairwise comparisons for each of three cuts 43 | pac(star, 44 | reading ~ condition, 45 | cuts = c(450, 500, 550)) 46 | 47 | pac(star, 48 | reading ~ condition + freelunch + race, 49 | cuts = c(450, 500)) 50 | 51 | pac(star, 52 | reading ~ condition + freelunch + race, 53 | cuts = c(450, 500), 54 | ref_group = ~small + no + white) 55 | } 56 | \seealso{ 57 | \code{\link{pac_compare}}, \code{\link{tpac}}, \code{\link{tpac_compare}} 58 | } 59 | -------------------------------------------------------------------------------- /man/pac_compare.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{pac_compare} 4 | \alias{pac_compare} 5 | \title{Compute the difference in the proportion above a specific cut location} 6 | \usage{ 7 | pac_compare(data, formula, cuts, ref_group = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 11 | format.} 12 | 13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 14 | the outcome variable and \code{group} is the grouping variable. Note this 15 | variable can include any arbitrary number of groups. Additional variables 16 | can be included with \code{+} to produce separate estimates by the secondary 17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 18 | + characteristic2}).} 19 | 20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be 21 | guaranteed to include these points. Otherwise, there could be gaps in the 22 | ECDF at those particular points (used in plotting the cut scores).} 23 | 24 | \item{ref_group}{Optional. A character vector or formula listing the 25 | reference group levels for each variable on the right hand side of the 26 | formula, supplied in the same order as the formula.
Note that if using the 27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 28 | should be wrapped in back ticks (e.g., 29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 30 | in doubt, it is safest to use the back ticks, as they will not interfere 31 | with anything if they are not needed. See examples below for more details.} 32 | } 33 | \value{ 34 | Tidy data frame of the proportion above the cutoff for 35 | each (or selected) groups. 36 | } 37 | \description{ 38 | Computes the difference in the proportion above the specified \code{cuts} 39 | for all possible pairwise comparisons of the groups specified by the 40 | \code{formula}. 41 | } 42 | \examples{ 43 | # Compute differences for all pairwise comparisons for each of three cuts 44 | pac_compare(star, 45 | reading ~ condition, 46 | cuts = c(450, 500, 550)) 47 | 48 | pac_compare(star, 49 | reading ~ condition + freelunch + race, 50 | cuts = c(450, 500)) 51 | 52 | pac_compare(star, 53 | reading ~ condition + freelunch + race, 54 | cuts = c(450, 500), 55 | ref_group = ~small + no + white) 56 | } 57 | \seealso{ 58 | \code{\link{pac}}, \code{\link{tpac}}, \code{\link{tpac_compare}} 59 | } 60 | -------------------------------------------------------------------------------- /man/paired_ecdf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{paired_ecdf} 4 | \alias{paired_ecdf} 5 | \title{Pairs empirical cumulative distribution functions for all groups 6 | supplied by the formula.} 7 | \usage{ 8 | paired_ecdf(data, formula, cuts = NULL) 9 | } 10 | \arguments{ 11 | \item{data}{The data frame used for estimation - ideally structured in a tidy 12 | format.} 13 | 14 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 15 | the outcome variable and \code{group} is the grouping variable.
Note this 16 | variable can include any arbitrary number of groups. Additional variables 17 | can be included with \code{+} to produce separate estimates by the secondary 18 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 19 | + characteristic2}).} 20 | 21 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be 22 | guaranteed to include these points. Otherwise, there could be gaps in the 23 | ECDF at those particular points (used in plotting the cut scores).} 24 | } 25 | \description{ 26 | Pairs empirical cummulative distribution functions for all groups 27 | supplied by the formula. 28 | } 29 | \keyword{internal} 30 | -------------------------------------------------------------------------------- /man/pp_plot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/pp_plot.R 3 | \name{pp_plot} 4 | \alias{pp_plot} 5 | \title{Produces the paired probability plot for two groups} 6 | \usage{ 7 | pp_plot( 8 | data, 9 | formula, 10 | ref_group = NULL, 11 | cuts = NULL, 12 | cut_labels = TRUE, 13 | cut_label_x = 0.02, 14 | cut_label_size = 3, 15 | lines = TRUE, 16 | linetype = "solid", 17 | linewidth = 1.1, 18 | shade = TRUE, 19 | shade_alpha = 0.2, 20 | refline = TRUE, 21 | refline_col = "gray40", 22 | refline_type = "dashed", 23 | refline_width = 1.1 24 | ) 25 | } 26 | \arguments{ 27 | \item{data}{The data frame to be plotted} 28 | 29 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 30 | the outcome variable and \code{group} is the grouping variable. Note this 31 | variable can include any arbitrary number of groups. Additional variables 32 | can be included with \code{+} to produce separate plots by the secondary or 33 | tertiary variable of interest (e.g., \code{out ~ group + characteristic1 + 34 | characteristic2}). 
No more than two additional characteristics can be 35 | supplied at this time.} 36 | 37 | \item{ref_group}{Optional character vector (of length 1) naming the 38 | reference group. Defaults to the group with the highest mean score.} 39 | 40 | \item{cuts}{Integer. Optional vector (or single number) of scores used to 41 | annotate the plot. If supplied, line segments will extend from the 42 | corresponding x and y axes and meet at the PP curve.} 43 | 44 | \item{cut_labels}{Logical. Should the reference lines corresponding to 45 | \code{cuts} be labeled? Defaults to \code{TRUE}.} 46 | 47 | \item{cut_label_x}{The x-axis location of the cut labels. Defaults to 0.02.} 48 | 49 | \item{cut_label_size}{The size of the cut labels. Defaults to 3.} 50 | 51 | \item{lines}{Logical. Should the PP Lines be plotted? Defaults to 52 | \code{TRUE}.} 53 | 54 | \item{linetype}{The \link[ggplot2]{linetype} for the PP lines. Defaults to 55 | "solid".} 56 | 57 | \item{linewidth}{The width of the PP lines. Defaults to 1.1 (just 58 | marginally larger than the default ggplot2 lines).} 59 | 60 | \item{shade}{Logical. Should the area under the curve be shaded? Defaults to 61 | \code{TRUE}.} 62 | 63 | \item{shade_alpha}{Transparency of the shading. Defaults to 0.2.} 64 | 65 | \item{refline}{Logical. Should a diagonal reference line be plotted, 66 | representing the value at which no difference is observed between the 67 | reference and focal distributions? Defaults to \code{TRUE}.} 68 | 69 | \item{refline_col}{Color of the reference line. Defaults to a dark gray.} 70 | 71 | \item{refline_type}{The \link[ggplot2]{linetype} for the reference line. 72 | Defaults to "dashed".} 73 | 74 | \item{refline_width}{The width of the reference line. Defaults to 1.1, 75 | the same as the default width of the PP lines.} 76 | } 77 | \value{ 78 | A \link[ggplot2]{ggplot2} object displaying the specified PP plot.
79 | } 80 | \description{ 81 | The paired probability plot maps the probability of obtaining a specific 82 | score for each of two groups. The area under the curve 83 | (\code{\link{auc}}) corresponds to the probability that a randomly 84 | selected observation from the x-axis group will have a higher score than 85 | a randomly selected observation from the y-axis group. This function 86 | extends the basic pp-plot by allowing multiple curves and faceting to 87 | facilitate a variety of comparisons. Note that because the plotting is 88 | built on top of \link[ggplot2]{ggplot2}, additional customization can 89 | be made on top of the plots, as illustrated in the examples. 90 | } 91 | \examples{ 92 | # PP plot examining differences by condition 93 | pp_plot(star, math ~ condition) 94 | 95 | # The sample size gets very small in the above within cells (e.g., wild 96 | # changes within the "other" group in particular). Overall, the effect doesn't 97 | # seem to change much by condition. 98 | 99 | # Look at something a little more interesting 100 | \dontrun{ 101 | pp_plot(benchmarks, math ~ ell + season + frl) 102 | } 103 | # Add some cut scores 104 | pp_plot(benchmarks, math ~ ell, cuts = c(190, 210, 215)) 105 | 106 | ## Make another interesting plot. 
Use ggplot to customize 107 | \dontrun{ 108 | library(tidyr) 109 | library(ggplot2) 110 | benchmarks \%>\% 111 | gather(subject, score, reading, math) \%>\% 112 | pp_plot(score ~ ell + subject + season, 113 | ref_group = "Non-ELL") + 114 | scale_fill_brewer(name = "ELL Status", palette = "Pastel2") + 115 | scale_color_brewer(name = "ELL Status", palette = "Pastel2") + 116 | labs(title = "Differences among English Language Learning Groups", 117 | subtitle = "Note crossing of reference line") + 118 | theme_minimal() 119 | } 120 | 121 | } 122 | -------------------------------------------------------------------------------- /man/psd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{psd} 4 | \alias{psd} 5 | \title{Pooled Standard Deviation} 6 | \usage{ 7 | psd(n1, n2, vr1, vr2) 8 | } 9 | \arguments{ 10 | \item{n1}{The sample size for group 1} 11 | 12 | \item{n2}{The sample size for group 2} 13 | 14 | \item{vr1}{The variance for group 1} 15 | 16 | \item{vr2}{The variance for group 2} 17 | } 18 | \description{ 19 | The denominator for Cohen's d 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/seda.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/seda.R 3 | \docType{data} 4 | \name{seda} 5 | \alias{seda} 6 | \title{Portion of the Stanford Educational Data Archive (SEDA).} 7 | \format{A data frame with 32625 rows and 8 columns. 8 | \describe{ 9 | \item{leaid}{Integer. Local education authority identifier.} 10 | \item{leaname}{Character. Local education authority name.} 11 | \item{stateabb}{Character. State abbreviation.} 12 | \item{year}{Integer. Year the data were collected.} 13 | \item{grade}{Integer. 
Grade level the data were collected.} 14 | \item{subject}{Character. Whether the data were from reading or 15 | mathematics.} 16 | \item{mean}{Double. Mean test score for the LEA in the corresponding 17 | subject/grade/year.} 18 | \item{se}{Double. Standard error of the mean.} 19 | }} 20 | \source{ 21 | Sean F. Reardon, Demetra Kalogrides, Andrew Ho, Ben Shear, Kenneth Shores, 22 | Erin Fahle. (2016). Stanford Education Data Archive. 23 | \href{http://purl.stanford.edu/db586ns4974}{ 24 | http://purl.stanford.edu/db586ns4974}. For more information, please visit 25 | \href{https://edopportunity.org}{https://edopportunity.org}. 26 | } 27 | \usage{ 28 | seda 29 | } 30 | \description{ 31 | The full SEDA dataset contains mean test scores on statewide testing data in 32 | reading and math for every school district in the United States. See a 33 | description of the data 34 | \href{https://purl.stanford.edu/db586ns4974}{here}. The data 35 | represented in this package represent a random sample of 10\% of all the 36 | cases in the full dataset. To access the full data, please visit the 37 | data archive in the above link. 38 | } 39 | \keyword{datasets} 40 | -------------------------------------------------------------------------------- /man/star.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/star.R 3 | \docType{data} 4 | \name{star} 5 | \alias{star} 6 | \title{Data from the Tennessee class size experiment} 7 | \format{A data frame with 5748 rows and 9 columns. 8 | \describe{ 9 | \item{sid}{Integer. Student identifier.} 10 | \item{schid}{Integer. School identifier.} 11 | \item{condition}{Character. Classroom type the student was enrolled in 12 | (randomly assigned to).} 13 | \item{tch_experience}{Integer. Number of years of teaching experience 14 | for the teacher in the classroom in which the student was 15 | enrolled.} 16 | \item{sex}{Character.
Sex of student: "girl" or "boy".} 17 | \item{freelunch}{Character. Eligibility of the student for free or 18 | reduced price lunch: "no" or "yes"} 19 | \item{race}{Character. The identified race of the student: "white", 20 | "black", or "other"} 21 | \item{math}{Integer. Math scale score.} 22 | \item{reading}{Integer. Reading scale score.} 23 | }} 24 | \usage{ 25 | star 26 | } 27 | \description{ 28 | These data come from the Ecdat package and represent a cross-section of 29 | data from Project STAR (Student/Teacher Achievement Ratio), where students 30 | were randomly assigned to classrooms. 31 | } 32 | \keyword{datasets} 33 | -------------------------------------------------------------------------------- /man/tpac.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{tpac} 4 | \alias{tpac} 5 | \title{Transformed proportion above the cut} 6 | \usage{ 7 | tpac(data, formula, cuts, ref_group = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 11 | format.} 12 | 13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 14 | the outcome variable and \code{group} is the grouping variable. Note this 15 | variable can include any arbitrary number of groups. Additional variables 16 | can be included with \code{+} to produce separate estimates by the secondary 17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 18 | + characteristic2}).} 19 | 20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be 21 | guaranteed to include these points. Otherwise, there could be gaps in the 22 | ECDF at those particular points (used in plotting the cut scores).} 23 | 24 | \item{ref_group}{Optional. 
A character vector or formula listing the 25 | reference group levels for each variable on the right hand side of the 26 | formula, supplied in the same order as the formula. Note that if using the 27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 28 | should be wrapped in back ticks (e.g., 29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 30 | in doubt, it is safest to use the back ticks, as they will not interfere 31 | with anything if they are not needed. See examples below for more details.} 32 | } 33 | \value{ 34 | Tidy data frame of the proportion above the cutoff for 35 | each (or selected) groups. 36 | } 37 | \description{ 38 | This function transforms calls to \link{pac} into standard deviation units. 39 | Function assumes that each distribution is distributed normally with 40 | common variances. See 41 | \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho & 42 | Reardon, 2012} 43 | } 44 | \examples{ 45 | # Compute differences for all pairwise comparisons for each of three cuts 46 | tpac(star, 47 | reading ~ condition, 48 | cut = c(450, 500, 550)) 49 | 50 | tpac(star, 51 | reading ~ condition + freelunch + race, 52 | cut = c(450, 500)) 53 | 54 | tpac(star, 55 | reading ~ condition + freelunch + race, 56 | cut = c(450, 500), 57 | ref_group = ~small + no + white) 58 | } 59 | \seealso{ 60 | [esvis::pac(), esvis::pac_compare(), esvis::tpac_compare()] 61 | } 62 | -------------------------------------------------------------------------------- /man/tpac_compare.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{tpac_compare} 4 | \alias{tpac_compare} 5 | \title{Compare Transformed Proportion Above the Cut} 6 | \usage{ 7 | tpac_compare(data, formula, cuts, ref_group = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{The data frame used for estimation -
ideally structured in a tidy 11 | format.} 12 | 13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 14 | the outcome variable and \code{group} is the grouping variable. Note this 15 | variable can include any arbitrary number of groups. Additional variables 16 | can be included with \code{+} to produce separate estimates by the secondary 17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 18 | + characteristic2}).} 19 | 20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be 21 | guaranteed to include these points. Otherwise, there could be gaps in the 22 | ECDF at those particular points (used in plotting the cut scores).} 23 | 24 | \item{ref_group}{Optional. A character vector or formula listing the 25 | reference group levels for each variable on the right hand side of the 26 | formula, supplied in the same order as the formula. Note that if using the 27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 28 | should be wrapped in back ticks (e.g., 29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 30 | in doubt, it is safest to use the back ticks, as they will not interfere 31 | with anything if they are not needed. See examples below for more details.} 32 | } 33 | \value{ 34 | Tidy data frame of the proportion above the cutoff for 35 | each (or selected) groups. 36 | } 37 | \description{ 38 | This function compares all possible pairwise comparisons, as supplied by 39 | \code{formula}, in terms of the transformed proportion above the cut. This 40 | is an effect-size like measure of the differences between two groups at the 41 | cut point(s) in the distribution.
See 42 | \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho & 43 | Reardon, 2012} 44 | } 45 | \examples{ 46 | # Compute differences for all pairwise comparisons for each of three cuts 47 | tpac_compare(star, 48 | reading ~ condition, 49 | cut = c(450, 500, 550)) 50 | 51 | tpac_compare(star, 52 | reading ~ condition + freelunch + race, 53 | cut = c(450, 500)) 54 | 55 | tpac_compare(star, 56 | reading ~ condition + freelunch + race, 57 | cut = c(450, 500), 58 | ref_group = ~small + no + white) 59 | } 60 | \seealso{ 61 | [esvis::pac(), esvis::pac_compare(), esvis::tpac()] 62 | } 63 | -------------------------------------------------------------------------------- /man/v.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/es_calcs.R 3 | \name{v} 4 | \alias{v} 5 | \title{Calculate the V effect size statistic} 6 | \usage{ 7 | v(data, formula, ref_group = NULL) 8 | } 9 | \arguments{ 10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 11 | format.} 12 | 13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is 14 | the outcome variable and \code{group} is the grouping variable. Note this 15 | variable can include any arbitrary number of groups. Additional variables 16 | can be included with \code{+} to produce separate estimates by the secondary 17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 18 | + characteristic2}).} 19 | 20 | \item{ref_group}{Optional. A character vector or formula listing the 21 | reference group levels for each variable on the right hand side of the 22 | formula, supplied in the same order as the formula. Note that if using the 23 | formula version, levels that are numbers, or include hyphens, spaces, etc., 24 | should be wrapped in back ticks (e.g., 25 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}).
When 26 | in doubt, it is safest to use the back ticks, as they will not interfere 27 | with anything if they are not needed. See examples below for more details.} 28 | } 29 | \value{ 30 | By default the V statistic for all possible pairings of 31 | the grouping factor are returned as a tidy data frame. Alternatively, a 32 | vector can be returned, and/or only the V corresponding to a specific 33 | reference group can be returned. 34 | } 35 | \description{ 36 | This function calculates the effect size V, as discussed by 37 | \href{https://journals.sagepub.com/doi/abs/10.3102/1076998609332755}{Ho, 2009}. The V 38 | statistic is a transformation of \code{\link{auc}}, interpreted as the 39 | average difference between the distributions in standard deviation units. 40 | } 41 | \examples{ 42 | 43 | # Calculate V for all pairwise comparisons 44 | v(star, reading ~ condition) 45 | 46 | # Report only relative to regular-sized classrooms 47 | v(star, 48 | reading ~ condition, 49 | ref_group = "reg") 50 | 51 | # Report by ELL and FRL groups for each season, compare to non-ELL students 52 | # who were not eligible for free or reduced price lunch in the fall (using 53 | # the formula interface for reference group referencing). 
54 | 55 | \dontrun{ 56 | v(benchmarks, 57 | math ~ ell + frl + season, 58 | ref_group = ~`Non-ELL` + `Non-FRL` + Fall) 59 | 60 | # Same thing but with character vector supplied, rather than a formula 61 | v(benchmarks, 62 | math ~ ell + frl + season, 63 | ref_group = c("Non-ELL", "Non-FRL", "Fall")) 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /tests/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/tests/.DS_Store -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(esvis) 3 | 4 | test_check("esvis") 5 | -------------------------------------------------------------------------------- /tests/testthat/test-auc.R: -------------------------------------------------------------------------------- 1 | set.seed(100) 2 | test_data1 <- data.frame(g = c(rep(1, 1e4), rep(2, 1e4)), 3 | score = c(round(rnorm(1e4), 5), 4 | round(rnorm(1e4), 5))) 5 | test_data2 <- data.frame(g = c(rep(1, 1e4), rep(2, 1e4)), 6 | score = c(round(rnorm(1e4), 5), 7 | round(rnorm(1e4, 1), 5))) 8 | 9 | test_that("Area under the curve computes and outputs correctly", { 10 | expect_equal(auc(test_data1, score ~ g)$auc[1], .50, tolerance = 0.03) 11 | expect_equal(auc(test_data2, score ~ g)$auc[1], .75, tolerance = 0.03) 12 | }) 13 | 14 | # ((Levels 1 * Levels 2) * (Levels 1 * Levels 2)) - (Levels 1 * Levels 2) 15 | test_that("Reference group subsetting works correctly", { 16 | expect_equal(nrow(auc(seda, mean ~ grade)), 6*5) 17 | expect_equal(nrow(auc(seda, mean ~ grade, ~`8`)), 5) 18 | expect_equal(nrow(auc(seda, mean ~ grade, "8")), 5) 19 | expect_equal(nrow(auc(benchmarks, math ~ season, "Fall")), 2) 20 | expect_equal(nrow(auc(benchmarks, math ~ season, 
~Winter)), 2) 21 | expect_equal(nrow(auc(benchmarks, math ~ season + ell)), 22 | ((3*3)*(3*3)) - (3*3)) 23 | expect_equal(nrow(auc(benchmarks, math ~ season + ell, 24 | ~Fall + `Non-ELL`)), 25 | (3*3) - 1) 26 | expect_equal(nrow(auc(benchmarks, math ~ season + ell, 27 | c("Fall", "Non-ELL"))), 28 | (3*3) - 1) 29 | expect_equal(nrow(auc(benchmarks, math ~ season + ell, 30 | ~Fall)), 31 | (3*3*3) - 3) 32 | expect_equal(nrow(auc(benchmarks, math ~ season + ell, 33 | c("Fall"))), 34 | (3*3*3) - 3) 35 | 36 | expect_equal(nrow(auc(benchmarks, math ~ season + frl + ethnicity)), 37 | ((3*2*6)*(3*2*6)) - (3*2*6)) 38 | expect_equal(nrow(auc(benchmarks, math ~ season + frl + ethnicity, 39 | ~Fall + `Non-FRL` + White)), 40 | (3*2*6) - 1) 41 | expect_equal(nrow(auc(benchmarks, math ~ season + frl + ethnicity, 42 | ~Fall + `Non-FRL`)), 43 | ((3*2*6)*6) - 6) 44 | 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test-coh_d.R: -------------------------------------------------------------------------------- 1 | set.seed(100) 2 | test_data1 <- data.frame(g = c(rep(1, 1e4), rep(2, 1e4)), 3 | score = c(round(rnorm(1e4), 5), 4 | round(rnorm(1e4), 5))) 5 | test_data2 <- data.frame(g = c(rep(1, 1e4), rep(2, 1e4)), 6 | score = c(round(rnorm(1e4), 5), 7 | round(rnorm(1e4, 1), 5))) 8 | 9 | test_that("Hedges g computes and outputs correctly", { 10 | expect_equal(coh_d(test_data1, score ~ g)$coh_d[1], 0, tolerance = 0.03) 11 | expect_equal(coh_d(test_data2, score ~ g)$coh_d[1], 1, tolerance = 0.03) 12 | }) 13 | 14 | # ((Levels 1 * Levels 2) * (Levels 1 * Levels 2)) - (Levels 1 * Levels 2) 15 | test_that("Reference group subsetting works correctly", { 16 | expect_equal(nrow(coh_d(seda, mean ~ grade)), 6*5) 17 | expect_equal(nrow(coh_d(seda, mean ~ grade, ~`8`)), 5) 18 | expect_equal(nrow(coh_d(seda, mean ~ grade, "8")), 5) 19 | expect_equal(nrow(coh_d(benchmarks, math ~ season, "Fall")), 2) 20 | expect_equal(nrow(coh_d(benchmarks, 
math ~ season, ~Winter)), 2) 21 | expect_equal(nrow(coh_d(benchmarks, math ~ season + ell)), 22 | ((3*3)*(3*3)) - (3*3)) 23 | expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 24 | ~Fall + `Non-ELL`)), 25 | (3*3) - 1) 26 | expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 27 | c("Fall", "Non-ELL"))), 28 | (3*3) - 1) 29 | expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 30 | ~Fall)), 31 | (3*3*3) - 3) 32 | expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 33 | c("Fall"))), 34 | (3*3*3) - 3) 35 | 36 | expect_equal(nrow(coh_d(benchmarks, math ~ season + frl + ethnicity)), 37 | ((3*2*6)*(3*2*6)) - (3*2*6)) 38 | expect_equal(nrow(coh_d(benchmarks, math ~ season + frl + ethnicity, 39 | ~Fall + `Non-FRL` + White)), 40 | (3*2*6) - 1) 41 | expect_equal(nrow(coh_d(benchmarks, math ~ season + frl + ethnicity, 42 | ~Fall + `Non-FRL`)), 43 | ((3*2*6)*6) - 6) 44 | 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test-ecdf_plot.R: -------------------------------------------------------------------------------- 1 | test_that("`ecdf_plot` produces expected output", { 2 | p <- ecdf_plot(benchmarks, math ~ ell) 3 | 4 | expect_equal(p$labels$x, "math") 5 | expect_equal(ecdf_plot(star, reading ~ race)$labels$x, "reading") 6 | 7 | expect_equal(length(p$layers), 1) 8 | 9 | expect_equal(length(ecdf_plot(benchmarks, math ~ ell, 10 | cuts = c(180, 190))$layers), 11 | 3) 12 | expect_equal(length(ecdf_plot(benchmarks, math ~ ell, 13 | cuts = c(180, 190), 14 | ref_rect = FALSE)$layers), 15 | 2) 16 | 17 | expect_null(p$facet$params$facets$panel) 18 | expect_null(p$facet$params$rows) 19 | expect_null(p$facet$params$cols) 20 | 21 | p2 <- ecdf_plot(benchmarks, math ~ ell + season) 22 | expect_false(is.null(p2$facet$params$facets)) 23 | 24 | p3 <- ecdf_plot(benchmarks, math ~ ell + season + frl) 25 | expect_false(is.null(p3$facet$params$rows)) 26 | expect_false(is.null(p3$facet$params$cols)) 27 | }) 28 | 
-------------------------------------------------------------------------------- /tests/testthat/test-hedge_g.R: -------------------------------------------------------------------------------- 1 | set.seed(100) 2 | test_data1 <- data.frame(g = c(rep(1, 1e4), rep(2, 1e4)), 3 | score = c(round(rnorm(1e4), 5), 4 | round(rnorm(1e4), 5))) 5 | test_data2 <- data.frame(g = c(rep(1, 1e4), rep(2, 1e4)), 6 | score = c(round(rnorm(1e4), 5), 7 | round(rnorm(1e4, 1), 5))) 8 | 9 | test_that("Hedges g computes and outputs correctly", { 10 | expect_equal(hedg_g(test_data1, score ~ g)$hedg_g[1], 0, tolerance = 0.03) 11 | expect_equal(hedg_g(test_data2, score ~ g)$hedg_g[1], -1, tolerance = 0.03) 12 | }) 13 | 14 | # ((Levels 1 * Levels 2) * (Levels 1 * Levels 2)) - (Levels 1 * Levels 2) 15 | test_that("Reference group subsetting works correctly", { 16 | expect_equal(nrow(hedg_g(seda, mean ~ grade)), 6*5) 17 | expect_equal(nrow(hedg_g(seda, mean ~ grade, ~`8`)), 5) 18 | expect_equal(nrow(hedg_g(seda, mean ~ grade, "8")), 5) 19 | expect_equal(nrow(hedg_g(benchmarks, math ~ season, "Fall")), 2) 20 | expect_equal(nrow(hedg_g(benchmarks, math ~ season, ~Winter)), 2) 21 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell)), 22 | ((3*3)*(3*3)) - (3*3)) 23 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 24 | ~Fall + `Non-ELL`)), 25 | (3*3) - 1) 26 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 27 | c("Fall", "Non-ELL"))), 28 | (3*3) - 1) 29 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 30 | ~Fall)), 31 | (3*3*3) - 3) 32 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 33 | c("Fall"))), 34 | (3*3*3) - 3) 35 | 36 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + frl + ethnicity)), 37 | ((3*2*6)*(3*2*6)) - (3*2*6)) 38 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + frl + ethnicity, 39 | ~Fall + `Non-FRL` + White)), 40 | (3*2*6) - 1) 41 | expect_equal(nrow(hedg_g(benchmarks, math ~ season + frl + ethnicity, 42 | ~Fall + 
`Non-FRL`)), 43 | ((3*2*6)*6) - 6) 44 | 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test-pp_plot.R: -------------------------------------------------------------------------------- 1 | test_that("`pp_plot` produces expected output", { 2 | p1 <- pp_plot(benchmarks, math ~ ell) 3 | expect_equal(p1$labels$y, "Monitor") 4 | 5 | expect_equal(pp_plot(benchmarks, math ~ ell, 6 | ref_group = "Non-ELL")$labels$y, 7 | "Non-ELL") 8 | 9 | expect_equal(length(p1$layers), 3) 10 | 11 | expect_equal(length(pp_plot(benchmarks, math ~ ell, 12 | shade = FALSE)$layers), 13 | 2) 14 | 15 | expect_equal(length(pp_plot(benchmarks, math ~ ell, 16 | lines = FALSE)$layers), 17 | 2) 18 | expect_equal(length(pp_plot(benchmarks, math ~ ell, 19 | refline = FALSE)$layers), 20 | 2) 21 | 22 | expect_equal(length(pp_plot(benchmarks, math ~ ell, 23 | shade = FALSE, 24 | refline = FALSE)$layers), 25 | 1) 26 | 27 | expect_equal(length(pp_plot(benchmarks, math ~ ell, 28 | cuts = c(180, 190), 29 | shade = FALSE, 30 | refline = FALSE)$layers), 31 | 4) 32 | 33 | expect_null(p1$facet$params$facets) 34 | expect_null(p1$facet$params$rows) 35 | expect_null(p1$facet$params$cols) 36 | 37 | p2 <- pp_plot(benchmarks, math ~ ell + season) 38 | expect_false(is.null(p2$facet$params$facets)) 39 | 40 | p3 <- pp_plot(benchmarks, math ~ ell + season + frl) 41 | expect_false(is.null(p3$facet$params$rows)) 42 | expect_false(is.null(p3$facet$params$cols)) 43 | 44 | p4 <- pp_plot(benchmarks, math ~ ell, cuts = c(180, 190)) 45 | expect_equal(length(p4$layers), 6) 46 | 47 | p5 <- pp_plot(benchmarks, math ~ ell + frl, cuts = c(180, 190)) 48 | expect_equal(length(p5$layers), 6) 49 | 50 | p6 <- pp_plot(benchmarks, math ~ ell, 51 | cuts = c(180, 190), 52 | cut_labels = FALSE) 53 | expect_equal(length(p6$layers), 5) 54 | 55 | }) 56 | -------------------------------------------------------------------------------- /tests/testthat/test-v.R: 
# Tests for v(): the V effect size statistic.
#
# Simulated data: two groups of 10,000 scores each. The seed is fixed so the
# draws are reproducible; the four rnorm() calls must stay in this order.
set.seed(100)
group_id <- rep(c(1, 2), each = 1e4)

# Both groups drawn with the default mean of 0
scores_same <- c(round(rnorm(1e4), 5), round(rnorm(1e4), 5))
test_data1 <- data.frame(g = group_id, score = scores_same)

# Second group drawn with mean 1
scores_shifted <- c(round(rnorm(1e4), 5), round(rnorm(1e4, 1), 5))
test_data2 <- data.frame(g = group_id, score = scores_shifted)

test_that("V computes and outputs correctly", {
  v_same <- v(test_data1, score ~ g)
  expect_equal(v_same$v[1], 0, tolerance = 0.03)

  v_shifted <- v(test_data2, score ~ g)
  expect_equal(v_shifted$v[1], 1, tolerance = 0.03)
})

# With no reference group, the expected row count is every ordered pairing of
# distinct group combinations: (n combinations * n combinations) - n combinations
test_that("Reference group subsetting works correctly", {
  # Level counts as implied by the expected row counts below; presumably
  # 6 grades in `seda` and 3 seasons, 3 ELL levels, 2 FRL levels and
  # 6 ethnicities in `benchmarks` -- TODO confirm against the data.
  n_grade <- 6
  n_season <- 3
  n_ell <- 3
  n_frl <- 2
  n_eth <- 6

  # Number of group combinations for the two- and three-variable formulas
  season_ell <- n_season * n_ell
  season_frl_eth <- n_season * n_frl * n_eth

  # One grouping variable
  expect_equal(nrow(v(seda, mean ~ grade)), n_grade * (n_grade - 1))
  expect_equal(nrow(v(seda, mean ~ grade, ~`8`)), n_grade - 1)
  expect_equal(nrow(v(seda, mean ~ grade, "8")), n_grade - 1)
  expect_equal(nrow(v(benchmarks, math ~ season, "Fall")), n_season - 1)
  expect_equal(nrow(v(benchmarks, math ~ season, ~Winter)), n_season - 1)

  # Two grouping variables
  expect_equal(
    nrow(v(benchmarks, math ~ season + ell)),
    (season_ell * season_ell) - season_ell
  )
  expect_equal(
    nrow(v(benchmarks, math ~ season + ell, ~Fall + `Non-ELL`)),
    season_ell - 1
  )
  expect_equal(
    nrow(v(benchmarks, math ~ season + ell, c("Fall", "Non-ELL"))),
    season_ell - 1
  )
  # Reference given for the first variable only
  expect_equal(
    nrow(v(benchmarks, math ~ season + ell, ~Fall)),
    (season_ell * n_ell) - n_ell
  )
  expect_equal(
    nrow(v(benchmarks, math ~ season + ell, c("Fall"))),
    (season_ell * n_ell) - n_ell
  )

  # Three grouping variables
  expect_equal(
    nrow(v(benchmarks, math ~ season + frl + ethnicity)),
    (season_frl_eth * season_frl_eth) - season_frl_eth
  )
  expect_equal(
    nrow(v(benchmarks, math ~ season + frl + ethnicity,
           ~Fall + `Non-FRL` + White)),
    season_frl_eth - 1
  )
  # Reference given for the first two variables only
  expect_equal(
    nrow(v(benchmarks, math ~ season + frl + ethnicity,
           ~Fall + `Non-FRL`)),
    (season_frl_eth * n_eth) - n_eth
  )
})