├── .Rbuildignore
├── .gitattributes
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
    ├── benchmarks.R
    ├── binned_es_plot.R
    ├── ecdf_plot.R
    ├── es_calcs.R
    ├── esvis-package.R
    ├── pp_plot.R
    ├── seda.R
    ├── star.R
    └── utils.R
├── README-binned_plot1-1.png
├── README-binned_plot2-1.png
├── README-ecdf_plot-1.png
├── README-pp_plot1-1.png
├── README.Rmd
├── README.md
├── appveyor.yml
├── cran-comments.md
├── data
    ├── benchmarks.rda
    ├── seda.rda
    └── star.rda
├── docs
    ├── README-binned_plot1-1.png
    ├── README-binned_plot2-1.png
    ├── README-cleanup.R
    ├── README-ecdf_plot-1.png
    └── README-pp_plot1-1.png
├── esvis.Rproj
├── inst
    └── image
    │   ├── README-binned_plot-1.png
    │   ├── README-ecdf_plot-1.png
    │   ├── README-pp_plot-1.png
    │   ├── README-pp_plot1-1.png
    │   └── README-pp_plot2-1.png
├── man
    ├── auc.Rd
    ├── benchmarks.Rd
    ├── binned_es.Rd
    ├── binned_plot.Rd
    ├── coh.Rd
    ├── coh_d.Rd
    ├── descrip_stats.Rd
    ├── ecdf_fun.Rd
    ├── ecdf_plot.Rd
    ├── esvis-package.Rd
    ├── hedg.Rd
    ├── hedg_g.Rd
    ├── pac.Rd
    ├── pac_compare.Rd
    ├── paired_ecdf.Rd
    ├── pp_plot.Rd
    ├── psd.Rd
    ├── seda.Rd
    ├── star.Rd
    ├── tpac.Rd
    ├── tpac_compare.Rd
    └── v.Rd
└── tests
    ├── .DS_Store
    ├── testthat.R
    └── testthat
        ├── test-auc.R
        ├── test-coh_d.R
        ├── test-ecdf_plot.R
        ├── test-hedge_g.R
        ├── test-pp_plot.R
        └── test-v.R


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^\.travis\.yml$
 2 | ^appveyor\.yml$
 3 | cran-comments.md
 4 | ^README\.Rmd$
 5 | ^README-.*\.png$
 6 | ^.*\.Rproj$
 7 | ^\.Rproj\.user$
 8 | ^docs$
 9 | ^CRAN-RELEASE$
10 | 


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | * text=auto
2 | data/* binary
3 | src/* text=lf
4 | R/* text=lf


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # Example code in package build process
 9 | *-Ex.R
10 | 
11 | # Output files from R CMD build
12 | /*.tar.gz
13 | 
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 | 
17 | # RStudio files
18 | .Rproj.user/
19 | 
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 | 
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 | 
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 | 
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 | 
35 | # Mac garbage
36 | .DS_Store
37 | .Rproj.user
38 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r
 2 | 
 3 | os: 
 4 | - linux
 5 | - osx
 6 | 
 7 | r:
 8 | - release
 9 | 
10 | language: R
11 | sudo: required
12 | cache: packages
13 | 
14 | r_packages:
15 |   - covr
16 | 
17 | r_github_packages:
18 | - tidyverse/tidyr
19 | 
20 | after_success:
21 |   - Rscript -e 'library(covr); codecov()'


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: esvis
 2 | Type: Package
 3 | Title: Visualization and Estimation of Effect Sizes
 4 | Version: 0.3.1
 5 | Authors@R: person("Daniel", "Anderson", email = "daniela@uoregon.edu", 
 6 | 	   role = c("aut", "cre"))
 7 | Description: A variety of methods are provided to estimate and visualize
 8 |     distributional differences in terms of effect sizes. Particular emphasis
 9 |     is upon evaluating differences between two or more distributions across
10 |     the entire scale, rather than at a single point (e.g., differences in
11 |     means). For example, Probability-Probability (PP) plots display the
12 |     difference between two or more distributions, matched by their empirical
13 |     CDFs (see Ho and Reardon, 2012; <doi:10.3102/1076998611411918>), allowing
14 |     for examinations of where on the scale distributional differences are
15 |     largest or smallest. The area under the PP curve (AUC) is an effect-size
16 |     metric, corresponding to the probability that a randomly selected
17 |     observation from the x-axis distribution will have a higher value
18 |     than a randomly selected observation from the y-axis distribution. 
19 |     Binned effect size plots are also available, in which the distributions
20 |     are split into bins (set by the user) and separate effect sizes (Cohen's
21 |     d) are produced for each bin - again providing a means to evaluate the
22 |     consistency (or lack thereof) of the difference between two or more 
23 |     distributions at different points on the scale. Evaluation of empirical 
24 |     CDFs is also provided, with built-in arguments for providing annotations
25 |     to help evaluate distributional differences at specific points (e.g.,
26 |     semi-transparent shading). All functions take a consistent argument
27 |     structure. Calculation of specific effect sizes is also possible. The
28 |     following effect sizes are estimable: (a) Cohen's d, (b) Hedges' g,
29 |     (c) percentage above a cut, (d) transformed (normalized) percentage above
30 |     a cut, (e) area under the PP curve, and (f) the V statistic (see Ho,
31 |     2009; <doi:10.3102/1076998609332755>), which essentially transforms the 
32 |     area under the curve to standard deviation units. By default, effect sizes 
33 |     are calculated for all possible pairwise comparisons, but a reference 
34 |     group (distribution) can be specified.
35 | Depends:
36 |     R (>= 3.1)
37 | Imports:
38 |     sfsmisc,
39 |     ggplot2,
40 |     magrittr,
41 |     dplyr,
42 |     rlang,
43 |     tidyr (>= 1.0.0),
44 |     purrr,
45 |     Hmisc,
46 |     tibble
47 | URL: https://github.com/datalorax/esvis
48 | BugReports: https://github.com/datalorax/esvis/issues
49 | License: MIT + file LICENSE
50 | LazyData: true
51 | RoxygenNote: 7.0.2
52 | Suggests:
53 | 	testthat, 
54 | 	viridisLite
55 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2017
2 | COPYRIGHT HOLDER: Daniel Anderson


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(auc)
 4 | export(binned_es)
 5 | export(binned_plot)
 6 | export(coh_d)
 7 | export(ecdf_plot)
 8 | export(hedg_g)
 9 | export(pac)
10 | export(pac_compare)
11 | export(pp_plot)
12 | export(tpac)
13 | export(tpac_compare)
14 | export(v)
15 | importFrom(Hmisc,cut2)
16 | importFrom(dplyr,arrange)
17 | importFrom(dplyr,bind_cols)
18 | importFrom(dplyr,distinct)
19 | importFrom(dplyr,ends_with)
20 | importFrom(dplyr,everything)
21 | importFrom(dplyr,filter)
22 | importFrom(dplyr,funs)
23 | importFrom(dplyr,group_by)
24 | importFrom(dplyr,group_by_all)
25 | importFrom(dplyr,group_by_at)
26 | importFrom(dplyr,left_join)
27 | importFrom(dplyr,matches)
28 | importFrom(dplyr,mutate)
29 | importFrom(dplyr,mutate_at)
30 | importFrom(dplyr,mutate_if)
31 | importFrom(dplyr,n)
32 | importFrom(dplyr,rename)
33 | importFrom(dplyr,select)
34 | importFrom(dplyr,semi_join)
35 | importFrom(dplyr,summarize)
36 | importFrom(dplyr,summarize_at)
37 | importFrom(dplyr,tbl_df)
38 | importFrom(dplyr,ungroup)
39 | importFrom(dplyr,vars)
40 | importFrom(ggplot2,aes_)
41 | importFrom(ggplot2,facet_grid)
42 | importFrom(ggplot2,facet_wrap)
43 | importFrom(ggplot2,geom_abline)
44 | importFrom(ggplot2,geom_hline)
45 | importFrom(ggplot2,geom_label)
46 | importFrom(ggplot2,geom_line)
47 | importFrom(ggplot2,geom_point)
48 | importFrom(ggplot2,geom_rect)
49 | importFrom(ggplot2,geom_ribbon)
50 | importFrom(ggplot2,geom_segment)
51 | importFrom(ggplot2,geom_step)
52 | importFrom(ggplot2,geom_vline)
53 | importFrom(ggplot2,ggplot)
54 | importFrom(ggplot2,ggplot_build)
55 | importFrom(ggplot2,labs)
56 | importFrom(grDevices,adjustcolor)
57 | importFrom(graphics,layout)
58 | importFrom(graphics,lines)
59 | importFrom(graphics,par)
60 | importFrom(graphics,polygon)
61 | importFrom(graphics,rect)
62 | importFrom(graphics,segments)
63 | importFrom(magrittr,"%>%")
64 | importFrom(purrr,is_atomic)
65 | importFrom(purrr,map)
66 | importFrom(purrr,map2)
67 | importFrom(purrr,map2_df)
68 | importFrom(purrr,map2_lgl)
69 | importFrom(purrr,map_dbl)
70 | importFrom(purrr,map_lgl)
71 | importFrom(rlang,":=")
72 | importFrom(rlang,.data)
73 | importFrom(rlang,parse_quo)
74 | importFrom(rlang,quo)
75 | importFrom(rlang,quo_get_expr)
76 | importFrom(rlang,quos)
77 | importFrom(rlang,set_names)
78 | importFrom(rlang,sym)
79 | importFrom(rlang,syms)
80 | importFrom(sfsmisc,integrate.xy)
81 | importFrom(stats,as.formula)
82 | importFrom(stats,ecdf)
83 | importFrom(stats,na.omit)
84 | importFrom(stats,qnorm)
85 | importFrom(stats,setNames)
86 | importFrom(stats,terms)
87 | importFrom(stats,var)
88 | importFrom(tibble,lst)
89 | importFrom(tibble,tibble)
90 | importFrom(tidyr,crossing)
91 | importFrom(tidyr,fill)
92 | importFrom(tidyr,gather)
93 | importFrom(tidyr,nest)
94 | importFrom(tidyr,separate)
95 | importFrom(tidyr,spread)
96 | importFrom(tidyr,unnest)
97 | importFrom(utils,installed.packages)
98 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # esvis 0.3.1.0000
 2 | Minor update to fix breaking changes with the release of dplyr 1.0
 3 | 
 4 | # esvis 0.3.0.0000
 5 | This is a major update that:
 6 | 
 7 | * Moves all the plotting functionality to ggplot2.
 8 | 
 9 | * Extends the plotting by allowing faceting with `+` in the
10 |   formula. Up to two additional variables are possible.
11 |   
12 | * Changed the API so the data argument comes first, followed 
13 |   by the formula.
14 | 
15 | # esvis 0.2.1.0000 (not submitted to CRAN)
16 | This release fixes bugs introduced by dependency updates.
17 | 
18 | # esvis 0.2.0.0000
19 | 
20 | This release is mostly about reformatting code and minor bug fixes. A few changes:
21 | 
22 | * The [viridisLite](https://CRAN.R-project.org/package=viridisLite) package is now listed as a `suggests`, and there are options for the plots to be produced with these color schemes, if the package is installed.
23 | 
24 | * A few of the effect sizes were reversed in 0.1, relative to the focal/reference groups. Those have been fixed.
25 | 
26 | * There is now a `theme` function that is extensible and allows for custom themes, rather than just the "standard" and "dark" themes.
27 | 


--------------------------------------------------------------------------------
/R/benchmarks.R:
--------------------------------------------------------------------------------
 1 | #' Synthetic benchmark screening data
 2 | #' 
 3 | #' Across the country many schools engage in seasonal benchmark screenings to 
 4 | #' monitor the progress of their students. These are relatively brief
 5 | #' assessments administered to "check-in" on students' progress throughout
 6 | #' the year. This dataset was simulated from a real dataset from one large
 7 | #' school district using the terrific 
 8 | #' \href{https://CRAN.R-project.org/package=synthpop}{synthpop}
 9 | #' R package. Overall characteristics of the synthetic data are remarkably
10 | #' similar to the real data.
11 | #' 
12 | #' @format A data frame with 10240 rows and 9 columns.
13 | #'   \describe{
14 | #'     \item{sid}{Integer. Student identifier.}
15 | #' 	   \item{cohort}{Integer. Identifies the cohort from which the student was
16 | #' 			sampled (1-3).}
17 | #'     \item{sped}{Character. Special Education status: "Non-Sped" or "Sped"}
18 | #' 	   \item{ethnicity}{Character. The race/ethnicity to which the student
19 | #' 			identified. Takes on one of seven values: "Am. Indian", "Asian",
20 | #' 			"Black", "Hispanic", "Native Am.", "Two or More", and "White"}
21 | #' 	   \item{frl}{Character. Student's eligibility for free or reduced price
22 | #' 			lunch. Takes on the values "FRL" and "Non-FRL".}
23 | #' 	   \item{ell}{Character. Students' English language learner status. Takes 
24 | #' 			on one of three values: "Active", "Monitor", and "Non-ELL". Students
25 | #' 			coded "Active" were actively receiving English language services
26 | #' 			at the time of testing. Students coded "Monitor" had previously 
27 | #' 			received services, but not at the time of testing. Students coded
28 | #' 			"Non-ELL" did not receive services at any time.}
29 | #'     \item{season}{Character. The season during which the assessment was
30 | #' 			administered: "Fall", "Winter", or "Spring"}
31 | #'     \item{reading}{Integer. Reading scale score.}
32 | #'     \item{math}{Integer. Mathematics scale score.}
33 | #' }
34 | 
35 | "benchmarks"


--------------------------------------------------------------------------------
/R/binned_es_plot.R:
--------------------------------------------------------------------------------
  1 | #' Quantile-binned effect size plot
  2 | #' 
  3 | #' Plots the effect size between focal and reference groups by matched (binned) 
  4 | #' quantiles (i.e., the results from \link{binned_es}), with the matched
  5 | #' quantiles plotted along the x-axis and the effect size plotted along the 
  6 | #' y-axis. The intent is to examine how (if) the magnitude of the effect size
  7 | #' varies at different points of the distributions. The mean differences within
  8 | #' each quantile bin are divided by the overall pooled standard deviation for 
  9 | #' the two groups being compared.
 10 | #' 
 11 | #' @inheritParams pp_plot
 12 | #' @param qtile_groups The number of quantile bins to split the data by and 
 13 | #' calculate effect sizes. Defaults to 3 bins (lower, middle, upper). 
 14 | #' @param es The effect size to plot. Defaults to \code{"g"}, in which case 
 15 | #' Hedges' g is plotted, which is better for small samples. At present, the 
 16 | #' only other option is \code{"d"} for Cohen's d.
 17 | #' @param points Logical. Should points be plotted for each quantile bin
 18 | #' (see \code{qtile_groups})? Defaults to \code{TRUE}.
 19 | #' @param shade Logical. Should the standard errors around the effect size point
 20 | #' estimates be displayed? Defaults to \code{TRUE}, with the uncertainty 
 21 | #' displayed with shading. 
 22 | #' @param shade_alpha Transparency level of the standard error shading.
 23 | #' Defaults to 0.40.
 24 | #' @param refline Logical. Defaults to \code{TRUE}. Should a horizontal
 25 | #' reference line, marking an effect size of zero, be plotted?
 26 | #' @param refline_col The color of the reference line. Defaults to 
 27 | #'   \code{"gray40"}
 28 | #' @param refline_lty Line type of the reference line. Defaults to
 29 | #'   \code{"solid"}.
 30 | #' @param refline_lwd Line width of the reference line. Defaults to \code{1.1}.
 31 | #' @param rects Logical. Should semi-transparent rectangles be plotted in the 
 32 | #' background to show the binning? Defaults to \code{TRUE}.
 33 | #' @param rect_fill Color fill of rectangles to be plotted in the background, if
 34 | #' \code{rects == TRUE}. Defaults to "gray20".
 35 | #' @param rect_alpha Transparency level of the rectangles in the background when
 36 | #' \code{rects == TRUE}. Defaults to 0.35.
 37 | #' @export
 38 | #' @examples
 39 | #' # Binned Effect Size Plot: Defaults to Hedges' G
 40 | #' binned_plot(star, math ~ condition)
 41 | #'  
 42 | #' # Same plot, separated by sex
 43 | #' binned_plot(star, math ~ condition + sex)
 44 | #' 
 45 | #' # Same plot by sex and race
 46 | #' \dontrun{
 47 | #'   binned_plot(star, math ~ condition + sex + race)
 48 | #' }
 49 | #' ## Evaluate with simulated data: Plot is most interesting when variance
 50 | #' # in the distributions being compared differ.
 51 | #' 
 52 | #' library(tidyr)
 53 | #' library(ggplot2)
 54 | #' 
 55 | #' # simulate data with different variances
 56 | #' set.seed(100)
 57 | #' common_vars <- data.frame(low  = rnorm(1000, 10, 1),
 58 | #'                         high = rnorm(1000, 12, 1),
 59 | #'                         vars = "common")
 60 | #' diff_vars <- data.frame(low  = rnorm(1000, 10, 1),
 61 | #'                       high = rnorm(1000, 12, 2),
 62 | #'                       vars = "diff")
 63 | #' d <- rbind(common_vars, diff_vars)
 64 | #' 
 65 | #' # Plot distributions 
 66 | #' d <- d %>% 
 67 | #' gather(group, value, -vars) 
 68 | #' 
 69 | #' ggplot(d, aes(value, color = group)) +
 70 | #'  geom_density() +
 71 | #'  facet_wrap(~vars)
 72 | #'
 73 | #' # Note that the difference between the distributions depends on where you're 
 74 | #' # evaluating from on the x-axis. The binned plot helps us visualize this. 
 75 | #' # The below shows the binned plots when there is a common versus different
 76 | #' # variance
 77 | #' 
 78 | #' binned_plot(d, value ~ group + vars)   
 79 | 
 80 | binned_plot <- function(data, formula, ref_group = NULL, qtile_groups = 3, 
 81 |                         es = "g", lines = TRUE, points = TRUE, 
 82 |                         shade = TRUE, shade_alpha = 0.40,
 83 |                         rects = TRUE, rect_fill = "gray20", rect_alpha = 0.35,
 84 |                         refline = TRUE, refline_col = "gray40", 
 85 |                         refline_lty = "solid", refline_lwd = 1.1) {
 86 |   rhs  <- labels(terms(formula))
 87 |   lhs  <- all.vars(formula)[1]
 88 |   # A one-sided formula has length 2, so convert it before the length check.
 89 |   if(inherits(ref_group, "formula")) {
 90 |     ref_group <- gsub("~|`", "", as.character(ref_group))[2]
 91 |   }
 92 |   if(length(ref_group) > 1) {
 93 |     warning(paste0("Please only specify one reference group. Faceting ", 
 94 |                    "will be used for other groups. Reference group supplied ", 
 95 |                    "for first group will be used."))
 96 |     ref_group <- ref_group[1]
 97 |   }
 98 |   # Default reference: level of the first grouping variable with the highest mean.
 99 |   if(is.null(ref_group)) {
100 |     group_means <- tapply(data[[lhs]], data[[rhs[1]]], mean, na.rm = TRUE)
101 |     ref_group <- names(group_means)[which.max(group_means)]
102 |   }
103 |   d <- binned_es(data, formula, ref_group, qtile_groups = qtile_groups,
104 |                  es = es, rename = FALSE) %>% 
105 |     filter(!!sym(paste0(rhs[1], 1)) != ref_group)
106 |   # With facets, keep only comparisons made within the same facet level.
107 |   if(length(rhs) == 2) {
108 |     d <- filter(d, !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)))
109 |   }
110 |   if(length(rhs) == 3) {
111 |     d <- filter(d, 
112 |                 !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)),
113 |                 !!sym(rhs[3]) == !!sym(paste0(rhs[3], 1)))
114 |   }
115 |   if(shade) {  # bounds are es +/- qnorm(0.975) * es_se (a 95% normal interval)
116 |     d <- d %>% 
117 |       mutate(lb = .data$es + (qnorm(0.025)*.data$es_se),
118 |              ub = .data$es + (qnorm(0.975)*.data$es_se))
119 |   }
120 |   p <- d %>% 
121 |     mutate(midpoint = .data$qtile_ub - (.data$qtile_ub[1] / 2)) %>% 
122 |     ggplot(aes_(~midpoint, ~es))
123 |   # Background rectangles are drawn only for rows where `q` is odd.
124 |   if(rects) {
125 |     p <- p +
126 |       geom_rect(aes_(xmin = ~qtile_lb, 
127 |                     xmax  = ~qtile_ub, 
128 |                     ymin  = -Inf, 
129 |                     ymax  = Inf),
130 |                 filter(d, as.logical(q %% 2)),
131 |                 alpha = rect_alpha,
132 |                 fill = rect_fill,
133 |                 inherit.aes = FALSE)
134 |   }
135 |   if(shade) {
136 |     p <- p + geom_ribbon(aes_(ymin = ~lb, 
137 |                               ymax = ~ub, 
138 |                               fill = as.name(paste0(rhs[1], 1))),
139 |                     alpha = shade_alpha)
140 |   }
141 |   if(refline) {
142 |     p <- p + geom_hline(yintercept = 0, 
143 |                         color = refline_col,
144 |                         lty   = refline_lty,
145 |                         lwd   = refline_lwd)
146 |   }
147 |   if(lines)   p <- p + geom_line(aes_(group = as.name(paste0(rhs[1], 1)), color = as.name(paste0(rhs[1], 1))))
148 |   if(points)  p <- p + geom_point(aes_(group = as.name(paste0(rhs[1], 1)), color = as.name(paste0(rhs[1], 1))))
149 |   # Facet by the second (and third) right-hand-side variable, if supplied.
150 |   if(length(rhs) == 2) p <- p + facet_wrap(as.formula(paste0("~", rhs[2])))
151 |   if(length(rhs) == 3) {
152 |     p <- p + facet_grid(as.formula(paste0(rhs[2], "~", rhs[3])))
153 |   } 
154 |   p + labs(x = "Quantile Bin",
155 |            y = "Effect Size Estimate") 
156 | }


--------------------------------------------------------------------------------
/R/ecdf_plot.R:
--------------------------------------------------------------------------------
  1 | #' Empirical Cumulative Distribution Plot
  2 | #' 
  3 | #' This is a wrapper function for the \link[ggplot2]{stat_ecdf} function and 
  4 | #' helps make it easy to directly compare distributions at specific
  5 | #' locations along the scale. 
  6 | #' @param data A tidy data frame containing the data to be plotted.
  7 | #' @param formula A formula of the type \code{out ~ group} where \code{out} is
  8 | #' the outcome variable and \code{group} is the grouping variable. Note this
  9 | #' variable can include any arbitrary number of groups. Additional variables 
 10 | #' can be included with \code{+} to produce separate plots by the secondary or
 11 | #' tertiary variable (e.g., \code{out ~ group + characteristic1 + 
 12 | #' characteristic2}). No more than two additional characteristics can be 
 13 | #' supplied at this time.
 14 | #' @param cuts Optional numeric vector stating the location of reference 
 15 | #' line(s) and/or rectangle(s).
 16 | #' @param linewidth Width of ECDF lines. Note that the color of the lines can 
 17 | #' be controlled through additional functions (e.g., \code{scale_color_brewer,
 18 | #'   scale_color_manual}).
 19 | #' @param ref_line_cols Optional vector (or single value) of colors for 
 20 | #'   \code{cuts} lines.
 21 | #' @param ref_linetype Optional vector (or single value) of line types for 
 22 | #'   \code{cuts} lines. Takes any of the arguments supplied by 
 23 | #'   \link[ggplot2]{linetype}. 
 24 | #' @param center Logical. Should the functions be centered prior to plotting? 
 25 | #' Defaults to \code{FALSE}. Note that if paneled/faceted plots are produced, 
 26 | #' the centering occurs by group. 
 27 | #' @param ref_rect Logical, defaults to \code{TRUE} when \code{cuts} takes 
 28 | #' any non-null value. Should semi-transparent rectangle(s) be plotted at the 
 29 | #' locations of \code{cuts}? 
 30 | #' @param ref_rect_col Color of the fill for the reference rectangles. Defaults 
 31 | #'   to a dark gray.
 32 | #' @param ref_rect_alpha Transparency of the fill for the reference rectangles. 
 33 | #'   Defaults to 0.15.
 34 | #' @export
 35 | #' @examples
 36 | #' ecdf_plot(benchmarks, math ~ ell, 
 37 | #'           cuts = c(190, 205, 210), 
 38 | #'           ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA"))
 39 | #' 
 40 | #' # Customize the plot with ggplot2 functions
 41 | #' library(ggplot2)
 42 | #' ecdf_plot(benchmarks, math ~ ell, 
 43 | #'           cuts = c(190, 205, 210), 
 44 | #'           ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA")) +
 45 | #'   theme_minimal() +
 46 | #'   theme(legend.position = "bottom")
 47 | #'
 48 | #' ecdf_plot(seda, mean ~ grade) +
 49 | #'   scale_fill_brewer(palette = "Set2") +
 50 | #'   theme_minimal()
 51 | #'   
 52 | #' # Use within the dplyr pipeline
 53 | #' library(dplyr)
 54 | #' benchmarks %>% 
 55 | #'   mutate(season = factor(season, 
 56 | #'                          levels = c("Fall", "Winter", "Spring"))) %>% 
 57 | #'   ecdf_plot(math ~ ell + season + frl)
 58 | 
 59 | ecdf_plot <- function(data, formula, cuts = NULL, linewidth = 1.2, 
 60 |                       ref_line_cols = "gray40", ref_linetype = "solid", 
 61 |                       center = FALSE, ref_rect = TRUE,
 62 |                       ref_rect_col = "gray40", ref_rect_alpha = 0.15) {
 63 |   # The first formula variable is the outcome; the RHS terms are the groups.
 64 |   lhs   <- all.vars(formula)[1]
 65 |   rhs   <- labels(terms(formula))
 66 |   n_rhs <- length(rhs)
 67 |   # Optionally center the outcome on its mean within each group combination.
 68 |   if(center) {
 69 |     data <- select(data, lhs, rhs)
 70 |     data <- mutate(group_by_at(data, rhs),
 71 |                    !!sym(lhs) := scale(!!sym(lhs), scale = FALSE))
 72 |   }
 73 |   # Expand the list-columns `ecdf` and `nd` returned by ecdf_fun into rows.
 74 |   ecdf_d <- unnest(ecdf_fun(data, formula, cuts),
 75 |                    cols = c(.data$ecdf, .data$nd))
 76 | 
 77 |   plt <- ggplot(ecdf_d, aes_(~nd, ~ecdf))
 78 |   # Facet on the second (and third) grouping variable when present.
 79 |   if(n_rhs == 2) {
 80 |     plt <- plt + facet_wrap(as.formula(paste0("~", rhs[2])))
 81 |   } else if(n_rhs == 3) {
 82 |     plt <- plt + facet_grid(as.formula(paste0(rhs[3], "~", rhs[2])))
 83 |   }
 84 |   # Reference lines (and optional shaded regions) are added before the ECDF
 85 |   # steps, so the curves are drawn on top of them.
 86 |   if(!is.null(cuts)) {
 87 |     plt <- plt + geom_vline(xintercept = cuts,
 88 |                             color      = ref_line_cols,
 89 |                             linetype   = ref_linetype)
 90 |     if(ref_rect) {
 91 |       # One row per cut; each rectangle spans from its cut to the right edge.
 92 |       cut_d    <- gather(as.data.frame(t(cuts)), "dis", "nd")
 93 |       rect_aes <- aes_(xmin = ~nd, xmax = Inf, ymin = 0, ymax = Inf)
 94 | 
 95 |       plt <- plt + geom_rect(rect_aes,
 96 |                              cut_d,
 97 |                              fill        = ref_rect_col,
 98 |                              alpha       = ref_rect_alpha,
 99 |                              inherit.aes = FALSE)
100 |     }
101 |   }
102 |   # Step curves are colored by the first grouping variable and added last,
103 |   # followed by the axis labels.
104 |   ecdf_steps <- geom_step(aes_(color = as.name(rhs[1])), size = linewidth)
105 |   axis_labs  <- labs(x = lhs, y = "Proportion")
106 | 
107 |   plt + ecdf_steps + axis_labs
108 | }
109 | 
110 | 


--------------------------------------------------------------------------------
/R/es_calcs.R:
--------------------------------------------------------------------------------
  1 | #' Pooled Standard Deviation
  2 | #' 
  3 | #' The denominator for Cohen's d
  4 | #' @keywords internal
  5 | #' @param n1 The sample size for group 1
  6 | #' @param n2 The sample size for group 2
  7 | #' @param vr1 The variance for group 1
  8 | #' @param vr2 The variance for group 2
  9 | #' 
 10 | psd <- function(n1, n2, vr1, vr2) {
 11 |   # Sum the two variances, each weighted by its degrees of freedom (n - 1).
 12 |   weighted_ss <- vr1 * (n1 - 1) + vr2 * (n2 - 1)
 13 |   pooled_df   <- n1 + n2 - 2
 14 | 
 15 |   sqrt(weighted_ss / pooled_df)
 16 | }
 17 | 
 18 | #' Cohen's d 
 19 | #' 
 20 | #' Wraps the equation into a function
 21 | #' @keywords internal
 22 | #' @param n1 The sample size for group 1
 23 | #' @param n2 The sample size for group 2
 24 | #' @param mn1 The mean for group 1
 25 | #' @param mn2 The mean for group 2
 26 | #' @param vr1 The variance for group 1
 27 | #' @param vr2 The variance for group 2
 28 | 
 29 | coh <- function(n1, n2, mn1, mn2, vr1, vr2) {  # mean difference in pooled-SD units
 30 |   (mn1 - mn2) / psd(n1 = n1, n2 = n2, vr1 = vr1, vr2 = vr2)
 31 | }
 32 | 
 33 | coh_se <- function(n1, n2, d) {  # standard error of d from the two group sizes
 34 |   sqrt(d^2 / (2 * (n1 + n2)) + (n1 + n2) / (n1 * n2))
 35 | }
 36 | 
 37 | #' Compute Cohen's \emph{d}
 38 | #' 
 39 | #' This function calculates effect sizes in terms of Cohen's \emph{d}, also
 40 | #' called the uncorrected effect size. See \code{\link{hedg_g}} for the sample
 41 | #' size corrected version. Also see 
 42 | #' \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)}
 43 | #' for a discussion on different types of effect sizes and their
 44 | #' interpretation. Note that missing data are removed from the calculations of 
 45 | #' the means and standard deviations.
 46 | #' @param data The data frame used for estimation - ideally structured in a tidy 
 47 | #' format.
 48 | #' @param formula A formula of the type \code{out ~ group} where \code{out} is
 49 | #' the outcome variable and \code{group} is the grouping variable. Note this
 50 | #' variable can include any arbitrary number of groups. Additional variables 
 51 | #' can be included with \code{+} to produce separate estimates by the secondary 
 52 | #' or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
 53 | #' + characteristic2}). 
 54 | #' @param ref_group Optional. A character vector or formula listing the 
 55 | #' reference group levels for each variable on the right hand side of the 
 56 | #' formula, supplied in the same order as the formula. Note that if using the
 57 | #' formula version, levels that are numbers, or include hyphens, spaces, etc., 
 58 | #' should be wrapped in back ticks (e.g., 
 59 | #' \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
 60 | #' in doubt, it is safest to use the back ticks, as they will not interfere 
 61 | #' with anything if they are not needed. See examples below for more details.
 62 | #' @param se Logical. Should the standard error of the effect size be 
 63 | #'   estimated and returned in the resulting data frame? Defaults to 
 64 | #'   \code{TRUE}.
 65 | #' @return By default the Cohen's \emph{d} for all possible pairings of
 66 | #'  the grouping factor(s) are returned.
 67 | #' @export
 68 | #' @examples
 69 | #' 
 70 | #' # Calculate Cohen's d for all pairwise comparisons
 71 | #' coh_d(star, reading ~ condition) 
 72 | #' 
 73 | #' # Report only relative to regular-sized classrooms
 74 | #' coh_d(star,
 75 | #'       reading ~ condition, 
 76 | #' 		   ref_group = "reg")
 77 | #' 
 78 | #' # Report by ELL and FRL groups for each season, compare to non-ELL students
 79 | #' # who were not eligible for free or reduced price lunch in the fall (using
 80 | #' # the formula interface for reference group referencing).
 81 | #' 
 82 | #' coh_d(benchmarks, 
 83 | #'       math ~ ell + frl + season,
 84 | #'       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
 85 | #' 
 86 | #' # Same thing but with character vector supplied, rather than a formula
 87 | #' coh_d(benchmarks, 
 88 | #'       math ~ ell + frl + season,
 89 | #'       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
 90 | 
 91 | coh_d <- function(data, formula, ref_group = NULL, se = TRUE) {
 92 |   # n, mean, and variance per comparison; integer columns become doubles.
 93 |   stats <- descrip_cross(data, formula, length = length, mean = mean, var = var) %>% 
 94 |     mutate_if(is.integer, as.double)
 95 |   # NOTE(review): hedg_g passes the unsuffixed group first to coh(); confirm sign.
 96 |   d <- stats %>% 
 97 |     mutate(coh_d = coh(.data$length1, .data$length, 
 98 |                        .data$mean1,   .data$mean, 
 99 |                        .data$var1,    .data$var))
100 |   # Honor `se`: the standard-error column is added only when requested.
101 |   if(se) {
102 |     d <- d %>% 
103 |       mutate(coh_se = coh_se(.data$length1, .data$length, .data$coh_d))
104 |   }
105 |   # Drop the intermediate descriptive statistics, keeping only the estimates.
106 |   d <- d %>% 
107 |     select(-.data$length, 
108 |            -.data$length1, 
109 |            -.data$mean, 
110 |            -.data$mean1, 
111 |            -.data$var, 
112 |            -.data$var1) %>% 
113 |     ungroup()
114 |   
115 |   if(!is.null(ref_group)) {
116 |     d <- ref_subset(d, formula, ref_group)
117 |   }
118 |   rename_ref_foc(d, formula)
119 | }
120 |  
121 | #' Hedges' g
122 | #' 
123 | #' Wraps the equation into a function
124 | #' @keywords internal
125 | #' @param n1 The sample size for group 1
126 | #' @param n2 The sample size for group 2
127 | #' @param d The value of Cohen's d
128 | #' 
129 | hedg <- function(n1, n2, d) {  # scale d by the sample-size correction factor
130 |   (1 - 3 / (4 * (n1 + n2) - 9)) * d
131 | }
132 | 
133 | #' Compute Hedges' \emph{g}
134 | #' 
135 | #' This function calculates effect sizes in terms of Hedges' \emph{g}, also
136 | #' called the corrected (for sample size) effect size. See \code{\link{coh_d}}
137 | #' for the uncorrected version. Also see 
138 | #' \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)}
139 | #' for a discussion on different types of effect sizes and their interpretation.
140 | #' Note that missing data are removed when computing means and standard deviations.
141 | #' @inheritParams coh_d 
142 | #' @param keep_d Logical. Should Cohen's \emph{d} be reported along with 
143 | #'   Hedges' \emph{g}? Defaults to \code{TRUE}.
144 | #' @return By default the Hedges' \emph{g} for all possible pairings of
145 | #'  the grouping factor are returned as a tidy data frame.
146 | #' @export
147 | #' @examples
148 | #' 
149 | #' # Calculate Hedges' g for all pairwise comparisons
150 | #' hedg_g(star, reading ~ condition) 
151 | #' 
152 | #' # Report only relative to regular-sized classrooms
153 | #' hedg_g(star, 
154 | #'        reading ~ condition, 
155 | #'        ref_group = "reg")
156 | #' 
157 | #' # Report by ELL and FRL groups for each season, compare to non-ELL students
158 | #' # who were not eligible for free or reduced price lunch in the fall (using
159 | #' # the formula interface for reference group referencing).
160 | #' 
161 | #' hedg_g(benchmarks, 
162 | #'       math ~ ell + frl + season,
163 | #'       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
164 | #' 
165 | #' # Same thing but with character vector supplied, rather than a formula
166 | #' hedg_g(benchmarks, 
167 | #'       math ~ ell + frl + season,
168 | #'       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
169 | 
170 | hedg_g <- function(data, formula, ref_group = NULL,
171 |                    keep_d = TRUE) {
172 |   # Per-group n, mean and variance, crossed over every pairing of groups.
173 |   crossed <- descrip_cross(data, formula, length = length, mean = mean, var = var)
174 |   
175 |   # NOTE(review): coh() receives the un-suffixed columns first here, whereas
176 |   # coh_d() passes the "1"-suffixed columns first -- confirm the sign is intended.
177 |   g <- mutate(crossed, 
178 |               coh_d = coh(.data$length, .data$length1, .data$mean, .data$mean1, 
179 |                           .data$var, .data$var1))
180 |   g <- mutate(g, hedg_g = hedg(.data$length, .data$length1, .data$coh_d))
181 |   g <- select(g, -.data$length, -.data$length1, -.data$mean, -.data$mean1, 
182 |               -.data$var, -.data$var1)
183 |   
184 |   if(!keep_d) {
185 |     g <- select(g, -.data$coh_d)
186 |   }
187 |   if(!is.null(ref_group)) g <- ref_subset(g, formula, ref_group)
188 |   
189 |   rename_ref_foc(g, formula)
190 | }
191 | 
192 | mean_diff <- function(data, formula, ref_group, qtile_groups = NULL) {
193 |   # Mean of the "1"-suffixed group minus the other; ref_group is never used.
194 |   means <- descrip_cross(data, formula, mean = mean, qtile_groups = qtile_groups)
195 |   select(mutate(means, mean_diff = .data$mean1 - .data$mean), -.data$mean, -.data$mean1)
196 | }
197 | 
198 | pooled_sd <- function(data, formula, ref_group, keep_n = FALSE) {
199 |   # Pooled SD for each pairing of groups; ref_group is accepted but never used.
200 |   stats <- descrip_cross(data, formula, length = length, var = var)
201 |   pooled <- mutate(stats, 
202 |                    psd = psd(.data$length, .data$length1, .data$var, .data$var1))
203 |   pooled <- select(pooled, -.data$var, -.data$var1)
204 |   
205 |   # Keep the sample sizes only when the caller asks for them.
206 |   if(keep_n) pooled else select(pooled, -.data$length, -.data$length1)
207 | }
208 | 
209 | #' Calculate binned effect sizes
210 | #' @inheritParams coh_d
211 | #' @param qtile_groups The number of quantile bins to split the data by and 
212 | #'   calculate effect sizes. Defaults to 3 bins (lower, middle, upper).
213 | #' @param es The effect size to calculate. Currently the only options are 
214 | #'   "d" or "g".
215 | #' @param rename Logical. Should the column names be relabeled according to
216 | #'   the reference and focal groups. Defaults to \code{TRUE}.
217 | #' @return A data frame with the corresponding effect sizes.
218 | #' @export
219 | 
220 | binned_es <- function(data, formula, ref_group = NULL, qtile_groups = 3,
221 |                       es = "g", rename = TRUE) {
222 |   # Validate `es` first so a bad value fails before any statistics are computed
223 |   # (and fails with this message even for NULL or length > 1 input).
224 |   if(length(es) != 1 || !es %in% c("g", "d")) {
225 |     stop("es must be one of `'g'` or `'d'`.")
226 |   }
227 |   mn_diff <- mean_diff(data, formula, qtile_groups = qtile_groups)
228 |   p_sd <- pooled_sd(data, formula, keep_n = TRUE)
229 |   
230 |   # Binned mean differences are standardized by the un-binned pooled SD; the
231 |   # two tables are joined on whatever columns they share.
232 |   d <- suppressMessages(left_join(mn_diff, p_sd)) %>% 
233 |     mutate(es    = .data$mean_diff/.data$psd,
234 |            es_se = coh_se(.data$length, .data$length1, .data$es))
235 |   
236 |   if(es == "g") d <- mutate(d, es = hedg(.data$length, .data$length1, .data$es))
237 |   if(!is.null(ref_group)) d <- ref_subset(d, formula, ref_group)
238 |   if(rename) d <- rename_ref_foc(d, formula)
239 |   d
240 | }
241 | 
242 | #' Computes the empirical cumulative distribution function for all groups
243 | #' supplied by the formula.
244 | #' @inheritParams coh_d
245 | #' @param cuts Optional vector of cut scores. If supplied, the ECDF will be
246 | #' guaranteed to include these points. Otherwise, there could be gaps in the 
247 | #' ECDF at those particular points (used in plotting the cut scores).
248 | #' @keywords internal
249 | 
250 | ecdf_fun <- function(data, formula, cuts = NULL) {
251 |   if(is.null(cuts)) cuts <- 0
252 |   rhs  <- labels(terms(formula))
253 |   lhs  <- all.vars(formula)[1]
254 |   # One row per group, with that group's observations nested in `data`.
255 |   nested <- nest(group_by(mutate_at(data, vars(!!!syms(rhs)), list(as.character)),
256 |                           !!!syms(rhs)))
257 |   # nd: evaluation grid (scores + cuts, padded by -Inf/Inf); ecdf: values on it.
258 |   out <- mutate(nested,
259 |                 ecdf = map(.data$data, ~ecdf(.[[lhs]])),
260 |                 nd   = map(.data$data, ~c(-Inf, sort(c(unique(.[[lhs]]), cuts)), Inf)),
261 |                 ecdf = map2(.data$ecdf, .data$nd, ~.x(.y)))
262 |   select(out, -.data$data)
263 | }
264 | 
265 | #' Pairs empirical cumulative distribution functions for all groups
266 | #' supplied by the formula.
267 | #' @inheritParams ecdf_fun
268 | #' @keywords internal
269 | 
270 | paired_ecdf <- function(data, formula, cuts = NULL) {
271 |   # One data frame of (x, y = ECDF value at x) per group, kept in `nd`.
272 |   ecdf <- ecdf2 <- ecdf_fun(data, formula, cuts) %>% 
273 |     mutate(nd = map2(.data$nd, .data$ecdf, ~data.frame(x = .x, y = .y))) %>% 
274 |     select(-.data$ecdf)
275 |   names(ecdf2) <- paste0(names(ecdf), "1")
276 |   # Line both ECDFs up on the union of their x values. unique(a, b) would treat
277 |   # b as `incomparables` and leave its points out, so combine the vectors first.
278 |   cross(ecdf, ecdf2) %>% 
279 |     filter(!map2_lgl(.data$nd, .data$nd1, ~identical(.x, .y))) %>% 
280 |     mutate(matched = map2(.data$nd, .data$nd1,
281 |                           ~data.frame(x = sort(unique(c(.x$x, .y$x)))) %>% 
282 |                             left_join(.x, by = "x") %>% 
283 |                             left_join(.y, by = "x", suffix = c("_ref", "_foc")) %>% 
284 |                             fill(names(.)))) %>% 
285 |     select(-.data$nd, -.data$nd1)
286 | }
287 | 
288 | #' Compute the Area Under the \link{pp_plot} Curve
289 | #' 
290 | #' Calculates the area under the \code{pp} curve, an effect-size-like statistic
291 | #' representing the probability that a randomly selected individual from the
292 | #' \code{x} distribution will have a higher value than a randomly selected
293 | #' individual from the \code{y} distribution.
294 | #' @inheritParams coh_d
295 | #' @param rename Used primarily for internal purposes. Should the column 
296 | #' names be renamed to reference the focal and reference groups? Defaults to
297 | #' \code{TRUE}.
298 | #' @return By default the area under the curve for all possible pairings of
299 | #' the grouping factor are returned. 
300 | #' @export
301 | #' @examples
302 | #' 
303 | #' # Calculate AUC for all pairwise comparisons
304 | #' auc(star, reading ~ condition) 
305 | #' 
306 | #' # Report only relative to regular-sized classrooms
307 | #' auc(star, 
308 | #'     reading ~ condition, 
309 | #'     ref_group = "reg")
310 | #' 
311 | #' # Report by ELL and FRL groups for each season, compare to non-ELL students
312 | #' # who were not eligible for free or reduced price lunch in the fall (using
313 | #' # the formula interface for reference group referencing).
314 | #' \dontrun{
315 | #' auc(benchmarks, 
316 | #'       math ~ ell + frl + season,
317 | #'       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
318 | #' 
319 | #' # Same thing but with character vector supplied, rather than a formula
320 | #' auc(benchmarks, 
321 | #'       math ~ ell + frl + season,
322 | #'       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
323 | #' }
324 | #' 
325 | 
326 | auc <- function(data, formula, ref_group = NULL, rename = TRUE) {
327 |   # Area under each pairing's PP curve: the reference-group ECDF (y_ref)
328 |   # integrated over the focal-group ECDF (y_foc), without spline smoothing.
329 |   d <- paired_ecdf(data, formula) %>% 
330 |     mutate(auc = map_dbl(.data$matched, ~integrate.xy(.$y_foc, .$y_ref, 
331 |                                                       use.spline = FALSE))) %>% 
332 |     select(-.data$matched)
333 |   
334 |   if(!is.null(ref_group)) {
335 |     d <- ref_subset(d, formula, ref_group)
336 |   }
337 |   if(rename) d <- rename_ref_foc(d, formula)
338 |   d
339 | }
340 | 
341 | #' Calculate the V effect size statistic
342 | #' 
343 | #' This function calculates the effect size V, as discussed by 
344 | #' \href{https://journals.sagepub.com/doi/abs/10.3102/1076998609332755}{Ho, 2009}. The V
345 | #' statistic is a transformation of \code{\link{auc}}, interpreted as the 
346 | #' average difference between the distributions in standard deviation units.
347 | #' @inheritParams coh_d
348 | #' @return By default the V statistic for all possible pairings of
349 | #'  the grouping factor are returned as a tidy data frame. Alternatively, 
350 | #' only the V values corresponding to a specific reference group can be
351 | #' returned by supplying \code{ref_group}.
352 | #' @export
353 | #' @examples 
354 | #' 
355 | #' # Calculate V for all pairwise comparisons
356 | #' v(star, reading ~ condition) 
357 | #' 
358 | #' # Report only relative to regular-sized classrooms
359 | #' v(star, 
360 | #'     reading ~ condition, 
361 | #'     ref_group = "reg")
362 | #' 
363 | #' # Report by ELL and FRL groups for each season, compare to non-ELL students
364 | #' # who were not eligible for free or reduced price lunch in the fall (using
365 | #' # the formula interface for reference group referencing).
366 | #' 
367 | #' \dontrun{
368 | #' v(benchmarks, 
369 | #'       math ~ ell + frl + season,
370 | #'       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
371 | #' 
372 | #' # Same thing but with character vector supplied, rather than a formula
373 | #' v(benchmarks, 
374 | #'       math ~ ell + frl + season,
375 | #'       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
376 | #' }
377 | #' 
378 | 
379 | v <- function(data, formula, ref_group = NULL) {
380 |   # V = sqrt(2) * qnorm(AUC), computed for every pairing before any subsetting.
381 |   areas <- auc(data, formula, rename = FALSE)
382 |   d <- mutate(areas, v = sqrt(2)*qnorm(.data$auc))
383 |   d <- select(d, -.data$auc)
384 |   
385 |   if(!is.null(ref_group)) d <- ref_subset(d, formula, ref_group)
386 |   
387 |   rename_ref_foc(d, formula)
388 | }
389 | 
390 | #' Compute the proportion above a specific cut location
391 | #' 
392 | #' Computes the proportion of the corresponding group, as specified by the
393 | #' \code{formula}, scoring above the specified \code{cuts}.
394 | #' @inheritParams ecdf_fun
395 | #' @inheritParams coh_d
396 | #' @return Tidy data frame of the proportion above the cutoff for 
397 | #' each (or selected) groups.
398 | #' @seealso [esvis::pac_compare(), esvis::tpac(), esvis::tpac_compare()]
399 | #' @export 
400 | #' @examples
401 | #' # Compute the proportion above each of three cuts for every condition
402 | #' pac(star,
403 | #'     reading ~ condition,
404 | #' 		 cuts = c(450, 500, 550))
405 | #' 		 
406 | #' pac(star,
407 | #'     reading ~ condition + freelunch + race, 
408 | #' 		 cuts = c(450, 500))
409 | #' 
410 | #' pac(star,
411 | #'     reading ~ condition + freelunch + race, 
412 | #' 		 cuts = c(450, 500),
413 | #' 		 ref_group = ~small + no + white) 
414 | 
415 | pac <- function(data, formula, cuts, ref_group = NULL) {
416 |   rhs <- labels(terms(formula))
417 |   # ECDF values for each group, evaluated on a grid that includes `cuts`.
418 |   d <- ecdf_fun(data, formula, cuts) 
419 |   
420 |   # One column per cut score, repeated for every row of `d`.
421 |   cut_tbl <- data.frame(matrix(rep(cuts, each = nrow(d)), nrow = nrow(d)))
422 |   if(length(cuts) == 1) {
423 |     names(cut_tbl) <- "cut"
424 |   }
425 |   d <- dplyr::bind_cols(d, cut_tbl) 
426 |   
427 |   if(length(cuts) > 1) {
428 |     # Several cuts arrive as columns matching ^X\d; stack them into `cut`.
429 |     d <- gather(d, "dis", "cut", matches("^X\\d"))
430 |   }
431 |   # Keep only the ECDF point sitting exactly at each cut. The single-cut path
432 |   # previously skipped this filter and returned a row for every observed score.
433 |   d <- d %>% 
434 |     unnest(cols = c(.data$ecdf, .data$nd)) %>% 
435 |     filter(.data$nd == .data$cut)
436 |   
437 |   if(!is.null(ref_group)) {
438 |     d <- ref_subset(d, formula, ref_group)
439 |   }
440 |   d %>% 
441 |     mutate(pac = 1 - .data$ecdf) %>% 
442 |     distinct() %>% 
443 |     select(rhs, cut, pac)
444 | }
445 | 
446 | #' Compute the difference in the proportion above a specific cut location
447 | #' 
448 | #' Computes the difference in the proportion above the specified \code{cuts} 
449 | #' for all possible pairwise comparisons of the groups specified by the 
450 | #' \code{formula}.
451 | #' @inheritParams ecdf_fun
452 | #' @inheritParams coh_d
453 | #' @return Tidy data frame of the differences in the proportion above the
454 | #' cutoff for each pairing of (or selected) groups.
455 | #' @seealso [esvis::pac(), esvis::tpac(), esvis::tpac_compare()]
456 | #' @export 
457 | #' @examples
458 | #' # Compute differences for all pairwise comparisons for each of three cuts
459 | #' pac_compare(star,
460 | #'     reading ~ condition, 
461 | #' 		 cuts = c(450, 500, 550)) 
462 | #' 		 
463 | #' pac_compare(star,
464 | #'     reading ~ condition + freelunch + race, 
465 | #' 		 cuts = c(450, 500))
466 | #' 
467 | #' pac_compare(star,
468 | #'     reading ~ condition + freelunch + race, 
469 | #' 		 cuts = c(450, 500),
470 | #' 		 ref_group = ~small + no + white) 
471 | 
472 | pac_compare <- function(data, formula, cuts, ref_group = NULL) {
473 |   rhs <- labels(terms(formula))
474 |   # Two copies of the per-group proportions; the second gets a "1" suffix.
475 |   d1 <- d2 <- pac(data, formula, cuts)
476 |   names(d2) <- paste0(names(d1), "1")
477 |   
478 |   # Compare groups only at the same cut score.
479 |   d <- cross(d1, d2) %>% 
480 |     filter(.data$cut == .data$cut1) %>% 
481 |     mutate(pac_diff = .data$pac - .data$pac1) 
482 |   
483 |   # Keep pairings that differ on at least one grouping variable. The per-variable
484 |   # filters are row-bound, so a pairing differing on several variables would
485 |   # appear once per variable; distinct() collapses those repeats.
486 |   d <- map2_df(rhs, 
487 |                paste0(rhs, 1), 
488 |                ~filter(d, !!sym(.x) != !!sym(.y))) %>% 
489 |     distinct()
490 |   
491 |   if(!is.null(ref_group)) d <- ref_subset(d, formula, ref_group)
492 |   d <- rename_ref_foc(d, formula)
493 |   
494 |   d %>%
495 |     ungroup() %>% 
496 |     rename("pac_ref" = "pac", "pac_foc" = "pac1") %>%
497 |     select(.data$cut, ends_with("_ref"), ends_with("_foc"),
498 |            .data$pac_diff, -.data$cut1)
499 |   
500 | }
501 | 
502 | #' Transformed proportion above the cut
503 | #' 
504 | #' This function transforms calls to \link{pac} into standard deviation units.
505 | #' Function assumes that each distribution is distributed normally with 
506 | #' common variances. See 
507 | #' \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho &
508 | #'  Reardon, 2012}
509 | #' @inheritParams ecdf_fun
510 | #' @inheritParams coh_d
511 | #' @return Tidy data frame of the proportion above the cutoff for 
512 | #' each (or selected) groups.
513 | #' @seealso [esvis::pac(), esvis::pac_compare(), esvis::tpac_compare()]
514 | #' @export 
515 | #' @examples
516 | #' # Transformed proportion above each of three cuts, by condition
517 | #' tpac(star,
518 | #'     reading ~ condition, 
519 | #'     cuts = c(450, 500, 550)) 
520 | #' 
521 | #' tpac(star,
522 | #'     reading ~ condition + freelunch + race, 
523 | #'     cuts = c(450, 500))
524 | #' 
525 | #' tpac(star,
526 | #'     reading ~ condition + freelunch + race, 
527 | #'     cuts = c(450, 500),
528 | #'     ref_group = ~small + no + white)  
529 | 
530 | tpac <- function(data, formula, cuts, ref_group = NULL) {
531 |   # Proportions above each cut, passed through qnorm() and renamed to `tpac`.
532 |   props <- pac(data, formula, cuts, ref_group)
533 |   rename(mutate(props, pac = qnorm(.data$pac)), "tpac" = "pac")
534 | }
535 | 
536 | #' Compare Transformed Proportion Above the Cut
537 | #' 
538 | #' This function compares all possible pairwise comparisons, as supplied by 
539 | #' \code{formula}, in terms of the transformed proportion above the cut. This
540 | #' is an effect-size like measure of the differences between two groups as the
541 | #' cut point(s) in the distribution. See 
542 | #' \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho &
543 | #'  Reardon, 2012}
544 | #' @inheritParams ecdf_fun
545 | #' @inheritParams coh_d
546 | #' @return Tidy data frame of the transformed proportions above the cutoff
547 | #' for each pairing of (or selected) groups, together with their difference.
548 | #' @seealso [esvis::pac(), esvis::pac_compare(), esvis::tpac()]
549 | #' @export 
550 | #' @examples
551 | #' # Compute differences for all pairwise comparisons for each of three cuts
552 | #' tpac_compare(star,
553 | #'     reading ~ condition, 
554 | #'     cuts = c(450, 500, 550)) 
555 | #' 
556 | #' tpac_compare(star,
557 | #'     reading ~ condition + freelunch + race, 
558 | #'     cuts = c(450, 500))
559 | #' 
560 | #' tpac_compare(star,
561 | #'     reading ~ condition + freelunch + race, 
562 | #'     cuts = c(450, 500),
563 | #'     ref_group = ~small + no + white)  
564 | 
565 | tpac_compare <- function(data, formula, cuts, ref_group = NULL) {
566 |   # Transform both proportions with qnorm(), then difference them.
567 |   cmp <- pac_compare(data, formula, cuts, ref_group)
568 |   cmp <- mutate(cmp, pac_ref = qnorm(.data$pac_ref), pac_foc = qnorm(.data$pac_foc))
569 |   cmp <- mutate(cmp, tpac_diff = .data$pac_ref - .data$pac_foc)
570 |   cmp <- rename(cmp, "tpac_ref" = "pac_ref", "tpac_foc" = "pac_foc")
571 |   
572 |   select(cmp, -.data$pac_diff)
573 | }
574 | 


--------------------------------------------------------------------------------
/R/esvis-package.R:
--------------------------------------------------------------------------------
 1 | #' @keywords internal
 2 | #' @importFrom ggplot2 ggplot aes_ facet_wrap facet_grid geom_step geom_point
 3 | #'   geom_vline ggplot_build geom_label geom_line geom_hline geom_ribbon 
 4 | #'   geom_segment labs geom_abline geom_rect 
 5 | #' @importFrom dplyr mutate mutate_at select group_by_at arrange distinct 
 6 | #'   filter matches group_by_all group_by summarize summarize_at n left_join 
 7 | #'   semi_join vars funs ends_with rename tbl_df ungroup everything mutate_if
 8 | #'   bind_cols
 9 | #' @importFrom tidyr spread fill gather separate crossing nest unnest
10 | #' @importFrom tibble tibble lst
11 | #' @importFrom purrr map map_dbl map_lgl map2 map2_lgl map2_df is_atomic
12 | #' @importFrom rlang := sym syms quo quos .data parse_quo set_names 
13 | #'   quo_get_expr
14 | #' @importFrom Hmisc cut2
15 | #' @importFrom magrittr %>%
16 | #' @importFrom graphics par layout lines segments rect polygon
17 | #' @importFrom utils installed.packages
18 | #' @importFrom grDevices adjustcolor
19 | #' @importFrom sfsmisc integrate.xy
20 | #' @importFrom stats as.formula terms setNames ecdf qnorm na.omit var
21 | "_PACKAGE"
22 | 
23 | if(getRversion() >= "2.15.1")  utils::globalVariables(c("."))
24 | 
25 | 
26 | 
27 | 
28 | 


--------------------------------------------------------------------------------
/R/pp_plot.R:
--------------------------------------------------------------------------------
  1 | #' Produces the paired probability plot for two groups
  2 | #' 
  3 | #' The paired probability plot maps the probability of obtaining a specific
  4 | #'    score for each of two groups. The area under the curve 
  5 | #'    (\code{\link{auc}}) corresponds to the probability that a randomly
  6 | #'    selected observation from the x-axis group will have a higher score than
  7 | #'    a randomly selected observation from the y-axis group. This function
  8 | #'    extends the basic pp-plot by allowing multiple curves and faceting to
  9 | #'    facilitate a variety of comparisons. Note that because the plotting is
 10 | #'    built on top of \link[ggplot2]{ggplot2}, additional customization can 
 11 | #'    be made on top of the plots, as illustrated in the examples.
 12 | #' 
 13 | #' @param data The data frame to be plotted
 14 | #' @param formula A formula of the type \code{out ~ group} where \code{out} is
 15 | #' the outcome variable and \code{group} is the grouping variable. Note this
 16 | #' variable can include any arbitrary number of groups. Additional variables 
 17 | #' can be included with \code{+} to produce separate plots by the secondary or
 18 | #' tertiary variable of interest (e.g., \code{out ~ group + characteristic1 + 
 19 | #' characteristic2}). No more than two additional characteristics can be 
 20 | #' supplied at this time.
 21 | #' @param ref_group Optional character vector (of length 1) naming the
 22 | #'   reference group. Defaults to the group with the highest mean score.
 23 | #' @param cuts Integer. Optional vector (or single number) of scores used to 
 24 | #' annotate the plot. If supplied, line segments will extend from the 
 25 | #' corresponding x and y axes and meet at the PP curve.
 26 | #' @param cut_labels Logical. Should the reference lines corresponding to
 27 | #'    \code{cuts} be labeled? Defaults to \code{TRUE}.
 28 | #' @param cut_label_x The x-axis location of the cut labels. Defaults to 0.02.
 29 | #' @param cut_label_size The size of the cut labels. Defaults to 3.
 30 | #' @param lines Logical. Should the PP Lines be plotted? Defaults to 
 31 | #'    \code{TRUE}.
 32 | #' @param linetype The \link[ggplot2]{linetype} for the PP lines. Defaults to 
 33 | #'    "solid".
 34 | #' @param linewidth The width of the PP lines. Defaults to 1.1 (just
 35 | #'    marginally larger than the default ggplot2 lines).
 36 | #' @param shade Logical. Should the area under the curve be shaded? Defaults to
 37 | #'    \code{TRUE}.
 38 | #' @param shade_alpha Transparency of the shading. Defaults to 0.2.
 39 | #' @param refline Logical. Should a diagonal reference line be plotted, 
 40 | #'    representing the value at which no difference is observed between the
 41 | #'    reference and focal distributions? Defaults to \code{TRUE}.
 42 | #' @param refline_col Color of the reference line. Defaults to a dark gray.
 43 | #' @param refline_type The \link[ggplot2]{linetype} for the reference line.
 44 | #'    Defaults to "dashed".
 45 | #' @param refline_width The width of the reference line. Defaults to 1.1, 
 46 | #'    the same as the default width of the PP lines. 
 47 | #' @return A \link[ggplot2]{ggplot2} object displaying the specified PP plot.
 48 | #' @export
 49 | #' @examples
 50 | #' # PP plot examining differences by condition
 51 | #' pp_plot(star, math ~ condition)
 52 | #' 
 53 | #' # The sample size gets very small in the above within cells (e.g., wild 
 54 | #' # changes within the "other" group in particular). Overall, the effect doesn't
 55 | #' # seem to change much by condition.
 56 | #' 
 57 | #' # Look at something a little more interesting
 58 | #' \dontrun{
 59 | #' pp_plot(benchmarks, math ~ ell + season + frl)
 60 | #' }
 61 | #' # Add some cut scores
 62 | #' pp_plot(benchmarks, math ~ ell, cuts = c(190, 210, 215))
 63 | #' 
 64 | #' ## Make another interesting plot. Use ggplot to customize
 65 | #' \dontrun{
 66 | #' library(tidyr)
 67 | #' library(ggplot2)
 68 | #' benchmarks %>% 
 69 | #'   gather(subject, score, reading, math) %>% 
 70 | #'   pp_plot(score ~ ell + subject + season,
 71 | #'           ref_group = "Non-ELL") +
 72 | #'   scale_fill_brewer(name = "ELL Status", palette = "Pastel2") +
 73 | #'   scale_color_brewer(name = "ELL Status", palette = "Pastel2") +
 74 | #'   labs(title = "Differences among English Language Learning Groups",
 75 | #'        subtitle = "Note crossing of reference line") +
 76 | #'   theme_minimal()
 77 | #' }
 78 | #' 
 79 | pp_plot <- function(data, formula, ref_group = NULL, cuts = NULL, 
 80 |                     cut_labels = TRUE, cut_label_x = 0.02, cut_label_size = 3, 
 81 |                     lines = TRUE, linetype = "solid", linewidth = 1.1, 
 82 |                     shade = TRUE, shade_alpha = 0.2, refline = TRUE, 
 83 |                     refline_col = "gray40", refline_type = "dashed", 
 84 |                     refline_width = 1.1) {
 85 |   # First rhs term is the grouping variable; any further terms define facets.
 86 |   rhs  <- labels(terms(formula))
 87 |   lhs  <- all.vars(formula)[1]
 88 |   
 89 |   if(is.null(ref_group)) {
 90 |     group_means <- tapply(data[[lhs]], data[[rhs[1]]], mean, na.rm = TRUE)
 91 |     ref_group <- names(group_means)[which.max(group_means)]
 92 |   }
 93 |   # Matched ECDF pairs, restricted to comparisons against the reference group.
 94 |   d <- paired_ecdf(data, formula, cuts) %>%
 95 |     unnest(cols = .data$matched) %>%
 96 |     filter(!!sym(rhs[1]) == ref_group) 
 97 |   # Within-facet comparisons only: facet variables must match across the pair.
 98 |   if(length(rhs) == 2) {
 99 |     d <- filter(d, !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)))
100 |   }
101 |   if(length(rhs) == 3) {
102 |     d <- filter(d, 
103 |                 !!sym(rhs[2]) == !!sym(paste0(rhs[2], 1)),
104 |                 !!sym(rhs[3]) == !!sym(paste0(rhs[3], 1)))
105 |   }
106 |   p <- ggplot(d, aes_(quote(y_foc), quote(y_ref))) 
107 |   
108 |   if(shade) {
109 |     p <- p + 
110 |       geom_ribbon(aes_(fill = as.name(paste0(rhs[1], 1)),
111 |                        ymin = -Inf,
112 |                        ymax = quote(y_ref)),
113 |                   alpha = shade_alpha)
114 |   }
115 |   if(refline) {
116 |     p <- p + geom_abline(intercept = 0, 
117 |                          slope     = 1, 
118 |                          color     = refline_col, 
119 |                          linetype  = refline_type,
120 |                          size      = refline_width)
121 |   }
122 |   if(lines) {
123 |     p <- p + geom_line(aes_(color = as.name(paste0(rhs[1], 1))),
124 |                        linetype   = linetype,
125 |                        size       = linewidth)
126 |   }
127 |   if(!is.null(cuts)) {
128 |     cut_data <- d %>% 
129 |       filter(.data$x %in% cuts)
130 |     
131 |     p <- p +
132 |       geom_segment(aes_(x     = quote(y_foc),
133 |                         xend  = quote(y_foc), 
134 |                         y     = -Inf,
135 |                         yend  = quote(y_ref),
136 |                         color = as.name(paste0(rhs[1], 1))),
137 |                    cut_data) +
138 |       geom_segment(aes_(x     = -Inf,
139 |                         xend  = quote(y_foc), 
140 |                         y     = quote(y_ref), 
141 |                         yend  = quote(y_ref), 
142 |                         color = as.name(paste0(rhs[1], 1))),
143 |                    cut_data)
144 |       if(cut_labels) {
145 |         # Label position and size come from the arguments (were hard-coded).
146 |         p <- p + 
147 |           geom_label(aes_(x = cut_label_x,
148 |                           y = quote(y_ref), label = quote(x)),
149 |                      cut_data, 
150 |                      size = cut_label_size)
151 |       }
152 |   }
153 |   if(length(rhs) == 2) {
154 |     p <- p + facet_wrap(as.formula(paste0("~", rhs[2])))
155 |   }
156 |   if(length(rhs) == 3) {
157 |     p <- p + facet_grid(as.formula(paste0(rhs[2], "~", rhs[3])))
158 |   } 
159 | p + labs(x = "Focal Group",
160 |          y = ref_group) 
161 | }
162 | 


--------------------------------------------------------------------------------
/R/seda.R:
--------------------------------------------------------------------------------
 1 | #' Portion of the Stanford Education Data Archive (SEDA). 
 2 | #' 
 3 | #' The full SEDA dataset contains mean test scores on statewide testing data in
 4 | #' reading and math for every school district in the United States. See a
 5 | #' description of the data 
 6 | #' \href{https://purl.stanford.edu/db586ns4974}{here}. The data 
 7 | #' included in this package are a random sample of 10% of all the 
 8 | #' cases in the full dataset. To access the full data, please visit the 
 9 | #' data archive at the above link.
10 | #' 
11 | #' 
12 | #' @format A data frame with 32625 rows and 8 columns.
13 | #'   \describe{
14 | #'     \item{leaid}{Integer. Local education authority identifier.}
15 | #' 	   \item{leaname}{Character. Local education authority name.}
16 | #'     \item{stateabb}{Character. State abbreviation.}
17 | #' 	   \item{year}{Integer. Year the data were collected.}
18 | #' 	   \item{grade}{Integer. Grade level the data were collected.}
19 | #'     \item{subject}{Character. Whether the data were from reading or
20 | #' 			 mathematics.}
21 | #'     \item{mean}{Double. Mean test score for the LEA in the corresponding
22 | #' 				   subject/grade/year.}
23 | #'     \item{se}{Double. Standard error of the mean.}
24 | #' }
25 | #' 
26 | #' @source 
27 | #' Sean F. Reardon, Demetra Kalogrides, Andrew Ho, Ben Shear, Kenneth Shores,
28 | #' Erin Fahle. (2016). Stanford Education Data Archive. 
29 | #' \href{http://purl.stanford.edu/db586ns4974}{
30 | #'  http://purl.stanford.edu/db586ns4974}. For more information, please visit
31 | #' \href{https://edopportunity.org}{https://edopportunity.org}.
32 | "seda"


--------------------------------------------------------------------------------
/R/star.R:
--------------------------------------------------------------------------------
 1 | #' Data from the Tennessee class size experiment 
 2 | #' 
 3 | #' These data come from the Ecdat package and represent a cross-section of
 4 | #'  data from Project STAR (Student/Teacher Achievement Ratio), where students
 5 | #'  were randomly assigned to classrooms.
 6 | #' 
 7 | #' @format A data frame with 5748 rows and 9 columns.
 8 | #'   \describe{
 9 | #'     \item{sid}{Integer. Student identifier.}
10 | #' 	   \item{schid}{Integer. School identifier.}
11 | #'     \item{condition}{Character. Classroom type the student was enrolled in 
12 | #' 			(randomly assigned to).}
13 | #' 	   \item{tch_experience}{Integer. Number of years of teaching experience
14 | #' 			 for the teacher in the classroom in which the student was
15 | #' 			 enrolled.}
16 | #' 	   \item{sex}{Character. Sex of student: "girl" or "boy".}
17 | #' 	   \item{freelunch}{Character. Eligibility of the student for free or
18 | #' 			 reduced price lunch: "no" or "yes"}
19 | #'     \item{race}{Character. The identified race of the student: "white",
20 | #' 			 "black", or "other"}
21 | #'     \item{math}{Integer. Math scale score.}
22 | #'     \item{reading}{Integer. Reading scale score.}
23 | #' }
24 | 
25 | "star"


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | is.formula <- function(x){
  2 |    inherits(x, what = "formula")  # TRUE when x carries the "formula" class
  3 | }
  4 | 
  5 | rename_ref_foc <- function(out, formula) {
  6 |    rhs  <- labels(terms(formula))
  7 |    nms  <- names(out)
  8 |    # Reference columns carry the grouping-variable names exactly; focal columns
  9 |    # are those same names followed by a single trailing digit.
 10 |    ref <- nms %in% rhs
 11 |    foc <- !ref & grepl("\\d$", nms) & sub("\\d$", "", nms) %in% rhs
 12 |    
 13 |    # Replace only the trailing digit, so digits inside a variable name survive
 14 |    # (e.g. "grade3" / "grade31" become "grade3_ref" / "grade3_foc").
 15 |    names(out)[ref] <- paste0(nms[ref], "_ref")
 16 |    names(out)[foc] <- sub("\\d$", "_foc", nms[foc])
 17 |    out
 18 | }
 19 | 
 20 | ref_subset <- function(out, formula, ref_group) {
 21 |   rhs  <- labels(terms(formula))
 22 |   # A formula such as ~`Non-ELL` + Fall is reduced to its bare term labels.
 23 |   if(is.formula(ref_group)) ref_group <- gsub("`", "", labels(terms(ref_group)))
 24 |   if(length(ref_group) > length(rhs)) {
 25 |     stop("`ref_group` names more levels than there are grouping variables.")
 26 |   }
 27 |   ref_join <- data.frame(as.list(as.character(ref_group)), 
 28 |                          stringsAsFactors = FALSE)
 29 |   names(ref_join) <- rhs[seq_along(ref_group)]  # levels matched by position
 30 |   suppressMessages(semi_join(out, ref_join))
 31 | } 
 32 | 
 33 | 
 34 | #' Report descriptive stats for all possible pairings on the rhs of the formula.
 35 | #' @keywords internal
 36 | #' @param formula A formula of the type \code{out ~ group} where \code{out} is
 37 | #' the outcome variable and \code{group} is the grouping variable. Note this
 38 | #' variable can include any arbitrary number of groups. Additional variables 
 39 | #' can be included with \code{+} to produce descriptive stats by the secondary 
 40 | #' or tertiary variable of interest (e.g., \code{out ~ group + characteristic1 
 41 | #' + characteristic2}). 
 42 | 
 43 | descrip_stats <- function(data, formula, ..., qtile_groups = NULL) {
 44 |   rhs  <- labels(terms(formula))
 45 |   lhs  <- all.vars(formula)[1]
 46 |   
 47 |   if (missing(...)) {
 48 |     stop("No function supplied to ...")
 49 |   }
 50 |   # Keep only the modelled columns, drop incomplete rows, and group by the
 51 |   # grouping variables (coerced to character).
 52 |   d <- data %>%
 53 |     select(rhs, lhs) %>% 
 54 |     na.omit() %>% 
 55 |     mutate_at(vars(!!!syms(rhs)), list(as.character)) %>%
 56 |     group_by(!!!syms(rhs)) 
 57 |   
 58 |   if(!is.null(qtile_groups)) {
 59 |     # Bin the outcome into quantile groups within each group. `cols` is given
 60 |     # explicitly because a bare unnest() is deprecated and warns in tidyr >= 1.0.
 61 |     d <- d %>% 
 62 |       nest() %>% 
 63 |       mutate(q = map(.data$data, ~as.numeric(cut2(.[[lhs]], g = qtile_groups)))) %>%
 64 |       unnest(cols = c(.data$data, .data$q)) %>%
 65 |       group_by(!!!syms(rhs), .data$q)
 66 |   }
 67 |   # Apply every function supplied through ... to the outcome, per group.
 68 |   summarize_at(d, vars(!!!syms(lhs)), list(...))
 69 | }
 70 | # Pair every group's descriptive stats with every other group's (self-pairs removed); `...` and `qtile_groups` are forwarded to descrip_stats().
 71 | descrip_cross <- function(data, formula, ..., qtile_groups = NULL) {
 72 |   rhs  <- labels(terms(formula))
 73 |   f <- quos(...)
 74 |   # d1 and d2 start as the same summary; d2's columns get a trailing "1" so both copies can sit side by side after crossing.
 75 |   d1 <- d2 <- descrip_stats(data, formula, ..., qtile_groups = qtile_groups) 
 76 |   names(d2) <- paste0(names(d1), "1")
 77 |   d <- cross(d1, d2)
 78 |   # Filter text matching rows where a group is paired with itself, e.g. "season == season1 & frl == frl1" (plus "q == q1" when binned).
 79 |   zero_group <- paste(rhs, "==", paste0(rhs, 1), collapse = " & ")
 80 |   if(!is.null(qtile_groups)) zero_group <- paste0("q == q1 & ", zero_group)
 81 |   # Sanity check on the self-pairs: the first summary column must equal its "1" twin, otherwise stop.
 82 |   test <- filter(d, !!parse_quo(zero_group, env = parent.frame()))
 83 |   var <- as.character(quo_get_expr(f[[1]]))  # text of the first expression passed via `...`, used as a column name below
 84 | 
 85 |   if(any((test[ ,var] - test[ ,paste0(var, 1)]) != 0)) {
 86 |     stop("Reference Group Filtering failed. Use `all == TRUE` and
 87 |          filter manually.")
 88 |   }
 89 |   filt_expr <- parse_quo(paste0("!(", zero_group, ")"),
 90 |                          env = parent.frame())
 91 |   d <- d %>%
 92 |     filter(!!filt_expr)  # drop the self-pairs
 93 |   # Binned case: keep same-bin pairs only, add qtile_ub = q / max(q) and qtile_lb = qtile_ub - min(qtile_ub), move them to the front, drop q1.
 94 |   if(!is.null(qtile_groups)) {
 95 |     d <- d %>%
 96 |       filter(.data$q == .data$q1) %>%
 97 |       mutate(qtile_ub = .data$q / max(.data$q),
 98 |              qtile_lb = .data$qtile_ub - min(.data$qtile_ub)) %>%
 99 |       ungroup() %>%
100 |       select(.data$q,
101 |              .data$qtile_lb,
102 |              .data$qtile_ub,
103 |              everything(),
104 |              -.data$q1)
105 |   }
106 |  d
107 | }
108 | 
109 | 
110 | 
111 | #### Old version of tidyr::crossing
112 | # Drop zero-length elements of `x`; with `factor = TRUE` empty factors are kept.
113 | drop_empty <- function(x, factor = TRUE) {
114 |   x[!map_lgl(x, function(el) length(el) == 0 & !(factor & is.factor(el)))]
115 | }
116 | # Row positions 1..nrow(x); empty for a zero-row table.
117 | seq_nrow <- function(x) seq_len(dim(x)[1L])
118 | # All row pairs of `x` and `y`: each row of `x` is repeated once per row of `y`.
119 | cross_df <- function(x, y) {
120 |   bind_cols(x[rep(seq_nrow(x), each = nrow(y)), , drop = FALSE],
121 |             y[rep(seq_nrow(y), times = nrow(x)), , drop = FALSE])
122 | }
123 | # Element-wise check: one TRUE/FALSE per element of `x`, TRUE where it is a list.
124 | is_list <- function(x) map_lgl(x, function(el) is.list(el))
125 | # Distinct values of `x`: a factor gives one entry per level (plus an NA entry
126 | # when `x` has missing values), a list its unique elements, else sorted uniques.
127 | ulevels <- function(x) {
128 |   if (!is.factor(x)) {
129 |     if (is.list(x)) return(unique(x))
130 |     return(sort(unique(x), na.last = TRUE))
131 |   }
132 |   declared <- levels(x)
133 |   # addNA() appends an NA level only when `x` actually contains missing values.
134 |   with_na <- addNA(x, ifany = TRUE)
135 |   factor(levels(with_na), levels = declared,
136 |          ordered = is.ordered(with_na), exclude = NULL)
137 | }
138 | # Cartesian product of the supplied tables (old tidyr::crossing); zero-length inputs are dropped.
139 | cross <- function(...) {
140 |   x <- lst(...)
141 |   stopifnot(is_list(x))
142 |   
143 |   x <- drop_empty(x)
144 |   if (length(x) == 0) {
145 |     return(data.frame())
146 |   }
147 |   
148 |   # Named `atomic` so the local flag vector does not shadow is_atomic().
149 |   atomic <- map_lgl(x, is_atomic)
150 |   
151 |   # turn each atomic vector into a single-column data frame named after its argument
152 |   x[atomic] <- map2(x[atomic], names(x)[atomic],
153 |                     function(vec, nm) set_names(tibble(x = ulevels(vec)), nm))
154 |   
155 |   # fold pairwise with cross_df(): every row of one table against every row of the next
156 |   Reduce(cross_df, x)
157 | }


--------------------------------------------------------------------------------
/README-binned_plot1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-binned_plot1-1.png


--------------------------------------------------------------------------------
/README-binned_plot2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-binned_plot2-1.png


--------------------------------------------------------------------------------
/README-ecdf_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-ecdf_plot-1.png


--------------------------------------------------------------------------------
/README-pp_plot1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/README-pp_plot1-1.png


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: github_document
  3 | ---
  4 | 
  5 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  6 | 
  7 | ```{r, echo = FALSE}
  8 | knitr::opts_chunk$set(
  9 |   collapse = TRUE,
 10 |   comment = "#>",
 11 |   fig.path = "README-"
 12 | )
 13 | ```
 14 | 
 15 | # esvis
 16 | 
 17 | R Package for effect size visualization and estimation.
 18 | 
 19 | [![Build Status](https://travis-ci.org/datalorax/esvis.svg?branch=master)](https://travis-ci.org/datalorax/esvis)
 20 | [![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/datalorax/esvis?branch=master&svg=true)](https://ci.appveyor.com/project/datalorax/esvis) 
 21 | [![codecov](https://codecov.io/gh/datalorax/esvis/branch/master/graph/badge.svg)](https://codecov.io/gh/datalorax/esvis)
 22 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/esvis)](https://cran.r-project.org/package=esvis)
 23 | 
 24 | This package is designed to help you very quickly estimate and visualize distributional differences by categorical factors (e.g., the effect of treatment by gender and income category). Emphasis is placed on evaluating distributional differences across the entirety of the scale, rather than only by measures of central tendency (e.g., means). 
 25 | 
 26 | ## Installation
 27 | 
 28 | Install directly from CRAN with
 29 | 
 30 | ```{r cran_install, eval = FALSE}
 31 | install.packages("esvis")
 32 | ```
 33 | 
 34 | Or the development version from GitHub with:
 35 | 
 36 | ```{r gh-installation, eval = FALSE}
 37 | # install.packages("devtools")
 38 | devtools::install_github("datalorax/esvis")
 39 | ```
 40 | 
 41 | ## Plotting methods
 42 | 
 43 | There are three primary data visualizations: (a) binned effect size plots, (b) probability-probability plots, and (c) empirical cumulative distribution functions. All plots use the [ggplot2](http://ggplot2.tidyverse.org) package and are fully manipulable after creation using standard ggplot commands (e.g., changing the theme, labels, etc.). These plots were all produced by first running `library(ggplot2); theme_set(theme_minimal())` to produce the plots with the minimal theme, but no theme structure is imposed on any of the plots.
 44 | 
 45 | ### Binned ES Plot
 46 | At present, the binned effect size plot can only be produced with Cohen's *d*, although future development will allow the user to select the type of effect size. The binned effect size plot splits the distribution into quantiles specified by the user (defaults to lower, middle, and upper thirds), calculates the mean difference between groups within each quantile bin, and produces an effect size for each bin by dividing by the overall pooled standard deviation (i.e., not by quantile). For example
 47 | 
 48 | ```{r theme_set, include = FALSE}
 49 | library(ggplot2)
 50 | library(esvis)
 51 | theme_set(theme_minimal())
 52 | ```
 53 | 
 54 | 
 55 | ```{r, binned_plot1, fig.width = 8}
 56 | library(esvis)
 57 | binned_plot(benchmarks, math ~ ell)
 58 | ```
 59 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot1-1.png)
 60 | Note that in this plot one can clearly see that the magnitude of the differences between the groups depends upon scale location, as evidenced by the reversal of the effect (negative to positive) for the Non-ELL (non-English Language Learners) group. We could also change the reference group, change the level of quantile binning, and evaluate the effect within other factors. For example, we can look by season and eligibility for free or reduced price lunch, with quintile binning, and non-ELL students as the reference group with 
 61 | 
 62 | ```{r, binned_plot2, fig.width = 8}
 63 | binned_plot(benchmarks, 
 64 |             math ~ ell + frl + season, 
 65 |             ref_group = "Non-ELL",
 66 |             qtile_groups = 5)
 67 | ```
 68 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot2-1.png)
 69 | The `ref_group` argument can also be supplied as a formula.
 70 | 
 71 | ### PP Plots
 72 | Probability-probability plots can be produced with a call to `pp_plot` and an equivalent argument structure. In this case, we're visualizing the difference in reading achievement by race/ethnicity by season. 
 73 | 
 74 | ```{r, pp_plot1, fig.width = 8}
 75 | pp_plot(benchmarks, reading ~ ethnicity + season)
 76 | ```
 77 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-pp_plot1-1.png)
 78 | 
 79 | Essentially, the empirical cumulative distribution function (ECDF) for the reference group (by default, the highest performing group) is mapped against the ECDF for each corresponding group. The magnitude of the achievement gap is then displayed by the distance from the diagonal reference line, representing, essentially, the ECDF for the reference group. 
 80 | 
 81 | By default, the area under the curve is shaded, which itself is an effect-size like measure, but this is also manipulable. 
 82 | 
 83 | ### ECDF Plot
 84 | Finally, the `ecdf_plot` function essentially dresses up the base `plot.ecdf` function, but also adds some nice referencing features through additional, optional arguments. Below, I have included the optional `hor_ref = TRUE` argument such that horizontal reference lines appear, relative to the cuts provided.
 85 | 
 86 | ```{r, ecdf_plot, fig.width = 8}
 87 | ecdf_plot(benchmarks, math ~ season, 
 88 | 	cuts = c(190, 200, 215))
 89 | ```
 90 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-ecdf_plot-1.png)
 91 | These are the curves that go into the PP-Plot, but occasionally can be useful on their own.
 92 | 
 93 | ## Estimation Methods
 94 | Compute effect sizes for all possible pairwise comparisons.
 95 | 
 96 | ```{r, coh_d1} 
 97 | coh_d(benchmarks, math ~ season + frl)
 98 | ```
 99 | 
100 | Or specify a reference group. In this case, I've used the formula-based interface, but a string vector specifying the specific reference group could also be supplied.
101 | 
102 | 
103 | ```{r, coh_d2}
104 | coh_d(benchmarks, 
105 |       math ~ season + frl, 
106 |       ref_group = ~Fall + `Non-FRL`)
107 | ```
108 | 
109 | Notice that the reference to Non-FRL is wrapped in back-ticks, which should be used anytime there are spaces or other non-standard characters.
110 | 
111 | 
112 | Other effect sizes are estimated equivalently. For example, *V* ([Ho, 2009](https://journals.sagepub.com/doi/10.3102/1076998609332755)) can be estimated with
113 | 
114 | ```{r v}
115 | v(benchmarks, 
116 |   math ~ season + frl, 
117 |   ref_group = ~Fall + `Non-FRL`)
118 | ```
119 | 
120 | or *AUC* with
121 | 
122 | ```{r auc}
123 | auc(benchmarks, 
124 |     math ~ season + frl, 
125 |     ref_group = ~Fall + `Non-FRL`)
126 | ```
127 | 
128 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | <!-- README.md is generated from README.Rmd. Please edit that file -->
  3 | 
  4 | # esvis
  5 | 
  6 | R Package for effect size visualization and estimation.
  7 | 
  8 | [![Build
  9 | Status](https://travis-ci.org/datalorax/esvis.svg?branch=master)](https://travis-ci.org/datalorax/esvis)
 10 | [![AppVeyor Build
 11 | Status](https://ci.appveyor.com/api/projects/status/github/datalorax/esvis?branch=master&svg=true)](https://ci.appveyor.com/project/datalorax/esvis)
 12 | [![codecov](https://codecov.io/gh/datalorax/esvis/branch/master/graph/badge.svg)](https://codecov.io/gh/datalorax/esvis)
 13 | [![CRAN\_Status\_Badge](http://www.r-pkg.org/badges/version/esvis)](https://cran.r-project.org/package=esvis)
 14 | 
 15 | This package is designed to help you very quickly estimate and visualize
 16 | distributional differences by categorical factors (e.g., the effect of
 17 | treatment by gender and income category). Emphasis is placed on
 18 | evaluating distributional differences across the entirety of the scale,
 19 | rather than only by measures of central tendency (e.g., means).
 20 | 
 21 | ## Installation
 22 | 
 23 | Install directly from CRAN with
 24 | 
 25 | ``` r
 26 | install.packages("esvis")
 27 | ```
 28 | 
 29 | Or the development version from GitHub with:
 30 | 
 31 | ``` r
 32 | # install.packages("devtools")
 33 | devtools::install_github("datalorax/esvis")
 34 | ```
 35 | 
 36 | ## Plotting methods
 37 | 
 38 | There are three primary data visualizations: (a) binned effect size
 39 | plots, (b) probability-probability plots, and (c) empirical cumulative
 40 | distribution functions. All plots use the
 41 | [ggplot2](http://ggplot2.tidyverse.org) package and are fully
 42 | manipulable after creation using standard ggplot commands (e.g.,
 43 | changing the theme, labels, etc.). These plots were all produced by
 44 | first running `library(ggplot2); theme_set(theme_minimal())` to produce
 45 | the plots with the minimal theme, but no theme structure is imposed on
 46 | any of the plots.
 47 | 
 48 | ### Binned ES Plot
 49 | 
 50 | At present, the binned effect size plot can only be produced with
 51 | Cohen’s *d*, although future development will allow the user to select
 52 | the type of effect size. The binned effect size plot splits the
 53 | distribution into quantiles specified by the user (defaults to lower,
 54 | middle, and upper thirds), calculates the mean difference between groups
 55 | within each quantile bin, and produces an effect size for each bin by
 56 | dividing by the overall pooled standard deviation (i.e., not by
 57 | quantile). For example
 58 | 
 59 | ``` r
 60 | library(esvis)
 61 | binned_plot(benchmarks, math ~ ell)
 62 | #> Warning: `cols` is now required.
 63 | #> Please use `cols = c(data, q)`
 64 | ```
 65 | 
 66 | ![](README-binned_plot1-1.png)<!-- -->
 67 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot1-1.png)
 68 | Note that in this plot one can clearly see that the magnitude of the
 69 | differences between the groups depends upon scale location, as evidenced
 70 | by the reversal of the effect (negative to positive) for the Non-ELL
 71 | (non-English Language Learners) group. We could also change the
 72 | reference group, change the level of quantile binning, and evaluate the
 73 | effect within other factors. For example, we can look by season and
 74 | eligibility for free or reduced price lunch, with quintile binning, and
 75 | non-ELL students as the reference group with
 76 | 
 77 | ``` r
 78 | binned_plot(benchmarks, 
 79 |             math ~ ell + frl + season, 
 80 |             ref_group = "Non-ELL",
 81 |             qtile_groups = 5)
 82 | #> Warning: `cols` is now required.
 83 | #> Please use `cols = c(data, q)`
 84 | ```
 85 | 
 86 | ![](README-binned_plot2-1.png)<!-- -->
 87 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-binned_plot2-1.png)
 88 | The `ref_group` argument can also be supplied as a formula.
 89 | 
 90 | ### PP Plots
 91 | 
 92 | Probability-probability plots can be produced with a call to `pp_plot`
 93 | and an equivalent argument structure. In this case, we’re visualizing
 94 | the difference in reading achievement by race/ethnicity by season.
 95 | 
 96 | ``` r
 97 | pp_plot(benchmarks, reading ~ ethnicity + season)
 98 | ```
 99 | 
100 | ![](README-pp_plot1-1.png)<!-- -->
101 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-pp_plot1-1.png)
102 | 
103 | Essentially, the empirical cumulative distribution function (ECDF) for
104 | the reference group (by default, the highest performing group) is mapped
105 | against the ECDF for each corresponding group. The magnitude of the
106 | achievement gap is then displayed by the distance from the diagonal
107 | reference line, representing, essentially, the ECDF for the reference
108 | group.
109 | 
110 | By default, the area under the curve is shaded, which itself is an
111 | effect-size like measure, but this is also manipulable.
112 | 
113 | ### ECDF Plot
114 | 
115 | Finally, the `ecdf_plot` function essentially dresses up the base
116 | `plot.ecdf` function, but also adds some nice referencing features
117 | through additional, optional arguments. Below, I have included the
118 | optional `hor_ref = TRUE` argument such that horizontal reference lines
119 | appear, relative to the cuts provided.
120 | 
121 | ``` r
122 | ecdf_plot(benchmarks, math ~ season, 
123 |     cuts = c(190, 200, 215))
124 | ```
125 | 
126 | ![](README-ecdf_plot-1.png)<!-- -->
127 | ![](https://github.com/datalorax/esvis/raw/master/docs/README-ecdf_plot-1.png)
128 | These are the curves that go into the PP-Plot, but occasionally can be
129 | useful on their own.
130 | 
131 | ## Estimation Methods
132 | 
133 | Compute effect sizes for all possible pairwise comparisons.
134 | 
135 | ``` r
136 | coh_d(benchmarks, math ~ season + frl)
137 | #> `mutate_if()` ignored the following grouping variables:
138 | #> Column `season`
139 | #> # A tibble: 30 x 6
140 | #>    season_ref frl_ref season_foc frl_foc      coh_d     coh_se
141 | #>    <chr>      <chr>   <chr>      <chr>        <dbl>      <dbl>
142 | #>  1 Fall       FRL     Fall       Non-FRL  0.7443868 0.07055679
143 | #>  2 Fall       FRL     Spring     FRL      1.321191  0.04957348
144 | #>  3 Fall       FRL     Spring     Non-FRL  2.008066  0.07873488
145 | #>  4 Fall       FRL     Winter     FRL      0.6246112 0.04716189
146 | #>  5 Fall       FRL     Winter     Non-FRL  1.300031  0.07326622
147 | #>  6 Fall       Non-FRL Fall       FRL     -0.7443868 0.07055679
148 | #>  7 Fall       Non-FRL Spring     FRL      0.5498306 0.06939873
149 | #>  8 Fall       Non-FRL Spring     Non-FRL  1.140492  0.09189070
150 | #>  9 Fall       Non-FRL Winter     FRL     -0.1269229 0.06934576
151 | #> 10 Fall       Non-FRL Winter     Non-FRL  0.5009081 0.08716735
152 | #> # … with 20 more rows
153 | ```
154 | 
155 | Or specify a reference group. In this case, I’ve used the formula-based
156 | interface, but a string vector specifying the specific reference group
157 | could also be supplied.
158 | 
159 | ``` r
160 | coh_d(benchmarks, 
161 |       math ~ season + frl, 
162 |       ref_group = ~Fall + `Non-FRL`)
163 | #> `mutate_if()` ignored the following grouping variables:
164 | #> Column `season`
165 | #> # A tibble: 5 x 6
166 | #>   season_ref frl_ref season_foc frl_foc      coh_d     coh_se
167 | #>   <chr>      <chr>   <chr>      <chr>        <dbl>      <dbl>
168 | #> 1 Fall       Non-FRL Fall       FRL     -0.7443868 0.07055679
169 | #> 2 Fall       Non-FRL Spring     FRL      0.5498306 0.06939873
170 | #> 3 Fall       Non-FRL Spring     Non-FRL  1.140492  0.09189070
171 | #> 4 Fall       Non-FRL Winter     FRL     -0.1269229 0.06934576
172 | #> 5 Fall       Non-FRL Winter     Non-FRL  0.5009081 0.08716735
173 | ```
174 | 
175 | Notice that the reference to Non-FRL is wrapped in back-ticks, which
176 | should be used anytime there are spaces or other non-standard
177 | characters.
178 | 
179 | Other effect sizes are estimated equivalently. For example, *V*
180 | ([Ho, 2009](https://journals.sagepub.com/doi/10.3102/1076998609332755))
181 | can be estimated with
182 | 
183 | ``` r
184 | v(benchmarks, 
185 |   math ~ season + frl, 
186 |   ref_group = ~Fall + `Non-FRL`)
187 | #> # A tibble: 5 x 5
188 | #> # Groups:   frl, season [1]
189 | #>   frl_ref season_ref frl_foc season_foc          v
190 | #>   <chr>   <chr>      <chr>   <chr>           <dbl>
191 | #> 1 Non-FRL Fall       Non-FRL Winter      0.5070737
192 | #> 2 Non-FRL Fall       FRL     Spring      0.5454666
193 | #> 3 Non-FRL Fall       FRL     Winter     -0.1117226
194 | #> 4 Non-FRL Fall       Non-FRL Spring      1.139235 
195 | #> 5 Non-FRL Fall       FRL     Fall       -0.7051069
196 | ```
197 | 
198 | or *AUC* with
199 | 
200 | ``` r
201 | auc(benchmarks, 
202 |     math ~ season + frl, 
203 |     ref_group = ~Fall + `Non-FRL`)
204 | #> # A tibble: 5 x 5
205 | #> # Groups:   frl, season [1]
206 | #>   frl_ref season_ref frl_foc season_foc       auc
207 | #>   <chr>   <chr>      <chr>   <chr>          <dbl>
208 | #> 1 Non-FRL Fall       Non-FRL Winter     0.6400361
209 | #> 2 Non-FRL Fall       FRL     Spring     0.6501417
210 | #> 3 Non-FRL Fall       FRL     Winter     0.4685164
211 | #> 4 Non-FRL Fall       Non-FRL Spring     0.7897519
212 | #> 5 Non-FRL Fall       FRL     Fall       0.3090356
213 | ```
214 | 


--------------------------------------------------------------------------------
/appveyor.yml:
--------------------------------------------------------------------------------
 1 | # DO NOT CHANGE the "init" and "install" sections below
 2 | 
 3 | # Download script file from GitHub
 4 | init:
 5 |   ps: |
 6 |         $ErrorActionPreference = "Stop"
 7 |         Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1"
 8 |         Import-Module '..\appveyor-tool.ps1'
 9 | 
10 | install:
11 |   - ps: Bootstrap
12 | 
13 | # Adapt as necessary starting from here
14 | 
15 | build_script:
16 |   - travis-tool.sh install_deps
17 | 
18 | test_script:
19 |   - travis-tool.sh run_tests
20 | 
21 | on_failure:
22 |   - 7z a failure.zip *.Rcheck\*
23 |   - appveyor PushArtifact failure.zip
24 | 
25 | artifacts:
26 |   - path: '*.Rcheck\**\*.log'
27 |     name: Logs
28 | 
29 |   - path: '*.Rcheck\**\*.out'
30 |     name: Logs
31 | 
32 |   - path: '*.Rcheck\**\*.fail'
33 |     name: Logs
34 | 
35 |   - path: '*.Rcheck\**\*.Rout'
36 |     name: Logs
37 | 
38 |   - path: '\*_*.tar.gz'
39 |     name: Bits
40 | 
41 |   - path: '\*_*.zip'
42 |     name: Bits
43 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Release Summary
 2 | 
 3 | This is a minor release of esvis, version 0.3.1, an R package for visualizing and estimating effect sizes. This release implements changes to the codebase to accommodate the release of dplyr 1.0, one of the package dependencies. 
 4 | 
 5 | ## Test environments
 6 | * Local OS X install, R 3.6.0
 7 | * Ubuntu 16.04 (on Travis-CI), R 4.0.0
 8 | * Win-builder (devel and release)
 9 | 
10 | ## R CMD check results
11 | 
12 | 0 errors | 0 warnings | 0 notes
13 | 
14 | ## Downstream dependencies
15 | 
16 | There are currently no downstream dependencies for this package


--------------------------------------------------------------------------------
/data/benchmarks.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/data/benchmarks.rda


--------------------------------------------------------------------------------
/data/seda.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/data/seda.rda


--------------------------------------------------------------------------------
/data/star.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/data/star.rda


--------------------------------------------------------------------------------
/docs/README-binned_plot1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-binned_plot1-1.png


--------------------------------------------------------------------------------
/docs/README-binned_plot2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-binned_plot2-1.png


--------------------------------------------------------------------------------
/docs/README-cleanup.R:
--------------------------------------------------------------------------------
 1 | files <- list.files(pattern = "README-")  # files in the working directory whose names contain "README-"
 2 | # Copy those files into docs/, overwriting any existing copies.
 3 | file.copy(files, file.path("docs", files), overwrite = TRUE)
 4 | # Build one sed command per file that deletes its bare `![](<file>)` image link from README.md in place. Note: `rm` masks base::rm here.
 5 | rm <- glue::glue("sed -i -e 's/!\\[\\]({file})//g' README.md", 
 6 |                 file = files)
 7 | purrr::walk(rm, system)
 8 | # Remove the originals plus "README.md-e". NOTE(review): presumably the backup file BSD/macOS sed writes for `-i -e`; confirm, GNU sed creates none.
 9 | file.remove(c(files, "README.md-e"))
10 | 


--------------------------------------------------------------------------------
/docs/README-ecdf_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-ecdf_plot-1.png


--------------------------------------------------------------------------------
/docs/README-pp_plot1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/docs/README-pp_plot1-1.png


--------------------------------------------------------------------------------
/esvis.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/inst/image/README-binned_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-binned_plot-1.png


--------------------------------------------------------------------------------
/inst/image/README-ecdf_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-ecdf_plot-1.png


--------------------------------------------------------------------------------
/inst/image/README-pp_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-pp_plot-1.png


--------------------------------------------------------------------------------
/inst/image/README-pp_plot1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-pp_plot1-1.png


--------------------------------------------------------------------------------
/inst/image/README-pp_plot2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/inst/image/README-pp_plot2-1.png


--------------------------------------------------------------------------------
/man/auc.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{auc}
 4 | \alias{auc}
 5 | \title{Compute the Area Under the \link{pp_plot} Curve
 6 | Calculates the area under the \code{pp} curve. The area under the curve is 
 7 | also a useful effect-size like statistic, representing the probability that 
 8 | a randomly selected individual from the \code{x} distribution will have a 
 9 | higher value than a randomly selected individual from the \code{y} 
10 | distribution.}
11 | \usage{
12 | auc(data, formula, ref_group = NULL, rename = TRUE)
13 | }
14 | \arguments{
15 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
16 | format.}
17 | 
18 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
19 | the outcome variable and \code{group} is the grouping variable. Note this
20 | variable can include any arbitrary number of groups. Additional variables 
21 | can be included with \code{+} to produce separate estimates by the secondary 
22 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
23 | + characteristic2}).}
24 | 
25 | \item{ref_group}{Optional. A character vector or formula listing the 
26 | reference group levels for each variable on the right hand side of the 
27 | formula, supplied in the same order as the formula. Note that if using the
28 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
29 | should be wrapped in back ticks (e.g., 
30 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
31 | in doubt, it is safest to use the back ticks, as they will not interfere 
32 | with anything if they are not needed. See examples below for more details.}
33 | 
34 | \item{rename}{Used primarily for internal purposes. Should the column 
35 | names be renamed to reference the focal and reference groups? Defaults to
36 | \code{TRUE}.}
37 | }
38 | \value{
39 | By default the area under the curve for all possible pairings of
40 | the grouping factor are returned.
41 | }
42 | \description{
43 | Compute the Area Under the \link{pp_plot} Curve
44 | Calculates the area under the \code{pp} curve. The area under the curve is 
45 | also a useful effect-size like statistic, representing the probability that 
46 | a randomly selected individual from the \code{x} distribution will have a 
47 | higher value than a randomly selected individual from the \code{y} 
48 | distribution.
49 | }
50 | \examples{
51 | 
52 | # Calculate AUC for all pairwise comparisons
53 | auc(star, reading ~ condition) 
54 | 
55 | # Report only relative to regular-sized classrooms
56 | auc(star, 
57 |     reading ~ condition, 
58 |     ref_group = "reg")
59 | 
60 | # Report by ELL and FRL groups for each season, compare to non-ELL students
61 | # who were not eligible for free or reduced price lunch in the fall (using
62 | # the formula interface for reference group referencing).
63 | \dontrun{
64 | auc(benchmarks, 
65 |       math ~ ell + frl + season,
66 |       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
67 | 
68 | # Same thing but with character vector supplied, rather than a formula
69 | auc(benchmarks, 
70 |       math ~ ell + frl + season,
71 |       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
72 | }
73 | 
74 | }
75 | 


--------------------------------------------------------------------------------
/man/benchmarks.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/benchmarks.R
 3 | \docType{data}
 4 | \name{benchmarks}
 5 | \alias{benchmarks}
 6 | \title{Synthetic benchmark screening data}
 7 | \format{A data frame with 10240 rows and 9 columns.
 8 |   \describe{
 9 |     \item{sid}{Integer. Student identifier.}
10 | 	   \item{cohort}{Integer. Identifies the cohort from which the student was
11 | 			sampled (1-3).}
12 |     \item{sped}{Character. Special Education status: "Non-Sped" or "Sped"}
13 | 	   \item{ethnicity}{Character. The race/ethnicity to which the student
14 | 			identified. Takes on one of seven values: "Am. Indian", "Asian",
15 | 			"Black", "Hispanic", "Native Am.", "Two or More", and "White"}
16 | 	   \item{frl}{Character. Student's eligibility for free or reduced price
17 | 			lunch. Takes on the values "FRL" and "Non-FRL".}
18 | 	   \item{ell}{Character. Students' English language learner status. Takes 
19 | 			on one of three values: "Active", "Monitor", and "Non-ELL". Students
20 | 			coded "Active" were actively receiving English language services
21 | 			at the time of testing. Students coded "Monitor" had previously 
22 | 			received services, but not at the time of testing. Students coded
23 | 			"Non-ELL" did not receive services at any time.}
24 |     \item{season}{Character. The season during which the assessment was
25 | 			administered: "Fall", "Winter", or "Spring"}
26 |     \item{reading}{Integer. Reading scale score.}
27 |     \item{math}{Integer. Mathematics scale score.}
28 | }}
29 | \usage{
30 | benchmarks
31 | }
32 | \description{
33 | Across the country many schools engage in seasonal benchmark screenings to 
34 | monitor the progress of their students. These are relatively brief
35 | assessments administered to "check-in" on students' progress throughout
36 | the year. This dataset was simulated from a real dataset from one large
37 | school district using the terrific 
38 | \href{https://CRAN.R-project.org/package=synthpop}{synthpop}
39 | R package. Overall characteristics of the synthetic data are remarkably
40 | similar to the real data.
41 | }
42 | \keyword{datasets}
43 | 


--------------------------------------------------------------------------------
/man/binned_es.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{binned_es}
 4 | \alias{binned_es}
 5 | \title{Calculate binned effect sizes}
 6 | \usage{
 7 | binned_es(
 8 |   data,
 9 |   formula,
10 |   ref_group = NULL,
11 |   qtile_groups = 3,
12 |   es = "g",
13 |   rename = TRUE
14 | )
15 | }
16 | \arguments{
17 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
18 | format.}
19 | 
20 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
21 | the outcome variable and \code{group} is the grouping variable. Note this
22 | variable can include any arbitrary number of groups. Additional variables 
23 | can be included with \code{+} to produce separate estimates by the secondary 
24 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
25 | + characteristic2}).}
26 | 
27 | \item{ref_group}{Optional. A character vector or formula listing the 
28 | reference group levels for each variable on the right hand side of the 
29 | formula, supplied in the same order as the formula. Note that if using the
30 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
31 | should be wrapped in back ticks (e.g., 
32 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
33 | in doubt, it is safest to use the back ticks, as they will not interfere 
34 | with anything if they are not needed. See examples below for more details.}
35 | 
36 | \item{qtile_groups}{The number of quantile bins to split the data by and 
37 | calculate effect sizes. Defaults to 3 bins (lower, middle, upper).}
38 | 
39 | \item{es}{The effect size to calculate. Currently the only options are 
40 | "d" or "g".}
41 | 
42 | \item{rename}{Logical. Should the column names be relabeled according to
43 | the reference and focal groups. Defaults to \code{TRUE}.}
44 | }
45 | \value{
46 | A data frame with the corresponding effect sizes.
47 | }
48 | \description{
49 | Calculate binned effect sizes
50 | }
51 | 


--------------------------------------------------------------------------------
/man/binned_plot.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/binned_es_plot.R
  3 | \name{binned_plot}
  4 | \alias{binned_plot}
  5 | \title{Quantile-binned effect size plot}
  6 | \usage{
  7 | binned_plot(
  8 |   data,
  9 |   formula,
 10 |   ref_group = NULL,
 11 |   qtile_groups = 3,
 12 |   es = "g",
 13 |   lines = TRUE,
 14 |   points = TRUE,
 15 |   shade = TRUE,
 16 |   shade_alpha = 0.4,
 17 |   rects = TRUE,
 18 |   rect_fill = "gray20",
 19 |   rect_alpha = 0.35,
 20 |   refline = TRUE,
 21 |   refline_col = "gray40",
 22 |   refline_lty = "solid",
 23 |   refline_lwd = 1.1
 24 | )
 25 | }
 26 | \arguments{
 27 | \item{data}{The data frame to be plotted}
 28 | 
 29 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
 30 | the outcome variable and \code{group} is the grouping variable. Note this
 31 | variable can include any arbitrary number of groups. Additional variables 
 32 | can be included with \code{+} to produce separate plots by the secondary or
 33 | tertiary variable of interest (e.g., \code{out ~ group + characteristic1 + 
 34 | characteristic2}). No more than two additional characteristics can be 
 35 | supplied at this time.}
 36 | 
 37 | \item{ref_group}{Optional character vector (of length 1) naming the
 38 | reference group. Defaults to the group with the highest mean score.}
 39 | 
 40 | \item{qtile_groups}{The number of quantile bins to split the data by and 
 41 | calculate effect sizes. Defaults to 3 bins (lower, middle, upper).}
 42 | 
 43 | \item{es}{The effect size to plot. Defaults to \code{"g"}, in which case 
 44 | Hedges' g is plotted, which is better for small samples. At present, the 
 45 | only other option is \code{"d"} for Cohen's d.}
 46 | 
 47 | \item{lines}{Logical. Should the effect size lines be plotted? Defaults to 
 48 | \code{TRUE}.}
 49 | 
 50 | \item{points}{Logical. Should points be plotted for each of the 
 51 | \code{qtile_groups} bins? Defaults to \code{TRUE}.}
 52 | 
 53 | \item{shade}{Logical. Should the standard errors around the effect size point
 54 | estimates be displayed? Defaults to \code{TRUE}, with the uncertainty 
 55 | displayed with shading.}
 56 | 
 57 | \item{shade_alpha}{Transparency level of the standard error shading.
 58 | Defaults to 0.40.}
 59 | 
 60 | \item{rects}{Logical. Should semi-transparent rectangles be plotted in the 
 61 | background to show the binning? Defaults to \code{TRUE}.}
 62 | 
 63 | \item{rect_fill}{Color fill of rectangles to be plotted in the background, if
 64 | \code{rects == TRUE}. Defaults to "gray20".}
 65 | 
 66 | \item{rect_alpha}{Transparency level of the rectangles in the background when
 67 | \code{rects == TRUE}. Defaults to 0.35.}
 68 | 
 69 | \item{refline}{Logical. Defaults to \code{TRUE}. Should a diagonal
 70 | reference line, representing the point of equal probabilities, be plotted?}
 71 | 
 72 | \item{refline_col}{The color of the reference line. Defaults to 
 73 | \code{"gray40"}}
 74 | 
 75 | \item{refline_lty}{Line type of the reference line. Defaults to
 76 | \code{"solid"}.}
 77 | 
 78 | \item{refline_lwd}{Line width of the reference line. Defaults to \code{1.1}.}
 79 | }
 80 | \description{
 81 | Plots the effect size between focal and reference groups by matched (binned) 
 82 | quantiles (i.e., the results from \link{binned_es}), with the matched
 83 | quantiles plotted along the x-axis and the effect size plotted along the 
 84 | y-axis. The intent is to examine how (if) the magnitude of the effect size
 85 | varies at different points of the distributions. The mean differences within
 86 | each quantile bin are divided by the overall pooled standard deviation for 
 87 | the two groups being compared.
 88 | }
 89 | \examples{
 90 | # Binned Effect Size Plot: Defaults to Hedges' G
 91 | binned_plot(star, math ~ condition)
 92 |  
 93 | # Same plot, separated by sex
 94 | binned_plot(star, math ~ condition + sex)
 95 | 
 96 | # Same plot by sex and race
 97 | \dontrun{
 98 |   binned_plot(star, math ~ condition + sex + race)
 99 | }
100 | ## Evaluate with simulated data: Plot is most interesting when the variances
101 | # of the distributions being compared differ.
102 | 
103 | library(tidyr)
104 | library(ggplot2)
105 | 
106 | # simulate data with different variances
107 | set.seed(100)
108 | common_vars <- data.frame(low  = rnorm(1000, 10, 1),
109 |                         high = rnorm(1000, 12, 1),
110 |                         vars = "common")
111 | diff_vars <- data.frame(low  = rnorm(1000, 10, 1),
112 |                       high = rnorm(1000, 12, 2),
113 |                       vars = "diff")
114 | d <- rbind(common_vars, diff_vars)
115 | 
116 | # Plot distributions 
117 | d <- d \%>\% 
118 | gather(group, value, -vars) 
119 | 
120 | ggplot(d, aes(value, color = group)) +
121 |  geom_density() +
122 |  facet_wrap(~vars)
123 | 
124 | # Note that the difference between the distributions depends on where you're 
125 | # evaluating from on the x-axis. The binned plot helps us visualize this. 
126 | # The below shows the binned plots when there is a common versus different
127 | # variance
128 | 
129 | binned_plot(d, value ~ group + vars)   
130 | }
131 | 


--------------------------------------------------------------------------------
/man/coh.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{coh}
 4 | \alias{coh}
 5 | \title{Cohen's d}
 6 | \usage{
 7 | coh(n1, n2, mn1, mn2, vr1, vr2)
 8 | }
 9 | \arguments{
10 | \item{n1}{The sample size for group 1}
11 | 
12 | \item{n2}{The sample size for group 2}
13 | 
14 | \item{mn1}{The mean for group 1}
15 | 
16 | \item{mn2}{The mean for group 2}
17 | 
18 | \item{vr1}{The variance for group 1}
19 | 
20 | \item{vr2}{The variance for group 2}
21 | }
22 | \description{
23 | Wraps the equation into a function
24 | }
25 | \keyword{internal}
26 | 


--------------------------------------------------------------------------------
/man/coh_d.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{coh_d}
 4 | \alias{coh_d}
 5 | \title{Compute Cohen's \emph{d}}
 6 | \usage{
 7 | coh_d(data, formula, ref_group = NULL, se = TRUE)
 8 | }
 9 | \arguments{
10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
11 | format.}
12 | 
13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
14 | the outcome variable and \code{group} is the grouping variable. Note this
15 | variable can include any arbitrary number of groups. Additional variables 
16 | can be included with \code{+} to produce separate estimates by the secondary 
17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
18 | + characteristic2}).}
19 | 
20 | \item{ref_group}{Optional. A character vector or formula listing the 
21 | reference group levels for each variable on the right hand side of the 
22 | formula, supplied in the same order as the formula. Note that if using the
23 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
24 | should be wrapped in back ticks (e.g., 
25 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
26 | in doubt, it is safest to use the back ticks, as they will not interfere 
27 | with anything if they are not needed. See examples below for more details.}
28 | 
29 | \item{se}{Logical. Should the standard error of the effect size be 
30 | estimated and returned in the resulting data frame? Defaults to 
31 | \code{TRUE}.}
32 | }
33 | \value{
34 | By default the Cohen's \emph{d} for all possible pairings of
35 |  the grouping factor(s) are returned.
36 | }
37 | \description{
38 | This function calculates effect sizes in terms of Cohen's \emph{d}, also
39 | called the uncorrected effect size. See \code{\link{hedg_g}} for the sample
40 | size corrected version. Also see 
41 | \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)}
42 | for a discussion on different types of effect sizes and their
43 | interpretation. Note that missing data are removed from the calculations of 
44 | the means and standard deviations.
45 | }
46 | \examples{
47 | 
48 | # Calculate Cohen's d for all pairwise comparisons
49 | coh_d(star, reading ~ condition) 
50 | 
51 | # Report only relative to regular-sized classrooms
52 | coh_d(star,
53 |       reading ~ condition, 
54 | 		   ref_group = "reg")
55 | 
56 | # Report by ELL and FRL groups for each season, compare to non-ELL students
57 | # who were not eligible for free or reduced price lunch in the fall (using
58 | # the formula interface for reference group referencing).
59 | 
60 | coh_d(benchmarks, 
61 |       math ~ ell + frl + season,
62 |       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
63 | 
64 | # Same thing but with character vector supplied, rather than a formula
65 | coh_d(benchmarks, 
66 |       math ~ ell + frl + season,
67 |       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
68 | }
69 | 


--------------------------------------------------------------------------------
/man/descrip_stats.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{descrip_stats}
 4 | \alias{descrip_stats}
 5 | \title{Report descriptive stats for all possible pairings on the rhs of the formula.}
 6 | \usage{
 7 | descrip_stats(data, formula, ..., qtile_groups = NULL)
 8 | }
 9 | \arguments{
10 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
11 | the outcome variable and \code{group} is the grouping variable. Note this
12 | variable can include any arbitrary number of groups. Additional variables 
13 | can be included with \code{+} to produce descriptive stats by the secondary 
14 | or tertiary variable of interest (e.g., \code{out ~ group + characteristic1 
15 | + characteristic2}).}
16 | }
17 | \description{
18 | Report descriptive stats for all possible pairings on the rhs of the formula.
19 | }
20 | \keyword{internal}
21 | 


--------------------------------------------------------------------------------
/man/ecdf_fun.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{ecdf_fun}
 4 | \alias{ecdf_fun}
 5 | \title{Computes the empirical cumulative distribution function for all groups
 6 | supplied by the formula.}
 7 | \usage{
 8 | ecdf_fun(data, formula, cuts = NULL)
 9 | }
10 | \arguments{
11 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
12 | format.}
13 | 
14 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
15 | the outcome variable and \code{group} is the grouping variable. Note this
16 | variable can include any arbitrary number of groups. Additional variables 
17 | can be included with \code{+} to produce separate estimates by the secondary 
18 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
19 | + characteristic2}).}
20 | 
21 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be
22 | guaranteed to include these points. Otherwise, there could be gaps in the 
23 | ECDF at those particular points (used in plotting the cut scores).}
24 | }
25 | \description{
26 | Computes the empirical cumulative distribution function for all groups
27 | supplied by the formula.
28 | }
29 | \keyword{internal}
30 | 


--------------------------------------------------------------------------------
/man/ecdf_plot.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/ecdf_plot.R
 3 | \name{ecdf_plot}
 4 | \alias{ecdf_plot}
 5 | \title{Empirical Cumulative Distribution Plot}
 6 | \usage{
 7 | ecdf_plot(
 8 |   data,
 9 |   formula,
10 |   cuts = NULL,
11 |   linewidth = 1.2,
12 |   ref_line_cols = "gray40",
13 |   ref_linetype = "solid",
14 |   center = FALSE,
15 |   ref_rect = TRUE,
16 |   ref_rect_col = "gray40",
17 |   ref_rect_alpha = 0.15
18 | )
19 | }
20 | \arguments{
21 | \item{data}{A tidy data frame containing the data to be plotted.}
22 | 
23 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
24 | the outcome variable and \code{group} is the grouping variable. Note this
25 | variable can include any arbitrary number of groups. Additional variables 
26 | can be included with \code{+} to produce separate plots by the secondary or
27 | tertiary variable (e.g., \code{out ~ group + characteristic1 + 
28 | characteristic2}). No more than two additional characteristics can be 
29 | supplied at this time.}
30 | 
31 | \item{cuts}{Optional numeric vector stating the location of reference 
32 | line(s) and/or rectangle(s).}
33 | 
34 | \item{linewidth}{Width of ECDF lines. Note that the color of the lines can 
35 | be controlled through additional functions (e.g., \code{scale_color_brewer,
36 |   scale_color_manual}).}
37 | 
38 | \item{ref_line_cols}{Optional vector (or single value) of colors for 
39 | \code{cuts} lines.}
40 | 
41 | \item{ref_linetype}{Optional vector (or single value) of line types for 
42 | \code{cuts} lines. Takes any of the arguments supplied by 
43 | \link[ggplot2]{linetype}.}
44 | 
45 | \item{center}{Logical. Should the functions be centered prior to plotting? 
46 | Defaults to \code{FALSE}. Note that if paneled/faceted plots are produced, 
47 | the centering occurs by group.}
48 | 
49 | \item{ref_rect}{Logical, defaults to \code{TRUE} when \code{cuts} takes 
50 | any non-null value. Should semi-transparent rectangle(s) be plotted at the 
51 | locations of \code{cuts}?}
52 | 
53 | \item{ref_rect_col}{Color of the fill for the reference rectangles. Defaults 
54 | to a dark gray.}
55 | 
56 | \item{ref_rect_alpha}{Transparency of the fill for the reference rectangles. 
57 | Defaults to 0.15.}
58 | }
59 | \description{
60 | This is a wrapper function for the \link[ggplot2]{stat_ecdf} function and 
61 | helps make it easy to directly compare distributions at specific
62 | locations along the scale.
63 | }
64 | \examples{
65 | ecdf_plot(benchmarks, math ~ ell, 
66 |           cuts = c(190, 205, 210), 
67 |           ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA"))
68 | 
69 | # Customize the plot with ggplot2 functions
70 | library(ggplot2)
71 | ecdf_plot(benchmarks, math ~ ell, 
72 |           cuts = c(190, 205, 210), 
73 |           ref_line_cols = c("#D68EE3", "#9BE38E", "#144ECA")) +
74 |   theme_minimal() +
75 |   theme(legend.position = "bottom")
76 | 
77 | ecdf_plot(seda, mean ~ grade) +
78 |   scale_fill_brewer(palette = "Set2") +
79 |   theme_minimal()
80 |   
81 | # Use within the dplyr pipeline
82 | library(dplyr)
83 | benchmarks \%>\% 
84 |   mutate(season = factor(season, 
85 |                          levels = c("Fall", "Winter", "Spring"))) \%>\% 
86 |   ecdf_plot(math ~ ell + season + frl)
87 | }
88 | 


--------------------------------------------------------------------------------
/man/esvis-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/esvis-package.R
 3 | \docType{package}
 4 | \name{esvis-package}
 5 | \alias{esvis}
 6 | \alias{esvis-package}
 7 | \title{esvis: Visualization and Estimation of Effect Sizes}
 8 | \description{
 9 | A variety of methods are provided to estimate and visualize
10 |     distributional differences in terms of effect sizes. Particular emphasis
11 |     is upon evaluating differences between two or more distributions across
12 |     the entire scale, rather than at a single point (e.g., differences in
13 |     means). For example, Probability-Probability (PP) plots display the
14 |     difference between two or more distributions, matched by their empirical
15 |     CDFs (see Ho and Reardon, 2012; <doi:10.3102/1076998611411918>), allowing
16 |     for examinations of where on the scale distributional differences are
17 |     largest or smallest. The area under the PP curve (AUC) is an effect-size
18 |     metric, corresponding to the probability that a randomly selected
19 |     observation from the x-axis distribution will have a higher value
20 |     than a randomly selected observation from the y-axis distribution. 
21 |     Binned effect size plots are also available, in which the distributions
22 |     are split into bins (set by the user) and separate effect sizes (Cohen's
23 |     d) are produced for each bin - again providing a means to evaluate the
24 |     consistency (or lack thereof) of the difference between two or more 
25 |     distributions at different points on the scale. Evaluation of empirical 
26 |     CDFs is also provided, with  built-in arguments for providing annotations 
27 |     to help evaluate distributional differences at specific points (e.g., 
28 |     semi-transparent shading). All functions take a consistent argument 
29 |     structure. Calculation of specific effect sizes is also possible. The
30 |     following effect sizes are estimable: (a) Cohen's d, (b) Hedges' g, 
31 |     (c) percentage above a cut, (d) transformed (normalized) percentage above 
32 |     a cut, (e)  area under the PP curve, and (f) the V statistic (see Ho, 
33 |     2009; <doi:10.3102/1076998609332755>), which essentially transforms the 
34 |     area under the curve to standard deviation units. By default, effect sizes 
35 |     are calculated for all possible pairwise comparisons, but a reference 
36 |     group (distribution) can be specified.
37 | }
38 | \seealso{
39 | Useful links:
40 | \itemize{
41 |   \item \url{https://github.com/datalorax/esvis}
42 |   \item Report bugs at \url{https://github.com/datalorax/esvis/issues}
43 | }
44 | 
45 | }
46 | \author{
47 | \strong{Maintainer}: Daniel Anderson \email{daniela@uoregon.edu}
48 | 
49 | }
50 | \keyword{internal}
51 | 


--------------------------------------------------------------------------------
/man/hedg.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{hedg}
 4 | \alias{hedg}
 5 | \title{Hedges' g}
 6 | \usage{
 7 | hedg(n1, n2, d)
 8 | }
 9 | \arguments{
10 | \item{n1}{The sample size for group 1}
11 | 
12 | \item{n2}{The sample size for group 2}
13 | 
14 | \item{d}{The value of Cohen's d}
15 | }
16 | \description{
17 | Wraps the equation into a function
18 | }
19 | \keyword{internal}
20 | 


--------------------------------------------------------------------------------
/man/hedg_g.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{hedg_g}
 4 | \alias{hedg_g}
 5 | \title{Compute Hedges' \emph{g}
 6 | This function calculates effect sizes in terms of Hedges' \emph{g}, also
 7 | called the corrected (for sample size) effect size. See
 8 | \code{\link{coh_d}} for the uncorrected version. Also see 
 9 | \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)}
10 | for a discussion on different types of effect sizes and their
11 | interpretation. Note that missing data are removed from the calculations of 
12 | the means and standard deviations.}
13 | \usage{
14 | hedg_g(data, formula, ref_group = NULL, keep_d = TRUE)
15 | }
16 | \arguments{
17 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
18 | format.}
19 | 
20 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
21 | the outcome variable and \code{group} is the grouping variable. Note this
22 | variable can include any arbitrary number of groups. Additional variables 
23 | can be included with \code{+} to produce separate estimates by the secondary 
24 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
25 | + characteristic2}).}
26 | 
27 | \item{ref_group}{Optional. A character vector or formula listing the 
28 | reference group levels for each variable on the right hand side of the 
29 | formula, supplied in the same order as the formula. Note that if using the
30 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
31 | should be wrapped in back ticks (e.g., 
32 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
33 | in doubt, it is safest to use the back ticks, as they will not interfere 
34 | with anything if they are not needed. See examples below for more details.}
35 | 
36 | \item{keep_d}{Logical. Should Cohen's \emph{d} be reported along with 
37 | Hedges' \emph{g}? Defaults to \code{TRUE}.}
38 | }
39 | \value{
40 | By default the Hedges' \emph{g} for all possible pairings of
41 |  the grouping factor are returned as a tidy data frame.
42 | }
43 | \description{
44 | Compute Hedges' \emph{g}.
45 | This function calculates effect sizes in terms of Hedges' \emph{g}, also
46 | called the corrected (for sample size) effect size. See
47 | \code{\link{coh_d}} for the uncorrected version. Also see 
48 | \href{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3840331/}{Lakens (2013)}
49 | for a discussion on different types of effect sizes and their
50 | interpretation. Note that missing data are removed from the calculations of 
51 | the means and standard deviations.
52 | }
53 | \examples{
54 | 
55 | # Calculate Hedges' g for all pairwise comparisons
56 | hedg_g(star, reading ~ condition) 
57 | 
58 | # Report only relative to regular-sized classrooms
59 | hedg_g(star, 
60 |        reading ~ condition, 
61 |        ref_group = "reg")
62 | 
63 | # Report by ELL and FRL groups for each season, compare to non-ELL students
64 | # who were not eligible for free or reduced price lunch in the fall (using
65 | # the formula interface for reference group referencing).
66 | 
67 | hedg_g(benchmarks, 
68 |       math ~ ell + frl + season,
69 |       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
70 | 
71 | # Same thing but with character vector supplied, rather than a formula
72 | hedg_g(benchmarks, 
73 |       math ~ ell + frl + season,
74 |       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
75 | }
76 | 


--------------------------------------------------------------------------------
/man/pac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{pac}
 4 | \alias{pac}
 5 | \title{Compute the proportion above a specific cut location}
 6 | \usage{
 7 | pac(data, formula, cuts, ref_group = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
11 | format.}
12 | 
13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
14 | the outcome variable and \code{group} is the grouping variable. Note this
15 | variable can include any arbitrary number of groups. Additional variables 
16 | can be included with \code{+} to produce separate estimates by the secondary 
17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
18 | + characteristic2}).}
19 | 
20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be
21 | guaranteed to include these points. Otherwise, there could be gaps in the 
22 | ECDF at those particular points (used in plotting the cut scores).}
23 | 
24 | \item{ref_group}{Optional. A character vector or formula listing the 
25 | reference group levels for each variable on the right hand side of the 
26 | formula, supplied in the same order as the formula. Note that if using the
27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
28 | should be wrapped in back ticks (e.g., 
29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
30 | in doubt, it is safest to use the back ticks, as they will not interfere 
31 | with anything if they are not needed. See examples below for more details.}
32 | }
33 | \value{
34 | Tidy data frame of the proportion above the cutoff for 
35 | each (or selected) groups.
36 | }
37 | \description{
38 | Computes the proportion of the corresponding group, as specified by the
39 | \code{formula}, scoring above the specified \code{cuts}.
40 | }
41 | \examples{
42 | # Compute the proportion of each group scoring above each of three cuts
43 | pac(star,
44 |     reading ~ condition,
45 | 		 cuts = c(450, 500, 550))
46 | 		 
47 | pac(star,
48 |     reading ~ condition + freelunch + race, 
49 | 		 cuts = c(450, 500))
50 | 
51 | pac(star,
52 |     reading ~ condition + freelunch + race, 
53 | 		 cuts = c(450, 500),
54 | 		 ref_group = ~small + no + white) 
55 | }
56 | \seealso{
57 | \code{\link{pac_compare}}, \code{\link{tpac}}, \code{\link{tpac_compare}}
58 | }
59 | 


--------------------------------------------------------------------------------
/man/pac_compare.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{pac_compare}
 4 | \alias{pac_compare}
 5 | \title{Compute the difference in the proportion above a specific cut location}
 6 | \usage{
 7 | pac_compare(data, formula, cuts, ref_group = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
11 | format.}
12 | 
13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
14 | the outcome variable and \code{group} is the grouping variable. Note this
15 | variable can include any arbitrary number of groups. Additional variables 
16 | can be included with \code{+} to produce separate estimates by the secondary 
17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
18 | + characteristic2}).}
19 | 
20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be
21 | guaranteed to include these points. Otherwise, there could be gaps in the 
22 | ECDF at those particular points (used in plotting the cut scores).}
23 | 
24 | \item{ref_group}{Optional. A character vector or formula listing the 
25 | reference group levels for each variable on the right hand side of the 
26 | formula, supplied in the same order as the formula. Note that if using the
27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
28 | should be wrapped in back ticks (e.g., 
29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
30 | in doubt, it is safest to use the back ticks, as they will not interfere 
31 | with anything if they are not needed. See examples below for more details.}
32 | }
33 | \value{
34 | Tidy data frame of the proportion above the cutoff for 
35 | each (or selected) groups.
36 | }
37 | \description{
38 | Computes the difference in the proportion above the specified \code{cuts} 
39 | for all possible pairwise comparisons of the groups specified by the 
40 | \code{formula}.
41 | }
42 | \examples{
43 | # Compute differences for all pairwise comparisons for each of three cuts
44 | pac_compare(star,
45 |     reading ~ condition, 
46 | 		 cuts = c(450, 500, 550)) 
47 | 		 
48 | pac_compare(star,
49 |     reading ~ condition + freelunch + race, 
50 | 		 cuts = c(450, 500))
51 | 
52 | pac_compare(star,
53 |     reading ~ condition + freelunch + race, 
54 | 		 cuts = c(450, 500),
55 | 		 ref_group = ~small + no + white) 
56 | }
57 | \seealso{
 58 | \code{\link{pac}}, \code{\link{tpac}}, \code{\link{tpac_compare}}
59 | }
60 | 


--------------------------------------------------------------------------------
/man/paired_ecdf.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{paired_ecdf}
 4 | \alias{paired_ecdf}
  5 | \title{Pairs empirical cumulative distribution functions for all groups
  6 | supplied by the formula.}
 7 | \usage{
 8 | paired_ecdf(data, formula, cuts = NULL)
 9 | }
10 | \arguments{
11 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
12 | format.}
13 | 
14 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
15 | the outcome variable and \code{group} is the grouping variable. Note this
16 | variable can include any arbitrary number of groups. Additional variables 
17 | can be included with \code{+} to produce separate estimates by the secondary 
18 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
19 | + characteristic2}).}
20 | 
21 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be
22 | guaranteed to include these points. Otherwise, there could be gaps in the 
23 | ECDF at those particular points (used in plotting the cut scores).}
24 | }
25 | \description{
 26 | Pairs empirical cumulative distribution functions for all groups
 27 | supplied by the formula.
28 | }
29 | \keyword{internal}
30 | 


--------------------------------------------------------------------------------
/man/pp_plot.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/pp_plot.R
  3 | \name{pp_plot}
  4 | \alias{pp_plot}
  5 | \title{Produces the paired probability plot for two groups}
  6 | \usage{
  7 | pp_plot(
  8 |   data,
  9 |   formula,
 10 |   ref_group = NULL,
 11 |   cuts = NULL,
 12 |   cut_labels = TRUE,
 13 |   cut_label_x = 0.02,
 14 |   cut_label_size = 3,
 15 |   lines = TRUE,
 16 |   linetype = "solid",
 17 |   linewidth = 1.1,
 18 |   shade = TRUE,
 19 |   shade_alpha = 0.2,
 20 |   refline = TRUE,
 21 |   refline_col = "gray40",
 22 |   refline_type = "dashed",
 23 |   refline_width = 1.1
 24 | )
 25 | }
 26 | \arguments{
 27 | \item{data}{The data frame to be plotted}
 28 | 
 29 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
 30 | the outcome variable and \code{group} is the grouping variable. Note this
 31 | variable can include any arbitrary number of groups. Additional variables 
 32 | can be included with \code{+} to produce separate plots by the secondary or
 33 | tertiary variable of interest (e.g., \code{out ~ group + characteristic1 + 
 34 | characteristic2}). No more than two additional characteristics can be 
 35 | supplied at this time.}
 36 | 
 37 | \item{ref_group}{Optional character vector (of length 1) naming the
 38 | reference group. Defaults to the group with the highest mean score.}
 39 | 
 40 | \item{cuts}{Integer. Optional vector (or single number) of scores used to 
 41 | annotate the plot. If supplied, line segments will extend from the 
 42 | corresponding x and y axes and meet at the PP curve.}
 43 | 
 44 | \item{cut_labels}{Logical. Should the reference lines corresponding to
 45 | \code{cuts} be labeled? Defaults to \code{TRUE}.}
 46 | 
 47 | \item{cut_label_x}{The x-axis location of the cut labels. Defaults to 0.02.}
 48 | 
 49 | \item{cut_label_size}{The size of the cut labels. Defaults to 3.}
 50 | 
 51 | \item{lines}{Logical. Should the PP Lines be plotted? Defaults to 
 52 | \code{TRUE}.}
 53 | 
 54 | \item{linetype}{The \link[ggplot2]{linetype} for the PP lines. Defaults to 
 55 | "solid".}
 56 | 
 57 | \item{linewidth}{The width of the PP lines. Defaults to 1.1 (just
 58 | marginally larger than the default ggplot2 lines).}
 59 | 
 60 | \item{shade}{Logical. Should the area under the curve be shaded? Defaults to
 61 | \code{TRUE}.}
 62 | 
 63 | \item{shade_alpha}{Transparency of the shading. Defaults to 0.2.}
 64 | 
 65 | \item{refline}{Logical. Should a diagonal reference line be plotted, 
 66 | representing the value at which no difference is observed between the
 67 | reference and focal distributions? Defaults to \code{TRUE}.}
 68 | 
 69 | \item{refline_col}{Color of the reference line. Defaults to a dark gray.}
 70 | 
 71 | \item{refline_type}{The \link[ggplot2]{linetype} for the reference line.
 72 | Defaults to "dashed".}
 73 | 
  74 | \item{refline_width}{The width of the reference line. Defaults to 1.1, 
  75 | the same width as the default PP lines.}
 76 | }
 77 | \value{
 78 | A \link[ggplot2]{ggplot2} object displaying the specified PP plot.
 79 | }
 80 | \description{
 81 | The paired probability plot maps the probability of obtaining a specific
 82 |    score for each of two groups. The area under the curve 
 83 |    (\code{\link{auc}}) corresponds to the probability that a randomly
 84 |    selected observation from the x-axis group will have a higher score than
 85 |    a randomly selected observation from the y-axis group. This function
 86 |    extends the basic pp-plot by allowing multiple curves and faceting to
 87 |    facilitate a variety of comparisons. Note that because the plotting is
 88 |    built on top of \link[ggplot2]{ggplot2}, additional customization can 
 89 |    be made on top of the plots, as illustrated in the examples.
 90 | }
 91 | \examples{
 92 | # PP plot examining differences by condition
 93 | pp_plot(star, math ~ condition)
 94 | 
 95 | # The sample size gets very small in the above within cells (e.g., wild 
 96 | # changes within the "other" group in particular). Overall, the effect doesn't
 97 | # seem to change much by condition.
 98 | 
 99 | # Look at something a little more interesting
100 | \dontrun{
101 | pp_plot(benchmarks, math ~ ell + season + frl)
102 | }
103 | # Add some cut scores
104 | pp_plot(benchmarks, math ~ ell, cuts = c(190, 210, 215))
105 | 
106 | ## Make another interesting plot. Use ggplot to customize
107 | \dontrun{
108 | library(tidyr)
109 | library(ggplot2)
110 | benchmarks \%>\% 
111 |   gather(subject, score, reading, math) \%>\% 
112 |   pp_plot(score ~ ell + subject + season,
113 |           ref_group = "Non-ELL") +
114 |   scale_fill_brewer(name = "ELL Status", palette = "Pastel2") +
115 |   scale_color_brewer(name = "ELL Status", palette = "Pastel2") +
116 |   labs(title = "Differences among English Language Learning Groups",
117 |        subtitle = "Note crossing of reference line") +
118 |   theme_minimal()
119 | }
120 | 
121 | }
122 | 


--------------------------------------------------------------------------------
/man/psd.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{psd}
 4 | \alias{psd}
 5 | \title{Pooled Standard Deviation}
 6 | \usage{
 7 | psd(n1, n2, vr1, vr2)
 8 | }
 9 | \arguments{
10 | \item{n1}{The sample size for group 1}
11 | 
12 | \item{n2}{The sample size for group 2}
13 | 
14 | \item{vr1}{The variance for group 1}
15 | 
16 | \item{vr2}{The variance for group 2}
17 | }
18 | \description{
19 | The denominator for Cohen's d
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/man/seda.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/seda.R
 3 | \docType{data}
 4 | \name{seda}
 5 | \alias{seda}
 6 | \title{Portion of the Stanford Educational Data Archive (SEDA).}
 7 | \format{A data frame with 32625 rows and 8 columns.
 8 |   \describe{
 9 |     \item{leaid}{Integer. Local education authority identifier.}
10 | 	   \item{leaname}{Character. Local education authority name.}
11 |     \item{stateabb}{Character. State abbreviation.}
12 | 	   \item{year}{Integer. Year the data were collected.}
13 | 	   \item{grade}{Integer. Grade level the data were collected.}
14 |     \item{subject}{Character. Whether the data were from reading or
15 | 			 mathematics.}
16 |     \item{mean}{Double. Mean test score for the LEA in the corresponding
17 | 				   subject/grade/year.}
18 |     \item{se}{Double. Standard error of the mean.}
19 | }}
20 | \source{
21 | Sean F. Reardon, Demetra Kalogrides, Andrew Ho, Ben Shear, Kenneth Shores,
22 | Erin Fahle. (2016). Stanford Education Data Archive. 
23 | \href{http://purl.stanford.edu/db586ns4974}{
24 |  http://purl.stanford.edu/db586ns4974}. For more information, please visit
25 | \href{https://edopportunity.org}{https://edopportunity.org}.
26 | }
27 | \usage{
28 | seda
29 | }
30 | \description{
31 | The full SEDA dataset contains mean test scores on statewide testing data in
32 | reading and math for every school district in the United States. See a
33 | description of the data 
34 | \href{https://purl.stanford.edu/db586ns4974}{here}. The data 
 35 | represented in this package represent a random sample of 10\% of all the 
36 | cases in the full dataset. To access the full data, please visit the 
37 | data archive in the above link.
38 | }
39 | \keyword{datasets}
40 | 


--------------------------------------------------------------------------------
/man/star.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/star.R
 3 | \docType{data}
 4 | \name{star}
 5 | \alias{star}
 6 | \title{Data from the Tennessee class size experiment}
 7 | \format{A data frame with 5748 rows and 9 columns.
 8 |   \describe{
 9 |     \item{sid}{Integer. Student identifier.}
10 | 	   \item{schid}{Integer. School identifier.}
11 |     \item{condition}{Character. Classroom type the student was enrolled in 
12 | 			(randomly assigned to).}
13 | 	   \item{tch_experience}{Integer. Number of years of teaching experience
14 | 			 for the teacher in the classroom in which the student was
15 | 			 enrolled.}
16 | 	   \item{sex}{Character. Sex of student: "girl" or "boy".}
17 | 	   \item{freelunch}{Character. Eligibility of the student for free or
18 | 			 reduced price lunch: "no" or "yes"}
19 |     \item{race}{Character. The identified race of the student: "white",
20 | 			 "black", or "other"}
21 |     \item{math}{Integer. Math scale score.}
22 |     \item{reading}{Integer. Reading scale score.}
23 | }}
24 | \usage{
25 | star
26 | }
27 | \description{
28 | These data come from the Ecdat package and represent a cross-section of
29 |  data from Project STAR (Student/Teacher Achievement Ratio), where students
30 |  were randomly assigned to classrooms.
31 | }
32 | \keyword{datasets}
33 | 


--------------------------------------------------------------------------------
/man/tpac.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{tpac}
 4 | \alias{tpac}
 5 | \title{Transformed proportion above the cut}
 6 | \usage{
 7 | tpac(data, formula, cuts, ref_group = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
11 | format.}
12 | 
13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
14 | the outcome variable and \code{group} is the grouping variable. Note this
15 | variable can include any arbitrary number of groups. Additional variables 
16 | can be included with \code{+} to produce separate estimates by the secondary 
17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
18 | + characteristic2}).}
19 | 
20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be
21 | guaranteed to include these points. Otherwise, there could be gaps in the 
22 | ECDF at those particular points (used in plotting the cut scores).}
23 | 
 24 | \item{ref_group}{Optional. A character vector or formula listing the 
25 | reference group levels for each variable on the right hand side of the 
26 | formula, supplied in the same order as the formula. Note that if using the
27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
28 | should be wrapped in back ticks (e.g., 
29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
30 | in doubt, it is safest to use the back ticks, as they will not interfere 
31 | with anything if they are not needed. See examples below for more details.}
32 | }
33 | \value{
34 | Tidy data frame of the proportion above the cutoff for 
35 | each (or selected) groups.
36 | }
37 | \description{
38 | This function transforms calls to \link{pac} into standard deviation units.
39 | Function assumes that each distribution is distributed normally with 
40 | common variances. See 
 41 | \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho &
 42 |  Reardon, 2012}.
43 | }
44 | \examples{
45 | # Compute differences for all pairwise comparisons for each of three cuts
 46 | tpac(star,
 47 |     reading ~ condition, 
 48 | 		 cuts = c(450, 500, 550)) 
 49 | 		 
 50 | tpac(star,
 51 |     reading ~ condition + freelunch + race, 
 52 | 		 cuts = c(450, 500))
 53 | 
 54 | tpac(star,
 55 |     reading ~ condition + freelunch + race, 
 56 | 		 cuts = c(450, 500),
 57 | 		 ref_group = ~small + no + white)  
58 | }
59 | \seealso{
 60 | \code{\link{pac}}, \code{\link{pac_compare}}, \code{\link{tpac_compare}}
61 | }
62 | 


--------------------------------------------------------------------------------
/man/tpac_compare.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{tpac_compare}
 4 | \alias{tpac_compare}
 5 | \title{Compare Transformed Proportion Above the Cut}
 6 | \usage{
 7 | tpac_compare(data, formula, cuts, ref_group = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
11 | format.}
12 | 
13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
14 | the outcome variable and \code{group} is the grouping variable. Note this
15 | variable can include any arbitrary number of groups. Additional variables 
16 | can be included with \code{+} to produce separate estimates by the secondary 
17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
18 | + characteristic2}).}
19 | 
20 | \item{cuts}{Optional vector of cut scores. If supplied, the ECDF will be
21 | guaranteed to include these points. Otherwise, there could be gaps in the 
22 | ECDF at those particular points (used in plotting the cut scores).}
23 | 
 24 | \item{ref_group}{Optional. A character vector or formula listing the 
25 | reference group levels for each variable on the right hand side of the 
26 | formula, supplied in the same order as the formula. Note that if using the
27 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
28 | should be wrapped in back ticks (e.g., 
29 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
30 | in doubt, it is safest to use the back ticks, as they will not interfere 
31 | with anything if they are not needed. See examples below for more details.}
32 | }
33 | \value{
34 | Tidy data frame of the proportion above the cutoff for 
35 | each (or selected) groups.
36 | }
37 | \description{
38 | This function compares all possible pairwise comparisons, as supplied by 
39 | \code{formula}, in terms of the transformed proportion above the cut. This
 40 | is an effect-size-like measure of the differences between two groups at the
 41 | cut point(s) in the distribution. See 
 42 | \href{http://journals.sagepub.com/doi/abs/10.3102/1076998611411918}{Ho &
 43 |  Reardon, 2012}.
44 | }
45 | \examples{
46 | # Compute differences for all pairwise comparisons for each of three cuts
 47 | tpac_compare(star,
 48 |     reading ~ condition, 
 49 | 		 cuts = c(450, 500, 550)) 
 50 | 		 
 51 | tpac_compare(star,
 52 |     reading ~ condition + freelunch + race, 
 53 | 		 cuts = c(450, 500))
 54 | 
 55 | tpac_compare(star,
 56 |     reading ~ condition + freelunch + race, 
 57 | 		 cuts = c(450, 500),
 58 | 		 ref_group = ~small + no + white)  
59 | }
60 | \seealso{
 61 | \code{\link{pac}}, \code{\link{pac_compare}}, \code{\link{tpac}}
62 | }
63 | 


--------------------------------------------------------------------------------
/man/v.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/es_calcs.R
 3 | \name{v}
 4 | \alias{v}
 5 | \title{Calculate the V effect size statistic}
 6 | \usage{
 7 | v(data, formula, ref_group = NULL)
 8 | }
 9 | \arguments{
10 | \item{data}{The data frame used for estimation - ideally structured in a tidy 
11 | format.}
12 | 
13 | \item{formula}{A formula of the type \code{out ~ group} where \code{out} is
14 | the outcome variable and \code{group} is the grouping variable. Note this
15 | variable can include any arbitrary number of groups. Additional variables 
16 | can be included with \code{+} to produce separate estimates by the secondary 
17 | or tertiary variables of interest (e.g., \code{out ~ group + characteristic1 
18 | + characteristic2}).}
19 | 
 20 | \item{ref_group}{Optional. A character vector or formula listing the 
21 | reference group levels for each variable on the right hand side of the 
22 | formula, supplied in the same order as the formula. Note that if using the
23 | formula version, levels that are numbers, or include hyphens, spaces, etc., 
24 | should be wrapped in back ticks (e.g., 
25 | \code{ref_group = ~ Active + `Non-FRL`}, or \code{ref_group = ~`8`}). When 
26 | in doubt, it is safest to use the back ticks, as they will not interfere 
27 | with anything if they are not needed. See examples below for more details.}
28 | }
29 | \value{
 30 | By default the V statistic for all possible pairings of
 31 |  the grouping factor is returned as a tidy data frame. Alternatively, a 
32 | vector can be returned, and/or only the V corresponding to a specific
33 | reference group can be returned.
34 | }
35 | \description{
36 | This function calculates the effect size V, as discussed by 
37 | \href{https://journals.sagepub.com/doi/abs/10.3102/1076998609332755}{Ho, 2009}. The V
38 | statistic is a transformation of \code{\link{auc}}, interpreted as the 
39 | average difference between the distributions in standard deviation units.
40 | }
41 | \examples{
42 | 
43 | # Calculate V for all pairwise comparisons
44 | v(star, reading ~ condition) 
45 | 
46 | # Report only relative to regular-sized classrooms
47 | v(star, 
48 |     reading ~ condition, 
49 |     ref_group = "reg")
50 | 
51 | # Report by ELL and FRL groups for each season, compare to non-ELL students
52 | # who were not eligible for free or reduced price lunch in the fall (using
53 | # the formula interface for reference group referencing).
54 | 
55 | \dontrun{
56 | v(benchmarks, 
57 |       math ~ ell + frl + season,
58 |       ref_group = ~`Non-ELL` + `Non-FRL` + Fall)
59 | 
60 | # Same thing but with character vector supplied, rather than a formula
61 | v(benchmarks, 
62 |       math ~ ell + frl + season,
63 |       ref_group = c("Non-ELL", "Non-FRL", "Fall"))
64 | }
65 | 
66 | }
67 | 


--------------------------------------------------------------------------------
/tests/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/datalorax/esvis/faf941c6b7d5a1a89916725b3d067bd3546501b7/tests/.DS_Store


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)  # test framework; provides test_check()
2 | library(esvis)     # package under test
3 | 
4 | test_check("esvis")  # run the testthat suite for the esvis package
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test-auc.R:
--------------------------------------------------------------------------------
 1 | set.seed(100)  # fix the RNG so the simulated scores are reproducible
 2 | test_data1 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),  # two groups of 10,000 rows each
 3 |                          score = c(round(rnorm(1e4), 5), 
 4 |                                    round(rnorm(1e4), 5)))  # both groups drawn with mean 0, sd 1
 5 | test_data2 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),
 6 |                          score = c(round(rnorm(1e4), 5), 
 7 |                                    round(rnorm(1e4, 1), 5)))  # group 2 drawn with mean 1, sd 1
 8 | 
 9 | test_that("Area under the curve computes and outputs correctly", {
10 | 	expect_equal(auc(test_data1, score ~ g)$auc[1], .50, tolerance = 0.03)  # same distribution in both groups
11 | 	expect_equal(auc(test_data2, score ~ g)$auc[1], .75, tolerance = 0.03)  # group means one SD apart
12 | })
13 | 
14 | # Row-count checks. Without ref_group the expected count is every ordered pair of distinct group combinations: ((Levels 1 * Levels 2) * (Levels 1 * Levels 2)) - (Levels 1 * Levels 2) 
15 | test_that("Reference group subsetting works correctly", {
16 |   expect_equal(nrow(auc(seda, mean ~ grade)), 6*5)  # no ref_group: all ordered pairs
17 |   expect_equal(nrow(auc(seda, mean ~ grade, ~`8`)), 5)  # formula ref_group; numeric level wrapped in back ticks
18 |   expect_equal(nrow(auc(seda, mean ~ grade, "8")), 5)  # same reference group given as a character vector
19 |   expect_equal(nrow(auc(benchmarks, math ~ season, "Fall")), 2)
20 |   expect_equal(nrow(auc(benchmarks, math ~ season, ~Winter)), 2)
21 |   expect_equal(nrow(auc(benchmarks, math ~ season + ell)), 
22 |                ((3*3)*(3*3)) - (3*3))
23 |   expect_equal(nrow(auc(benchmarks, math ~ season + ell, 
24 |                         ~Fall + `Non-ELL`)), 
25 |                (3*3) - 1)  # fully specified reference group: every other combination vs. the reference
26 |   expect_equal(nrow(auc(benchmarks, math ~ season + ell, 
27 |                         c("Fall", "Non-ELL"))), 
28 |                (3*3) - 1)
29 |   expect_equal(nrow(auc(benchmarks, math ~ season + ell, 
30 |                         ~Fall)), 
31 |                (3*3*3) - 3)  # ref_group supplied for the first variable only
32 |   expect_equal(nrow(auc(benchmarks, math ~ season + ell, 
33 |                         c("Fall"))), 
34 |                (3*3*3) - 3)
35 |   
36 |   expect_equal(nrow(auc(benchmarks, math ~ season + frl + ethnicity)), 
37 |                ((3*2*6)*(3*2*6)) - (3*2*6))
38 |   expect_equal(nrow(auc(benchmarks, math ~ season + frl + ethnicity,
39 |                         ~Fall + `Non-FRL` + White)), 
40 |                (3*2*6) - 1)
41 |   expect_equal(nrow(auc(benchmarks, math ~ season + frl + ethnicity,
42 |                         ~Fall + `Non-FRL`)), 
43 |                ((3*2*6)*6) - 6)  # ref_group supplied for the first two of three variables
44 |                
45 | })
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test-coh_d.R:
--------------------------------------------------------------------------------
 1 | set.seed(100)  # fix the RNG so the simulated scores are reproducible
 2 | test_data1 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),  # two groups of 10,000 rows each
 3 |                          score = c(round(rnorm(1e4), 5), 
 4 |                                    round(rnorm(1e4), 5)))  # both groups drawn with mean 0, sd 1
 5 | test_data2 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),
 6 |                          score = c(round(rnorm(1e4), 5), 
 7 |                                    round(rnorm(1e4, 1), 5)))  # group 2 drawn with mean 1, sd 1
 8 | 
 9 | test_that("Cohen's d computes and outputs correctly", {  # description names the statistic this file actually tests
10 | 	expect_equal(coh_d(test_data1, score ~ g)$coh_d[1], 0, tolerance = 0.03)  # same distribution in both groups
11 | 	expect_equal(coh_d(test_data2, score ~ g)$coh_d[1], 1, tolerance = 0.03)  # group means one SD apart
12 | })
13 | 
14 | # Row-count checks. Without ref_group the expected count is every ordered pair of distinct group combinations: ((Levels 1 * Levels 2) * (Levels 1 * Levels 2)) - (Levels 1 * Levels 2) 
15 | test_that("Reference group subsetting works correctly", {
16 |   expect_equal(nrow(coh_d(seda, mean ~ grade)), 6*5)  # no ref_group: all ordered pairs
17 |   expect_equal(nrow(coh_d(seda, mean ~ grade, ~`8`)), 5)  # formula ref_group; numeric level wrapped in back ticks
18 |   expect_equal(nrow(coh_d(seda, mean ~ grade, "8")), 5)  # same reference group given as a character vector
19 |   expect_equal(nrow(coh_d(benchmarks, math ~ season, "Fall")), 2)
20 |   expect_equal(nrow(coh_d(benchmarks, math ~ season, ~Winter)), 2)
21 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + ell)), 
22 |                ((3*3)*(3*3)) - (3*3))
23 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 
24 |                         ~Fall + `Non-ELL`)), 
25 |                (3*3) - 1)  # fully specified reference group: every other combination vs. the reference
26 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 
27 |                         c("Fall", "Non-ELL"))), 
28 |                (3*3) - 1)
29 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 
30 |                         ~Fall)), 
31 |                (3*3*3) - 3)  # ref_group supplied for the first variable only
32 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + ell, 
33 |                         c("Fall"))), 
34 |                (3*3*3) - 3)
35 |   
36 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + frl + ethnicity)), 
37 |                ((3*2*6)*(3*2*6)) - (3*2*6))
38 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + frl + ethnicity,
39 |                         ~Fall + `Non-FRL` + White)), 
40 |                (3*2*6) - 1)
41 |   expect_equal(nrow(coh_d(benchmarks, math ~ season + frl + ethnicity,
42 |                         ~Fall + `Non-FRL`)), 
43 |                ((3*2*6)*6) - 6)  # ref_group supplied for the first two of three variables
44 |                
45 | })
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test-ecdf_plot.R:
--------------------------------------------------------------------------------
 1 | test_that("`ecdf_plot` produces expected output", {
 2 |   p <- ecdf_plot(benchmarks, math ~ ell)  # baseline plot: one grouping variable, no cuts
 3 | 
 4 |   expect_equal(p$labels$x, "math")  # x label is expected to be the outcome variable name
 5 |   expect_equal(ecdf_plot(star, reading ~ race)$labels$x, "reading")
 6 | 
 7 | 	expect_equal(length(p$layers), 1)  # baseline plot has a single layer
 8 | 
 9 | 	expect_equal(length(ecdf_plot(benchmarks, math ~ ell,
10 | 	                            cuts = c(180, 190))$layers), 
11 | 				3)  # supplying cuts is expected to add two layers
12 | 	expect_equal(length(ecdf_plot(benchmarks, math ~ ell,
13 | 	                            cuts = c(180, 190),
14 | 	                            ref_rect = FALSE)$layers), 
15 | 				2)  # ref_rect = FALSE is expected to drop one of them
16 | 	
17 | 	expect_null(p$facet$params$facets$panel)  # one grouping variable: no faceting parameters expected
18 | 	expect_null(p$facet$params$rows)
19 | 	expect_null(p$facet$params$cols)
20 | 	
21 | 	p2 <- ecdf_plot(benchmarks, math ~ ell + season)  # two right-hand-side variables
22 | 	expect_false(is.null(p2$facet$params$facets))
23 | 	
24 | 	p3 <- ecdf_plot(benchmarks, math ~ ell + season + frl)  # three right-hand-side variables
25 |   expect_false(is.null(p3$facet$params$rows))
26 | 	expect_false(is.null(p3$facet$params$cols))
27 | })
28 | 


--------------------------------------------------------------------------------
/tests/testthat/test-hedge_g.R:
--------------------------------------------------------------------------------
 1 | set.seed(100)  # fix the RNG so the simulated scores are reproducible
 2 | test_data1 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),  # two groups of 10,000 rows each
 3 |                          score = c(round(rnorm(1e4), 5), 
 4 |                                    round(rnorm(1e4), 5)))  # both groups drawn with mean 0, sd 1
 5 | test_data2 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),
 6 |                          score = c(round(rnorm(1e4), 5), 
 7 |                                    round(rnorm(1e4, 1), 5)))  # group 2 drawn with mean 1, sd 1
 8 | 
 9 | test_that("Hedges g computes and outputs correctly", {
10 | 	expect_equal(hedg_g(test_data1, score ~ g)$hedg_g[1], 0, tolerance = 0.03)  # same distribution in both groups
11 | 	expect_equal(hedg_g(test_data2, score ~ g)$hedg_g[1], -1, tolerance = 0.03)  # NOTE(review): expected sign is -1 here but +1 in the coh_d and v tests on the same data -- confirm the row order/sign convention is intended
12 | })
13 | 
14 | # Row-count checks. Without ref_group the expected count is every ordered pair of distinct group combinations: ((Levels 1 * Levels 2) * (Levels 1 * Levels 2)) - (Levels 1 * Levels 2) 
15 | test_that("Reference group subsetting works correctly", {
16 |   expect_equal(nrow(hedg_g(seda, mean ~ grade)), 6*5)  # no ref_group: all ordered pairs
17 |   expect_equal(nrow(hedg_g(seda, mean ~ grade, ~`8`)), 5)  # formula ref_group; numeric level wrapped in back ticks
18 |   expect_equal(nrow(hedg_g(seda, mean ~ grade, "8")), 5)  # same reference group given as a character vector
19 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season, "Fall")), 2)
20 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season, ~Winter)), 2)
21 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell)), 
22 |                ((3*3)*(3*3)) - (3*3))
23 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 
24 |                         ~Fall + `Non-ELL`)), 
25 |                (3*3) - 1)  # fully specified reference group: every other combination vs. the reference
26 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 
27 |                         c("Fall", "Non-ELL"))), 
28 |                (3*3) - 1)
29 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 
30 |                         ~Fall)), 
31 |                (3*3*3) - 3)  # ref_group supplied for the first variable only
32 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + ell, 
33 |                         c("Fall"))), 
34 |                (3*3*3) - 3)
35 |   
36 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + frl + ethnicity)), 
37 |                ((3*2*6)*(3*2*6)) - (3*2*6))
38 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + frl + ethnicity,
39 |                         ~Fall + `Non-FRL` + White)), 
40 |                (3*2*6) - 1)
41 |   expect_equal(nrow(hedg_g(benchmarks, math ~ season + frl + ethnicity,
42 |                         ~Fall + `Non-FRL`)), 
43 |                ((3*2*6)*6) - 6)  # ref_group supplied for the first two of three variables
44 |                
45 | })
46 | 


--------------------------------------------------------------------------------
/tests/testthat/test-pp_plot.R:
--------------------------------------------------------------------------------
 1 | test_that("`pp_plot` produces expected output", {
 2 | 	p1 <- pp_plot(benchmarks, math ~ ell)  # baseline plot with all defaults
 3 |   expect_equal(p1$labels$y, "Monitor")  # y label is expected to be the default reference group
 4 | 	
 5 | 	expect_equal(pp_plot(benchmarks, math ~ ell,
 6 | 	                     ref_group = "Non-ELL")$labels$y, 
 7 | 				"Non-ELL")  # an explicit ref_group becomes the y label
 8 | 	
 9 | 	expect_equal(length(p1$layers), 3)  # defaults (lines, shade, refline all TRUE) give three layers
10 | 	
11 | 	expect_equal(length(pp_plot(benchmarks, math ~ ell,
12 | 	                     shade = FALSE)$layers), 
13 | 				2)  # turning off any one of the three options removes one layer
14 | 	
15 | 	expect_equal(length(pp_plot(benchmarks, math ~ ell,
16 | 	                     lines = FALSE)$layers), 
17 | 				2)
18 | 	expect_equal(length(pp_plot(benchmarks, math ~ ell,
19 | 	                     refline = FALSE)$layers), 
20 | 				2)
21 | 	
22 | 	expect_equal(length(pp_plot(benchmarks, math ~ ell,
23 | 	                            shade = FALSE,
24 | 	                            refline = FALSE)$layers), 
25 | 				1)
26 | 	
27 | 	expect_equal(length(pp_plot(benchmarks, math ~ ell,
28 | 	                            cuts = c(180, 190),
29 | 	                            shade = FALSE,
30 | 	                            refline = FALSE)$layers), 
31 | 				4)  # cuts with labels are expected to add three layers
32 | 	
33 | 	expect_null(p1$facet$params$facets)  # one grouping variable: no faceting parameters expected
34 | 	expect_null(p1$facet$params$rows)
35 | 	expect_null(p1$facet$params$cols)
36 | 	
37 | 	p2 <- pp_plot(benchmarks, math ~ ell + season)  # two right-hand-side variables
38 | 	expect_false(is.null(p2$facet$params$facets))
39 | 	
40 | 	p3 <- pp_plot(benchmarks, math ~ ell + season + frl)  # three right-hand-side variables
41 | 	expect_false(is.null(p3$facet$params$rows))
42 | 	expect_false(is.null(p3$facet$params$cols))
43 | 	
44 | 	p4 <- pp_plot(benchmarks, math ~ ell, cuts = c(180, 190))
45 |   expect_equal(length(p4$layers), 6)  # three default layers plus three for the cuts
46 |   
47 |   p5 <- pp_plot(benchmarks, math ~ ell + frl, cuts = c(180, 190))
48 |   expect_equal(length(p5$layers), 6)  # layer count is expected to be unchanged by the extra variable
49 |   
50 |   p6 <- pp_plot(benchmarks, math ~ ell, 
51 |                 cuts = c(180, 190), 
52 |                 cut_labels = FALSE)
53 |   expect_equal(length(p6$layers), 5)  # cut_labels = FALSE removes one layer
54 | 	
55 | })
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test-v.R:
--------------------------------------------------------------------------------
 1 | set.seed(100)  # fix the RNG so the simulated scores are reproducible
 2 | test_data1 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),  # two groups of 10,000 rows each
 3 |                          score = c(round(rnorm(1e4), 5), 
 4 |                                    round(rnorm(1e4), 5)))  # both groups drawn with mean 0, sd 1
 5 | test_data2 <- data.frame(g     = c(rep(1, 1e4), rep(2, 1e4)),
 6 |                          score = c(round(rnorm(1e4), 5), 
 7 |                                    round(rnorm(1e4, 1), 5)))  # group 2 drawn with mean 1, sd 1
 8 | 
 9 | test_that("V computes and outputs correctly", {
10 | 	expect_equal(v(test_data1, score ~ g)$v[1], 0, tolerance = 0.03)  # same distribution in both groups
11 | 	expect_equal(v(test_data2, score ~ g)$v[1], 1, tolerance = 0.03)  # group means one SD apart
12 | })
13 | 
14 | # ((Levels 1 * Levels 2) * (Levels 1 * Levels 2)) - (Levels 1 * Levels 2) 
test_that("Reference group subsetting works correctly", {
  # The expected row counts follow one pattern: with k group combinations,
  # no reference group gives k * k - k rows; a reference that matches m
  # combinations gives m * k - m rows (so a fully specified reference,
  # m = 1, gives k - 1 rows).

  # ---- One grouping variable ------------------------------------------
  grade_all <- v(seda, mean ~ grade)
  expect_equal(nrow(grade_all), 6 * 5)

  # The reference group may be given as a one-sided formula or as a string.
  grade_ref_formula <- v(seda, mean ~ grade, ~`8`)
  expect_equal(nrow(grade_ref_formula), 5)

  grade_ref_string <- v(seda, mean ~ grade, "8")
  expect_equal(nrow(grade_ref_string), 5)

  season_ref_string <- v(benchmarks, math ~ season, "Fall")
  expect_equal(nrow(season_ref_string), 2)

  season_ref_formula <- v(benchmarks, math ~ season, ~Winter)
  expect_equal(nrow(season_ref_formula), 2)

  # ---- Two grouping variables (3 * 3 combinations) --------------------
  k2 <- 3 * 3

  two_all <- v(benchmarks, math ~ season + ell)
  expect_equal(nrow(two_all), (k2 * k2) - k2)

  two_full_formula <- v(benchmarks, math ~ season + ell, ~Fall + `Non-ELL`)
  expect_equal(nrow(two_full_formula), k2 - 1)

  two_full_string <- v(benchmarks, math ~ season + ell, c("Fall", "Non-ELL"))
  expect_equal(nrow(two_full_string), k2 - 1)

  # Reference given for the first variable only: 3 matching combinations.
  two_partial_formula <- v(benchmarks, math ~ season + ell, ~Fall)
  expect_equal(nrow(two_partial_formula), (3 * k2) - 3)

  two_partial_string <- v(benchmarks, math ~ season + ell, c("Fall"))
  expect_equal(nrow(two_partial_string), (3 * k2) - 3)

  # ---- Three grouping variables (3 * 2 * 6 combinations) --------------
  k3 <- 3 * 2 * 6

  three_all <- v(benchmarks, math ~ season + frl + ethnicity)
  expect_equal(nrow(three_all), (k3 * k3) - k3)

  three_full <- v(benchmarks, math ~ season + frl + ethnicity,
                  ~Fall + `Non-FRL` + White)
  expect_equal(nrow(three_full), k3 - 1)

  # Reference given for the first two variables: 6 matching combinations.
  three_partial <- v(benchmarks, math ~ season + frl + ethnicity,
                     ~Fall + `Non-FRL`)
  expect_equal(nrow(three_partial), (k3 * 6) - 6)
})
46 | 


--------------------------------------------------------------------------------