├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── add_ess_header.R ├── data.R ├── ess.R ├── geom_ecdf.R ├── geom_mirrored_histogram.R ├── tidyselect-reexports.R ├── tidysmd-rexports.R └── utils.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── data-raw └── nhefs_weights.R ├── data └── nhefs_weights.rda ├── halfmoon.Rproj ├── man ├── add_ess_header.Rd ├── ess.Rd ├── figures │ ├── README-example-1.png │ ├── README-example-2.png │ ├── README-example-3.png │ ├── README-pressure-1.png │ ├── README-unnamed-chunk-3-1.png │ ├── README-unnamed-chunk-5-1.png │ ├── README-unnamed-chunk-6-1.png │ └── logo.png ├── geom_ecdf.Rd ├── geom_mirror_histogram.Rd ├── nhefs_weights.Rd └── reexports.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon.png │ ├── favicon-96x96.png │ ├── favicon.ico │ ├── favicon.svg │ ├── site.webmanifest │ ├── web-app-manifest-192x192.png │ └── web-app-manifest-512x512.png └── tests ├── testthat.R └── testthat ├── Rplots.pdf ├── _snaps ├── geom_ecdf │ ├── ecdf-no-weights.svg │ └── ecdf-weights.svg ├── geom_mirrored_histogram.md └── geom_mirrored_histogram │ └── layered-weighted-and-unweighted.svg ├── helper-vdiffr.R ├── test-add_ess_header.R ├── test-ess.R ├── test-geom_ecdf.R └── test-geom_mirrored_histogram.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^halfmoon\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^data-raw$ 5 | ^LICENSE\.md$ 6 | ^\.github$ 7 | ^_pkgdown\.yml$ 8 | ^docs$ 9 | ^pkgdown$ 10 | ^cran-comments\.md$ 11 | ^rhub\.R$ 12 | ^codecov\.yml$ 13 | ^CRAN-SUBMISSION$ 14 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v3 33 | 34 | - uses: r-lib/actions/setup-pandoc@v2 35 | 36 | - uses: r-lib/actions/setup-r@v2 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v2 43 | with: 44 | extra-packages: any::rcmdcheck 45 | needs: check 46 | 47 | - uses: r-lib/actions/check-r-package@v2 48 | with: 49 | upload-snapshots: true 50 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | jobs: 15 | pkgdown: 16 | runs-on: ubuntu-latest 17 | # Only restrict concurrency for non-PR jobs 18 | concurrency: 19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 20 | env: 21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 22 | steps: 23 | - uses: actions/checkout@v3 24 | 25 | - uses: r-lib/actions/setup-pandoc@v2 26 | 27 | - uses: r-lib/actions/setup-r@v2 28 | with: 29 | use-public-rspm: true 30 | 31 | - uses: r-lib/actions/setup-r-dependencies@v2 32 | with: 33 | extra-packages: any::pkgdown, local::. 34 | needs: website 35 | 36 | - name: Build site 37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 38 | shell: Rscript {0} 39 | 40 | - name: Deploy to GitHub pages 🚀 41 | if: github.event_name != 'pull_request' 42 | uses: JamesIves/github-pages-deploy-action@v4.4.1 43 | with: 44 | clean: false 45 | branch: gh-pages 46 | folder: docs 47 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | 8 | name: test-coverage.yaml 9 | 10 | permissions: read-all 11 | 12 | jobs: 13 | test-coverage: 14 | runs-on: ubuntu-latest 15 | env: 16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 17 | 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-r@v2 22 | with: 23 | use-public-rspm: true 24 | 25 | - uses: r-lib/actions/setup-r-dependencies@v2 26 | with: 27 | extra-packages: any::covr, any::xml2 28 | needs: coverage 29 | 30 | - name: Test coverage 31 | run: | 32 | cov <- covr::package_coverage( 33 | quiet = FALSE, 34 | clean = FALSE, 35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 36 | ) 37 | covr::to_cobertura(cov) 38 | shell: Rscript {0} 39 | 40 | - uses: codecov/codecov-action@v4 41 | with: 42 | # Fail if error if not on PR, or if on PR and token is given 43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }} 44 | file: ./cobertura.xml 45 | plugin: noop 46 | disable_search: true 47 | token: ${{ secrets.CODECOV_TOKEN }} 48 | 49 | - name: Show testthat output 50 | if: always() 51 | run: | 52 | ## -------------------------------------------------------------------- 53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 54 | shell: bash 55 | 56 | - name: Upload test results 57 | if: failure() 58 | uses: actions/upload-artifact@v4 59 | with: 60 | name: coverage-test-failures 61 | path: ${{ runner.temp }}/package 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .Rdata 4 | .httr-oauth 5 | .DS_Store 6 | docs 7 | cran-comments.md 8 | rhub.R 9 | CRAN-SUBMISSION 10 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: halfmoon 2 | Title: Techniques to Build Better Balance 3 | Version: 0.1.0.9000 4 | Authors@R: 5 | person("Malcolm", "Barrett", , "malcolmbarrett@gmail.com", role = c("aut", "cre", "cph"), 6 | comment = c(ORCID = "0000-0003-0299-5825")) 7 | Description: Build better balance in causal inference models. 'halfmoon' 8 | helps you assess propensity score models for balance between groups 9 | using metrics like standardized mean differences and visualization 10 | techniques like mirrored histograms. 'halfmoon' supports both 11 | weighting and matching techniques. 12 | License: MIT + file LICENSE 13 | URL: https://github.com/r-causal/halfmoon, 14 | https://r-causal.github.io/halfmoon/ 15 | BugReports: https://github.com/r-causal/halfmoon/issues 16 | Depends: 17 | R (>= 2.10) 18 | Imports: 19 | cardx (>= 0.2.3), 20 | cli, 21 | ggplot2, 22 | gtsummary (>= 2.1.0), 23 | rlang, 24 | tidyselect, 25 | tidysmd (>= 0.2.0) 26 | Suggests: 27 | cards, 28 | covr, 29 | dplyr, 30 | survey, 31 | testthat (>= 3.0.0), 32 | vdiffr 33 | Config/testthat/edition: 3 34 | Encoding: UTF-8 35 | LazyData: true 36 | Roxygen: list(markdown = TRUE) 37 | RoxygenNote: 7.3.2 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2023 2 | COPYRIGHT HOLDER: halfmoon authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 halfmoon authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(add_ess_header) 4 | export(bind_matches) 5 | export(contains) 6 | export(ends_with) 7 | export(ess) 8 | export(everything) 9 | export(geom_ecdf) 10 | export(geom_love) 11 | export(geom_mirror_histogram) 12 | export(last_col) 13 | export(love_plot) 14 | export(matches) 15 | export(num_range) 16 | export(one_of) 17 | export(peek_vars) 18 | export(starts_with) 19 | export(tidy_smd) 20 | importFrom(rlang,.data) 21 | importFrom(rlang,.env) 22 | importFrom(tidyselect,contains) 23 | importFrom(tidyselect,ends_with) 24 | importFrom(tidyselect,everything) 25 | importFrom(tidyselect,last_col) 26 | importFrom(tidyselect,matches) 27 | importFrom(tidyselect,num_range) 28 | importFrom(tidyselect,one_of) 29 | importFrom(tidyselect,peek_vars) 30 | importFrom(tidyselect,starts_with) 31 | importFrom(tidysmd,bind_matches) 32 | importFrom(tidysmd,geom_love) 33 | importFrom(tidysmd,love_plot) 34 | importFrom(tidysmd,tidy_smd) 35 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # halfmoon 0.1.0.9000 2 | 3 | * Added a `NEWS.md` file to track changes to the package. 4 | -------------------------------------------------------------------------------- /R/add_ess_header.R: -------------------------------------------------------------------------------- 1 | #' Add ESS Table Header 2 | #' 3 | #' This function replaces the counts in the default header of 4 | #' `gtsummary::tbl_svysummary()` tables to counts representing the 5 | #' Effective Sample Size (ESS). See [`ess()`] for details. 6 | #' 7 | #' @param x (`tbl_svysummary`)\cr 8 | #' Object of class `'tbl_svysummary'` typically created with `gtsummary::tbl_svysummary()`. 9 | #' @param header (`string`)\cr 10 | #' String specifying updated header. 11 | #' Review `gtsummary::modify_header()` for details on use. 12 | #' 13 | #' @returns a 'gtsummary' table 14 | #' @export 15 | #' @importFrom rlang .env .data 16 | #' 17 | #' @examplesIf rlang::is_installed(c("survey", "gtsummary", "cards", "cardx", "dplyr")) 18 | #' svy <- survey::svydesign(~1, data = nhefs_weights, weights = ~ w_ate) 19 | #' 20 | #' gtsummary::tbl_svysummary(svy, include = c(age, sex, smokeyrs)) |> 21 | #' add_ess_header() 22 | #' hdr <- paste0( 23 | #' "**{level}** \n", 24 | #' "N = {n_unweighted}; ESS = {format(n, digits = 1, nsmall = 1)}" 25 | #' ) 26 | #' gtsummary::tbl_svysummary(svy, by = qsmk, include = c(age, sex, smokeyrs)) |> 27 | #' add_ess_header(header = hdr) 28 | add_ess_header <- function(x, header = "**{level}** \nESS = {format(n, digits = 1, nsmall = 1)}") { 29 | # check inputs --------------------------------------------------------------- 30 | rlang::check_installed(c("cards", "dplyr")) 31 | if (!inherits(x, "tbl_svysummary")) { 32 | cli::cli_abort("Argument {.arg x} must be class {.cls tbl_svysummary} and typically created with {.fun gtsummary::tbl_svysummary}.") 33 | } 34 | if (!rlang::is_string(header)) { 35 | cli::cli_abort("Argument {.arg header} must be a string.") 36 | } 37 | updated_call <- append(x$call_list, list(add_ess_header = match.call())) 38 | 39 | # calculate ARD with ESS counts ---------------------------------------------- 40 | ard_ess <- ard_survey_ess(data = x$inputs$data, by = x$inputs$by) 41 | if ("add_overall" %in% names(x$call_list)) { 42 | ard_ess_overall <- ard_survey_ess(data = x$inputs$data) 43 | } 44 | 45 | # replace statistics in `x$table_styling$table_header` with ESS -------------- 46 | # no `tbl_svysummary(by)` specified 47 | if (rlang::is_empty(x$inputs$by)) { 48 | x$table_styling$header$modify_stat_level <- "Overall" 49 | x$table_styling$header$modify_stat_N <- ard_ess$stat[[1]] 50 | x$table_styling$header$modify_stat_n <- ard_ess$stat[[1]] 51 | x$table_styling$header$modify_stat_p <- 1 52 | } 53 | # with a `tbl_svysummary(by)` value but no overall column 54 | else if (!"add_overall" %in% names(x$call_list)) { 55 | x$table_styling$header <- 56 | dplyr::rows_update( 57 | x$table_styling$header, 58 | ard_ess |> 59 | dplyr::mutate( 60 | column = paste0("stat_", seq_len(nrow(.env$ard_ess))), 61 | modify_stat_level = lapply(.data$group1_level, FUN = as.character) |> unlist(), 62 | modify_stat_n = unlist(.data$stat), 63 | modify_stat_N = sum(.data$modify_stat_n), 64 | modify_stat_p = .data$modify_stat_n / .data$modify_stat_N 65 | ) |> 66 | dplyr::select("column", tidyselect::starts_with("modify_stat_")), 67 | by = "column" 68 | ) |> 69 | dplyr::mutate( 70 | modify_stat_N = sum(.data$modify_stat_n, na.rm = TRUE) 71 | ) 72 | } 73 | # with both a `tbl_svysummary(by)` value and an overall column 74 | else { 75 | x$table_styling$header <- 76 | dplyr::rows_update( 77 | x$table_styling$header, 78 | dplyr::bind_rows(ard_ess_overall, ard_ess) |> 79 | dplyr::mutate( 80 | column = paste0("stat_", dplyr::row_number() - 1L), 81 | modify_stat_level = lapply(.data$group1_level, FUN = \(x) as.character(x %||% "Overall")) |> unlist(), 82 | modify_stat_n = unlist(.data$stat) 83 | ) |> 84 | dplyr::select("column", tidyselect::starts_with("modify_stat_")), 85 | by = "column" 86 | ) |> 87 | dplyr::mutate( 88 | modify_stat_N = unlist(.env$ard_ess_overall$stat), 89 | modify_stat_p = .data$modify_stat_n / .data$modify_stat_N 90 | ) 91 | } 92 | 93 | # update the header and return table ----------------------------------------- 94 | x <- gtsummary::modify_header(x, gtsummary::all_stat_cols() ~ header) # replace header 95 | x$cards$add_ess_header <- ard_ess # add ESS ARD to results 96 | x$call_list <- updated_call # update the call list 97 | 98 | # add abbreviation ----------------------------------------------------------- 99 | if (grepl(pattern = "ESS", x = header, fixed = TRUE)) { 100 | x <- gtsummary::modify_abbreviation(x, "ESS = Effective Sample Size") 101 | } 102 | 103 | x 104 | } 105 | 106 | # this is an ARD function in the style of the cardx::ard_survey_*() functions 107 | ard_survey_ess <- function(data, by = NULL) { 108 | # calculate ESS -------------------------------------------------------------- 109 | cards::ard_continuous( 110 | data = 111 | data$variables |> 112 | dplyr::mutate(...cards_survey_design_weights_column... = stats::weights(data)), 113 | variables = "...cards_survey_design_weights_column...", 114 | by = {{ by }}, 115 | statistic = everything() ~ list(ess = \(x) ess(x)) 116 | ) |> 117 | dplyr::mutate( 118 | variable = "..ess..", 119 | context = "survey_ess", 120 | stat_label = "Effective Sample Size" 121 | ) 122 | } 123 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' NHEFS with various propensity score weights 2 | #' 3 | #' A dataset containing various propensity score weights for 4 | #' `causaldata::nhefs_complete`. 5 | #' 6 | #' @format A data frame with 1566 rows and 14 variables: \describe{ 7 | #' \item{qsmk}{Quit smoking} \item{race}{Race} \item{age}{Age} \item{sex}{Sex} 8 | #' \item{education}{Education level} \item{smokeintensity}{Smoking intensity} 9 | #' \item{smokeyrs}{Number of smoke-years} \item{exercise}{Exercise level} 10 | #' \item{active}{Daily activity level} \item{wt71}{Participant weight in 1971 11 | #' (baseline)} \item{w_ate}{ATE weight} \item{w_att}{ATT weight} 12 | #' \item{w_atc}{ATC weight} \item{w_atm}{ATM weight} \item{w_ato}{ATO weight} 13 | #' \item{.fitted}{Propensity score} 14 | #' } 15 | "nhefs_weights" 16 | -------------------------------------------------------------------------------- /R/ess.R: -------------------------------------------------------------------------------- 1 | #' Calculate the Effective Sample Size (ESS) 2 | #' 3 | #' This function computes the effective sample size (ESS) given a vector of 4 | #' weights, using the classical \eqn{(\sum w)^2 / \sum(w^2)} formula (sometimes 5 | #' referred to as "Kish's effective sample size"). 6 | #' 7 | #' @param wts A numeric vector of weights (e.g., from survey or 8 | #' inverse-probability weighting). 9 | #' 10 | #' @return A single numeric value representing the effective sample size. 11 | #' 12 | #' @details The effective sample size (ESS) reflects how many observations you 13 | #' would have if all were equally weighted. If the weights vary substantially, 14 | #' the ESS can be much smaller than the actual number of observations. 15 | #' Formally: 16 | #' 17 | #' \deqn{ 18 | #' \mathrm{ESS} = \frac{\left(\sum_i w_i\right)^2}{\sum_i w_i^2}. 19 | #' } 20 | #' 21 | #' **Diagnostic Value**: 22 | #' * **Indicator of Weight Concentration**: A large discrepancy between ESS 23 | #' and the actual sample size indicates that a few observations carry 24 | #' disproportionately large weights, effectively reducing the usable 25 | #' information in the dataset. 26 | #' * **Variance Inflation**: A small ESS signals that weighted estimates are 27 | #' more sensitive to a handful of observations, inflating the variance and 28 | #' standard errors. 29 | #' * **Practical Guidance**: If ESS is much lower than the total sample 30 | #' size, it is advisable to investigate why some weights are extremely large 31 | #' or small. Techniques like weight trimming or stabilized weights might be 32 | #' employed to mitigate the issue 33 | #' 34 | #' @examples 35 | #' # Suppose we have five observations with equal weights 36 | #' wts1 <- rep(1.2, 5) 37 | #' # returns 5, because all weights are equal 38 | #' ess(wts1) 39 | #' 40 | #' # If weights vary more, smaller than 5 41 | #' wts2 <- c(0.5, 2, 2, 0.1, 0.8) 42 | #' ess(wts2) 43 | #' 44 | #' @export 45 | ess <- function(wts) { 46 | sum(wts)^2 / sum(wts^2) 47 | } 48 | 49 | -------------------------------------------------------------------------------- /R/geom_ecdf.R: -------------------------------------------------------------------------------- 1 | #' Calculate weighted and unweighted empirical cumulative distributions 2 | #' 3 | #' The empirical cumulative distribution function (ECDF) provides an alternative 4 | #' visualization of distribution. `geom_ecdf()` is similar to 5 | #' [`ggplot2::stat_ecdf()`] but it can also calculate weighted ECDFs. 6 | #' 7 | #' @section Aesthetics: In addition to the aesthetics for 8 | #' [`ggplot2::stat_ecdf()`], `geom_ecdf()` also accepts: \itemize{ \item 9 | #' weights } 10 | #' 11 | #' @inheritParams ggplot2::stat_ecdf 12 | #' 13 | #' @return a geom 14 | #' @export 15 | #' 16 | #' @examples 17 | #' library(ggplot2) 18 | #' 19 | #' ggplot( 20 | #' nhefs_weights, 21 | #' aes(x = smokeyrs, color = qsmk) 22 | #' ) + 23 | #' geom_ecdf(aes(weights = w_ato)) + 24 | #' xlab("Smoking Years") + 25 | #' ylab("Proportion <= x") 26 | #' 27 | geom_ecdf <- function(mapping = NULL, data = NULL, geom = "step", position = "identity", 28 | ..., n = NULL, pad = TRUE, na.rm = FALSE, show.legend = NA, 29 | inherit.aes = TRUE) { 30 | ggplot2::layer( 31 | data = data, mapping = mapping, stat = StatWeightedECDF, geom = geom, 32 | position = position, show.legend = show.legend, inherit.aes = inherit.aes, 33 | params = list(n = n, pad = pad, na.rm = na.rm, ...) 34 | ) 35 | } 36 | 37 | StatWeightedECDF <- ggplot2::ggproto( 38 | "StatWeightedECDF", 39 | ggplot2::StatEcdf, 40 | compute_group = function(data, scales, n = NULL, pad = NULL) { 41 | if ("weights" %in% names(data)) { 42 | data <- data[order(data$x), ] 43 | # ggplot2 3.4.1 changed this stat's name from `y` to `ecdf` 44 | if (packageVersion("ggplot2") >= "3.4.1") { 45 | data$ecdf <- cumsum(data$weights) / sum(data$weights) 46 | } else { 47 | data$y <- cumsum(data$weights) / sum(data$weights) 48 | } 49 | data 50 | } else { 51 | ggplot2::StatEcdf$compute_group(data, scales, n = n, pad = pad) 52 | } 53 | }, 54 | required_aes = c("x"), 55 | optional_aes = "weights" 56 | ) 57 | -------------------------------------------------------------------------------- /R/geom_mirrored_histogram.R: -------------------------------------------------------------------------------- 1 | #' Create mirrored histograms 2 | #' 3 | #' @inheritParams ggplot2::geom_histogram 4 | #' 5 | #' @return a geom 6 | #' @export 7 | #' 8 | #' @examples 9 | #' library(ggplot2) 10 | #' ggplot(nhefs_weights, aes(.fitted)) + 11 | #' geom_mirror_histogram( 12 | #' aes(group = qsmk), 13 | #' bins = 50 14 | #' ) + 15 | #' geom_mirror_histogram( 16 | #' aes(fill = qsmk, weight = w_ate), 17 | #' bins = 50, 18 | #' alpha = 0.5 19 | #' ) + 20 | #' scale_y_continuous(labels = abs) 21 | geom_mirror_histogram <- function( 22 | mapping = NULL, 23 | data = NULL, 24 | position = "stack", 25 | ..., 26 | binwidth = NULL, 27 | bins = NULL, 28 | na.rm = FALSE, 29 | orientation = NA, 30 | show.legend = NA, 31 | inherit.aes = TRUE 32 | ) { 33 | ggplot2::geom_histogram( 34 | mapping = mapping, 35 | data = data, 36 | stat = StatMirrorCount, 37 | position = position, 38 | ..., 39 | binwidth = binwidth, 40 | bins = bins, 41 | na.rm = na.rm, 42 | orientation = orientation, 43 | show.legend = show.legend, 44 | inherit.aes = inherit.aes 45 | ) 46 | } 47 | 48 | StatMirrorCount <- ggplot2::ggproto( 49 | "StatMirrorCount", 50 | ggplot2::StatBin, 51 | compute_group = function(data, scales, binwidth = NULL, bins = NULL, 52 | center = NULL, boundary = NULL, 53 | closed = c("right", "left"), pad = FALSE, 54 | breaks = NULL, flipped_aes = FALSE, 55 | origin = NULL, right = NULL, drop = NULL) { 56 | group <- unique(data$group) 57 | data <- ggplot2::StatBin$compute_group(data = data, scales = scales, binwidth = binwidth, bins = bins, 58 | center = center, boundary = boundary, 59 | closed = closed, pad = pad, 60 | breaks = breaks, flipped_aes = flipped_aes, 61 | origin = origin, right = right, drop = drop) 62 | if (group == 1) { 63 | data$count <- -data$count 64 | } else if (group > 2) { 65 | abort( 66 | "Groups of three or greater not supported in `geom_mirror_histogram()`" 67 | ) 68 | } else if (group == -1) { 69 | abort(c( 70 | "No group detected.", 71 | "*" = "Do you need to use {.var aes(group = ...)} \\ 72 | with your grouping variable?" 73 | )) 74 | } 75 | data 76 | } 77 | ) 78 | -------------------------------------------------------------------------------- /R/tidyselect-reexports.R: -------------------------------------------------------------------------------- 1 | # re-export functions from {tidyselect} 2 | 3 | #' @aliases select_helpers 4 | #' @importFrom tidyselect peek_vars 5 | #' @export 6 | tidyselect::peek_vars 7 | #' @aliases select_helpers 8 | #' @importFrom tidyselect contains 9 | #' @export 10 | tidyselect::contains 11 | #' @importFrom tidyselect ends_with 12 | #' @export 13 | tidyselect::ends_with 14 | #' @importFrom tidyselect everything 15 | #' @export 16 | tidyselect::everything 17 | #' @importFrom tidyselect matches 18 | #' @export 19 | tidyselect::matches 20 | #' @importFrom tidyselect num_range 21 | #' @export 22 | tidyselect::num_range 23 | #' @importFrom tidyselect one_of 24 | #' @export 25 | tidyselect::one_of 26 | #' @importFrom tidyselect starts_with 27 | #' @export 28 | tidyselect::starts_with 29 | #' @importFrom tidyselect last_col 30 | #' @export 31 | tidyselect::last_col 32 | -------------------------------------------------------------------------------- /R/tidysmd-rexports.R: -------------------------------------------------------------------------------- 1 | # re-export functions from {tidysmd} 2 | 3 | #' @aliases tidysmd 4 | #' @importFrom tidysmd geom_love 5 | #' @export 6 | tidysmd::geom_love 7 | #' @aliases tidysmd 8 | #' @importFrom tidysmd love_plot 9 | #' @export 10 | tidysmd::love_plot 11 | #' @aliases tidysmd 12 | #' @importFrom tidysmd tidy_smd 13 | #' @export 14 | tidysmd::tidy_smd 15 | #' @aliases tidysmd 16 | #' @importFrom tidysmd bind_matches 17 | #' @export 18 | tidysmd::bind_matches 19 | 20 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # future implementations for interface 2 | # 3 | # be_quiet <- function() { 4 | # getOption("halfmoon.quiet", default = FALSE) 5 | # } 6 | # 7 | # alert_info <- function(.message) { 8 | # if (!be_quiet()) { 9 | # cli::cli_alert_info(text = .message) 10 | # } 11 | # } 12 | 13 | abort <- function(.message) { 14 | cli::cli_abort(message = .message) 15 | } 16 | 17 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # halfmoon 17 | 18 | 19 | [![R-CMD-check](https://github.com/r-causal/halfmoon/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-causal/halfmoon/actions/workflows/R-CMD-check.yaml) 20 | [![Codecov test coverage](https://codecov.io/gh/malcolmbarrett/halfmoon/branch/main/graph/badge.svg)](https://app.codecov.io/gh/malcolmbarrett/halfmoon?branch=main) 21 | [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 22 | [![CRAN status](https://www.r-pkg.org/badges/version/halfmoon)](https://CRAN.R-project.org/package=halfmoon) 23 | 24 | 25 | > Within light there is darkness, 26 | but do not try to understand that darkness. 27 | Within darkness there is light, 28 | but do not look for that light. 29 | Light and darkness are a pair 30 | like the foot before and the foot behind in walking. 31 | 32 | -- From the Zen teaching poem [Sandokai](https://en.wikipedia.org/wiki/Sandokai). 33 | 34 | The goal of halfmoon is to cultivate balance in propensity score models. 35 | 36 | ## Installation 37 | 38 | You can install the most recent version of halfmoon from CRAN with: 39 | 40 | ``` r 41 | install.packages("halfmoon") 42 | ``` 43 | 44 | You can also install the development version of halfmoon from [GitHub](https://github.com/) with: 45 | 46 | ``` r 47 | # install.packages("devtools") 48 | devtools::install_github("malcolmbarrett/halfmoon") 49 | ``` 50 | 51 | ## Example: Weighting 52 | 53 | halfmoon includes several techniques for assessing the balance created by propensity score weights. 54 | 55 | ```{r example} 56 | library(halfmoon) 57 | library(ggplot2) 58 | 59 | # weighted mirrored histograms 60 | ggplot(nhefs_weights, aes(.fitted)) + 61 | geom_mirror_histogram( 62 | aes(group = qsmk), 63 | bins = 50 64 | ) + 65 | geom_mirror_histogram( 66 | aes(fill = qsmk, weight = w_ate), 67 | bins = 50, 68 | alpha = 0.5 69 | ) + scale_y_continuous(labels = abs) 70 | 71 | # weighted ecdf 72 | ggplot( 73 | nhefs_weights, 74 | aes(x = smokeyrs, color = qsmk) 75 | ) + 76 | geom_ecdf(aes(weights = w_ato)) + 77 | xlab("Smoking Years") + 78 | ylab("Proportion <= x") 79 | 80 | # weighted SMDs 81 | plot_df <- tidy_smd( 82 | nhefs_weights, 83 | race:active, 84 | .group = qsmk, 85 | .wts = starts_with("w_") 86 | ) 87 | 88 | ggplot( 89 | plot_df, 90 | aes( 91 | x = abs(smd), 92 | y = variable, 93 | group = method, 94 | color = method 95 | ) 96 | ) + 97 | geom_love() 98 | ``` 99 | 100 | ## Example: Matching 101 | 102 | halfmoon also has support for working with matched datasets. Consider these two objects from the [MatchIt](https://github.com/kosukeimai/MatchIt) documentation: 103 | 104 | ```{r} 105 | library(MatchIt) 106 | # Default: 1:1 NN PS matching w/o replacement 107 | m.out1 <- matchit(treat ~ age + educ + race + nodegree + 108 | married + re74 + re75, data = lalonde) 109 | 110 | # 1:1 NN Mahalanobis distance matching w/ replacement and 111 | # exact matching on married and race 112 | m.out2 <- matchit(treat ~ age + educ + race + nodegree + 113 | married + re74 + re75, data = lalonde, 114 | distance = "mahalanobis", replace = TRUE, 115 | exact = ~ married + race) 116 | ``` 117 | 118 | One option is to just look at the matched dataset with halfmoon: 119 | 120 | ```{r} 121 | matched_data <- get_matches(m.out1) 122 | 123 | match_smd <- tidy_smd( 124 | matched_data, 125 | c(age, educ, race, nodegree, married, re74, re75), 126 | .group = treat 127 | ) 128 | 129 | love_plot(match_smd) 130 | ``` 131 | 132 | The downside here is that you can't compare multiple matching strategies to the observed dataset; the label on the plot is also wrong. halfmoon comes with a helper function, `bind_matches()`, that creates a dataset more appropriate for this task: 133 | 134 | ```{r} 135 | matches <- bind_matches(lalonde, m.out1, m.out2) 136 | head(matches) 137 | ``` 138 | 139 | `matches` includes an binary variable for each `matchit` object which indicates if the row was included in the match or not. Since downweighting to 0 is equivalent to filtering the datasets to the matches, we can more easily compare multiple matched datasets with `.wts`: 140 | 141 | ```{r} 142 | many_matched_smds <- tidy_smd( 143 | matches, 144 | c(age, educ, race, nodegree, married, re74, re75), 145 | .group = treat, 146 | .wts = c(m.out1, m.out2) 147 | ) 148 | 149 | love_plot(many_matched_smds) 150 | ``` 151 | 152 | We can also extend the idea that matching indicators are weights to weighted mirrored histograms, giving us a good idea of the range of propensity scores that are being removed from the dataset. 153 | 154 | ```{r} 155 | # use the distance as the propensity score 156 | matches$ps <- m.out1$distance 157 | 158 | ggplot(matches, aes(ps)) + 159 | geom_mirror_histogram( 160 | aes(group = factor(treat)), 161 | bins = 50 162 | ) + 163 | geom_mirror_histogram( 164 | aes(fill = factor(treat), weight = m.out1), 165 | bins = 50, 166 | alpha = 0.5 167 | ) + scale_y_continuous(labels = abs) 168 | ``` 169 | 170 | 171 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # halfmoon 5 | 6 | 7 | 8 | [![R-CMD-check](https://github.com/r-causal/halfmoon/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/r-causal/halfmoon/actions/workflows/R-CMD-check.yaml) 9 | [![Codecov test 10 | coverage](https://codecov.io/gh/malcolmbarrett/halfmoon/branch/main/graph/badge.svg)](https://app.codecov.io/gh/malcolmbarrett/halfmoon?branch=main) 11 | [![Lifecycle: 12 | experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html#experimental) 13 | [![CRAN 14 | status](https://www.r-pkg.org/badges/version/halfmoon)](https://CRAN.R-project.org/package=halfmoon) 15 | 16 | 17 | > Within light there is darkness, but do not try to understand that 18 | > darkness. Within darkness there is light, but do not look for that 19 | > light. Light and darkness are a pair like the foot before and the foot 20 | > behind in walking. 21 | 22 | – From the Zen teaching poem 23 | [Sandokai](https://en.wikipedia.org/wiki/Sandokai). 24 | 25 | The goal of halfmoon is to cultivate balance in propensity score models. 26 | 27 | ## Installation 28 | 29 | You can install the most recent version of halfmoon from CRAN with: 30 | 31 | ``` r 32 | install.packages("halfmoon") 33 | ``` 34 | 35 | You can also install the development version of halfmoon from 36 | [GitHub](https://github.com/) with: 37 | 38 | ``` r 39 | # install.packages("devtools") 40 | devtools::install_github("malcolmbarrett/halfmoon") 41 | ``` 42 | 43 | ## Example: Weighting 44 | 45 | halfmoon includes several techniques for assessing the balance created 46 | by propensity score weights. 47 | 48 | ``` r 49 | library(halfmoon) 50 | library(ggplot2) 51 | 52 | # weighted mirrored histograms 53 | ggplot(nhefs_weights, aes(.fitted)) + 54 | geom_mirror_histogram( 55 | aes(group = qsmk), 56 | bins = 50 57 | ) + 58 | geom_mirror_histogram( 59 | aes(fill = qsmk, weight = w_ate), 60 | bins = 50, 61 | alpha = 0.5 62 | ) + scale_y_continuous(labels = abs) 63 | ``` 64 | 65 | 66 | 67 | ``` r 68 | 69 | # weighted ecdf 70 | ggplot( 71 | nhefs_weights, 72 | aes(x = smokeyrs, color = qsmk) 73 | ) + 74 | geom_ecdf(aes(weights = w_ato)) + 75 | xlab("Smoking Years") + 76 | ylab("Proportion <= x") 77 | ``` 78 | 79 | 80 | 81 | ``` r 82 | 83 | # weighted SMDs 84 | plot_df <- tidy_smd( 85 | nhefs_weights, 86 | race:active, 87 | .group = qsmk, 88 | .wts = starts_with("w_") 89 | ) 90 | 91 | ggplot( 92 | plot_df, 93 | aes( 94 | x = abs(smd), 95 | y = variable, 96 | group = method, 97 | color = method 98 | ) 99 | ) + 100 | geom_love() 101 | ``` 102 | 103 | 104 | 105 | ## Example: Matching 106 | 107 | halfmoon also has support for working with matched datasets. Consider 108 | these two objects from the 109 | [MatchIt](https://github.com/kosukeimai/MatchIt) documentation: 110 | 111 | ``` r 112 | library(MatchIt) 113 | # Default: 1:1 NN PS matching w/o replacement 114 | m.out1 <- matchit(treat ~ age + educ + race + nodegree + 115 | married + re74 + re75, data = lalonde) 116 | 117 | # 1:1 NN Mahalanobis distance matching w/ replacement and 118 | # exact matching on married and race 119 | m.out2 <- matchit(treat ~ age + educ + race + nodegree + 120 | married + re74 + re75, data = lalonde, 121 | distance = "mahalanobis", replace = TRUE, 122 | exact = ~ married + race) 123 | ``` 124 | 125 | One option is to just look at the matched dataset with halfmoon: 126 | 127 | ``` r 128 | matched_data <- get_matches(m.out1) 129 | 130 | match_smd <- tidy_smd( 131 | matched_data, 132 | c(age, educ, race, nodegree, married, re74, re75), 133 | .group = treat 134 | ) 135 | 136 | love_plot(match_smd) 137 | ``` 138 | 139 | 140 | 141 | The downside here is that you can’t compare multiple matching strategies 142 | to the observed dataset; the label on the plot is also wrong. halfmoon 143 | comes with a helper function, `bind_matches()`, that creates a dataset 144 | more appropriate for this task: 145 | 146 | ``` r 147 | matches <- bind_matches(lalonde, m.out1, m.out2) 148 | head(matches) 149 | #> treat age educ race married nodegree re74 re75 re78 m.out1 m.out2 150 | #> NSW1 1 37 11 black 1 1 0 0 9930.0460 1 1 151 | #> NSW2 1 22 9 hispan 0 1 0 0 3595.8940 1 1 152 | #> NSW3 1 30 12 black 0 0 0 0 24909.4500 1 1 153 | #> NSW4 1 27 11 black 0 1 0 0 7506.1460 1 1 154 | #> NSW5 1 33 8 black 0 1 0 0 289.7899 1 1 155 | #> NSW6 1 22 9 black 0 1 0 0 4056.4940 1 1 156 | ``` 157 | 158 | `matches` includes an binary variable for each `matchit` object which 159 | indicates if the row was included in the match or not. Since 160 | downweighting to 0 is equivalent to filtering the datasets to the 161 | matches, we can more easily compare multiple matched datasets with 162 | `.wts`: 163 | 164 | ``` r 165 | many_matched_smds <- tidy_smd( 166 | matches, 167 | c(age, educ, race, nodegree, married, re74, re75), 168 | .group = treat, 169 | .wts = c(m.out1, m.out2) 170 | ) 171 | 172 | love_plot(many_matched_smds) 173 | ``` 174 | 175 | 176 | 177 | We can also extend the idea that matching indicators are weights to 178 | weighted mirrored histograms, giving us a good idea of the range of 179 | propensity scores that are being removed from the dataset. 180 | 181 | ``` r 182 | # use the distance as the propensity score 183 | matches$ps <- m.out1$distance 184 | 185 | ggplot(matches, aes(ps)) + 186 | geom_mirror_histogram( 187 | aes(group = factor(treat)), 188 | bins = 50 189 | ) + 190 | geom_mirror_histogram( 191 | aes(fill = factor(treat), weight = m.out1), 192 | bins = 50, 193 | alpha = 0.5 194 | ) + scale_y_continuous(labels = abs) 195 | ``` 196 | 197 | 198 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://r-causal.github.io/halfmoon/ 2 | template: 3 | math-rendering: mathjax 4 | bootstrap: 5 5 | 6 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /data-raw/nhefs_weights.R: -------------------------------------------------------------------------------- 1 | ## code to prepare `nhefs_weights` dataset goes here 2 | ## code to prepare `nhefs_weights` dataset goes here 3 | library(tidyverse) 4 | library(broom) 5 | library(causaldata) 6 | propensity_model <- glm( 7 | qsmk ~ sex + 8 | race + age + I(age^2) + education + 9 | smokeintensity + I(smokeintensity^2) + 10 | smokeyrs + I(smokeyrs^2) + exercise + active + 11 | wt71 + I(wt71^2), 12 | family = binomial(), 13 | data = nhefs_complete 14 | ) 15 | 16 | nhefs_weights <- propensity_model %>% 17 | augment(type.predict = "response", data = nhefs_complete) %>% 18 | mutate( 19 | wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted), 20 | w_ate = (qsmk / .fitted) + 21 | ((1 - qsmk) / (1 - .fitted)), 22 | w_att = ((.fitted * qsmk) / .fitted) + 23 | ((.fitted * (1 - qsmk)) / (1 - .fitted)), 24 | w_atc = (((1 - .fitted) * qsmk) / .fitted) + 25 | (((1 - .fitted) * (1 - qsmk)) / (1 - .fitted)), 26 | w_atm = pmin(.fitted, 1 - .fitted) / 27 | (qsmk * .fitted + (1 - qsmk) * (1 - .fitted)), 28 | w_ato = (1 - .fitted) * qsmk + 29 | .fitted * (1 - qsmk) 30 | ) %>% 31 | select( 32 | qsmk, 33 | race, 34 | age, 35 | sex, 36 | education, 37 | smokeintensity, 38 | smokeyrs, 39 | exercise, 40 | active, 41 | wt71, 42 | starts_with("w_"), 43 | .fitted 44 | ) %>% 45 | mutate(qsmk = factor(qsmk)) 46 | 47 | usethis::use_data(nhefs_weights, overwrite = TRUE) 48 | -------------------------------------------------------------------------------- /data/nhefs_weights.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/data/nhefs_weights.rda -------------------------------------------------------------------------------- /halfmoon.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 69b122f4-ff94-42bb-90ff-53e4e8e8cf6a 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | LineEndingConversion: Posix 19 | 20 | BuildType: Package 21 | PackageUseDevtools: Yes 22 | PackageInstallArgs: --no-multiarch --with-keep.source 23 | PackageRoxygenize: rd,collate,namespace 24 | -------------------------------------------------------------------------------- /man/add_ess_header.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/add_ess_header.R 3 | \name{add_ess_header} 4 | \alias{add_ess_header} 5 | \title{Add ESS Table Header} 6 | \usage{ 7 | add_ess_header( 8 | x, 9 | header = "**{level}** \\nESS = {format(n, digits = 1, nsmall = 1)}" 10 | ) 11 | } 12 | \arguments{ 13 | \item{x}{(\code{tbl_svysummary})\cr 14 | Object of class \code{'tbl_svysummary'} typically created with \code{gtsummary::tbl_svysummary()}.} 15 | 16 | \item{header}{(\code{string})\cr 17 | String specifying updated header. 18 | Review \code{gtsummary::modify_header()} for details on use.} 19 | } 20 | \value{ 21 | a 'gtsummary' table 22 | } 23 | \description{ 24 | This function replaces the counts in the default header of 25 | \code{gtsummary::tbl_svysummary()} tables to counts representing the 26 | Effective Sample Size (ESS). See \code{\link[=ess]{ess()}} for details. 27 | } 28 | \examples{ 29 | \dontshow{if (rlang::is_installed(c("survey", "gtsummary", "cards", "cardx", "dplyr"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 30 | svy <- survey::svydesign(~1, data = nhefs_weights, weights = ~ w_ate) 31 | 32 | gtsummary::tbl_svysummary(svy, include = c(age, sex, smokeyrs)) |> 33 | add_ess_header() 34 | hdr <- paste0( 35 | "**{level}** \n", 36 | "N = {n_unweighted}; ESS = {format(n, digits = 1, nsmall = 1)}" 37 | ) 38 | gtsummary::tbl_svysummary(svy, by = qsmk, include = c(age, sex, smokeyrs)) |> 39 | add_ess_header(header = hdr) 40 | \dontshow{\}) # examplesIf} 41 | } 42 | -------------------------------------------------------------------------------- /man/ess.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ess.R 3 | \name{ess} 4 | \alias{ess} 5 | \title{Calculate the Effective Sample Size (ESS)} 6 | \usage{ 7 | ess(wts) 8 | } 9 | \arguments{ 10 | \item{wts}{A numeric vector of weights (e.g., from survey or 11 | inverse-probability weighting).} 12 | } 13 | \value{ 14 | A single numeric value representing the effective sample size. 15 | } 16 | \description{ 17 | This function computes the effective sample size (ESS) given a vector of 18 | weights, using the classical \eqn{(\sum w)^2 / \sum(w^2)} formula (sometimes 19 | referred to as "Kish's effective sample size"). 20 | } 21 | \details{ 22 | The effective sample size (ESS) reflects how many observations you 23 | would have if all were equally weighted. If the weights vary substantially, 24 | the ESS can be much smaller than the actual number of observations. 25 | Formally: 26 | 27 | \deqn{ 28 | \mathrm{ESS} = \frac{\left(\sum_i w_i\right)^2}{\sum_i w_i^2}. 29 | } 30 | 31 | \strong{Diagnostic Value}: 32 | \itemize{ 33 | \item \strong{Indicator of Weight Concentration}: A large discrepancy between ESS 34 | and the actual sample size indicates that a few observations carry 35 | disproportionately large weights, effectively reducing the usable 36 | information in the dataset. 37 | \item \strong{Variance Inflation}: A small ESS signals that weighted estimates are 38 | more sensitive to a handful of observations, inflating the variance and 39 | standard errors. 40 | \item \strong{Practical Guidance}: If ESS is much lower than the total sample 41 | size, it is advisable to investigate why some weights are extremely large 42 | or small. Techniques like weight trimming or stabilized weights might be 43 | employed to mitigate the issue 44 | } 45 | } 46 | \examples{ 47 | # Suppose we have five observations with equal weights 48 | wts1 <- rep(1.2, 5) 49 | # returns 5, because all weights are equal 50 | ess(wts1) 51 | 52 | # If weights vary more, smaller than 5 53 | wts2 <- c(0.5, 2, 2, 0.1, 0.8) 54 | ess(wts2) 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/figures/README-example-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-example-1.png -------------------------------------------------------------------------------- /man/figures/README-example-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-example-2.png -------------------------------------------------------------------------------- /man/figures/README-example-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-example-3.png -------------------------------------------------------------------------------- /man/figures/README-pressure-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-pressure-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-unnamed-chunk-3-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-unnamed-chunk-5-1.png -------------------------------------------------------------------------------- /man/figures/README-unnamed-chunk-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-unnamed-chunk-6-1.png -------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/logo.png -------------------------------------------------------------------------------- /man/geom_ecdf.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/geom_ecdf.R 3 | \name{geom_ecdf} 4 | \alias{geom_ecdf} 5 | \title{Calculate weighted and unweighted empirical cumulative distributions} 6 | \usage{ 7 | geom_ecdf( 8 | mapping = NULL, 9 | data = NULL, 10 | geom = "step", 11 | position = "identity", 12 | ..., 13 | n = NULL, 14 | pad = TRUE, 15 | na.rm = FALSE, 16 | show.legend = NA, 17 | inherit.aes = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 22 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 23 | at the top level of the plot. You must supply \code{mapping} if there is no plot 24 | mapping.} 25 | 26 | \item{data}{The data to be displayed in this layer. There are three 27 | options: 28 | 29 | If \code{NULL}, the default, the data is inherited from the plot 30 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 31 | 32 | A \code{data.frame}, or other object, will override the plot 33 | data. All objects will be fortified to produce a data frame. See 34 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 35 | 36 | A \code{function} will be called with a single argument, 37 | the plot data. The return value must be a \code{data.frame}, and 38 | will be used as the layer data. A \code{function} can be created 39 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 40 | 41 | \item{geom}{The geometric object to use to display the data for this layer. 42 | When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument 43 | can be used to override the default coupling between stats and geoms. The 44 | \code{geom} argument accepts the following: 45 | \itemize{ 46 | \item A \code{Geom} ggproto subclass, for example \code{GeomPoint}. 47 | \item A string naming the geom. To give the geom as a string, strip the 48 | function name of the \code{geom_} prefix. For example, to use \code{geom_point()}, 49 | give the geom as \code{"point"}. 50 | \item For more information and other ways to specify the geom, see the 51 | \link[ggplot2:layer_geoms]{layer geom} documentation. 52 | }} 53 | 54 | \item{position}{A position adjustment to use on the data for this layer. This 55 | can be used in various ways, including to prevent overplotting and 56 | improving the display. The \code{position} argument accepts the following: 57 | \itemize{ 58 | \item The result of calling a position function, such as \code{position_jitter()}. 59 | This method allows for passing extra arguments to the position. 60 | \item A string naming the position adjustment. To give the position as a 61 | string, strip the function name of the \code{position_} prefix. For example, 62 | to use \code{position_jitter()}, give the position as \code{"jitter"}. 63 | \item For more information and other ways to specify the position, see the 64 | \link[ggplot2:layer_positions]{layer position} documentation. 65 | }} 66 | 67 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These 68 | arguments broadly fall into one of 4 categories below. Notably, further 69 | arguments to the \code{position} argument, or aesthetics that are required 70 | can \emph{not} be passed through \code{...}. Unknown arguments that are not part 71 | of the 4 categories below are ignored. 72 | \itemize{ 73 | \item Static aesthetics that are not mapped to a scale, but are at a fixed 74 | value and apply to the layer as a whole. For example, \code{colour = "red"} 75 | or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} 76 | section that lists the available options. The 'required' aesthetics 77 | cannot be passed on to the \code{params}. Please note that while passing 78 | unmapped aesthetics as vectors is technically possible, the order and 79 | required length is not guaranteed to be parallel to the input data. 80 | \item When constructing a layer using 81 | a \verb{stat_*()} function, the \code{...} argument can be used to pass on 82 | parameters to the \code{geom} part of the layer. An example of this is 83 | \code{stat_density(geom = "area", outline.type = "both")}. The geom's 84 | documentation lists which parameters it can accept. 85 | \item Inversely, when constructing a layer using a 86 | \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters 87 | to the \code{stat} part of the layer. An example of this is 88 | \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation 89 | lists which parameters it can accept. 90 | \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through 91 | \code{...}. This can be one of the functions described as 92 | \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend. 93 | }} 94 | 95 | \item{n}{if NULL, do not interpolate. If not NULL, this is the number 96 | of points to interpolate with.} 97 | 98 | \item{pad}{If \code{TRUE}, pad the ecdf with additional points (-Inf, 0) 99 | and (Inf, 1)} 100 | 101 | \item{na.rm}{If \code{FALSE} (the default), removes missing values with 102 | a warning. If \code{TRUE} silently removes missing values.} 103 | 104 | \item{show.legend}{logical. Should this layer be included in the legends? 105 | \code{NA}, the default, includes if any aesthetics are mapped. 106 | \code{FALSE} never includes, and \code{TRUE} always includes. 107 | It can also be a named logical vector to finely select the aesthetics to 108 | display.} 109 | 110 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 111 | rather than combining with them. This is most useful for helper functions 112 | that define both data and aesthetics and shouldn't inherit behaviour from 113 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 114 | } 115 | \value{ 116 | a geom 117 | } 118 | \description{ 119 | The empirical cumulative distribution function (ECDF) provides an alternative 120 | visualization of distribution. \code{geom_ecdf()} is similar to 121 | \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}} but it can also calculate weighted ECDFs. 122 | } 123 | \section{Aesthetics}{ 124 | In addition to the aesthetics for 125 | \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}}, \code{geom_ecdf()} also accepts: \itemize{ \item 126 | weights } 127 | } 128 | 129 | \examples{ 130 | library(ggplot2) 131 | 132 | ggplot( 133 | nhefs_weights, 134 | aes(x = smokeyrs, color = qsmk) 135 | ) + 136 | geom_ecdf(aes(weights = w_ato)) + 137 | xlab("Smoking Years") + 138 | ylab("Proportion <= x") 139 | 140 | } 141 | -------------------------------------------------------------------------------- /man/geom_mirror_histogram.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/geom_mirrored_histogram.R 3 | \name{geom_mirror_histogram} 4 | \alias{geom_mirror_histogram} 5 | \title{Create mirrored histograms} 6 | \usage{ 7 | geom_mirror_histogram( 8 | mapping = NULL, 9 | data = NULL, 10 | position = "stack", 11 | ..., 12 | binwidth = NULL, 13 | bins = NULL, 14 | na.rm = FALSE, 15 | orientation = NA, 16 | show.legend = NA, 17 | inherit.aes = TRUE 18 | ) 19 | } 20 | \arguments{ 21 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and 22 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping 23 | at the top level of the plot. You must supply \code{mapping} if there is no plot 24 | mapping.} 25 | 26 | \item{data}{The data to be displayed in this layer. There are three 27 | options: 28 | 29 | If \code{NULL}, the default, the data is inherited from the plot 30 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}. 31 | 32 | A \code{data.frame}, or other object, will override the plot 33 | data. All objects will be fortified to produce a data frame. See 34 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created. 35 | 36 | A \code{function} will be called with a single argument, 37 | the plot data. The return value must be a \code{data.frame}, and 38 | will be used as the layer data. A \code{function} can be created 39 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).} 40 | 41 | \item{position}{A position adjustment to use on the data for this layer. This 42 | can be used in various ways, including to prevent overplotting and 43 | improving the display. The \code{position} argument accepts the following: 44 | \itemize{ 45 | \item The result of calling a position function, such as \code{position_jitter()}. 46 | This method allows for passing extra arguments to the position. 47 | \item A string naming the position adjustment. To give the position as a 48 | string, strip the function name of the \code{position_} prefix. For example, 49 | to use \code{position_jitter()}, give the position as \code{"jitter"}. 50 | \item For more information and other ways to specify the position, see the 51 | \link[ggplot2:layer_positions]{layer position} documentation. 52 | }} 53 | 54 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These 55 | arguments broadly fall into one of 4 categories below. Notably, further 56 | arguments to the \code{position} argument, or aesthetics that are required 57 | can \emph{not} be passed through \code{...}. Unknown arguments that are not part 58 | of the 4 categories below are ignored. 59 | \itemize{ 60 | \item Static aesthetics that are not mapped to a scale, but are at a fixed 61 | value and apply to the layer as a whole. For example, \code{colour = "red"} 62 | or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics} 63 | section that lists the available options. The 'required' aesthetics 64 | cannot be passed on to the \code{params}. Please note that while passing 65 | unmapped aesthetics as vectors is technically possible, the order and 66 | required length is not guaranteed to be parallel to the input data. 67 | \item When constructing a layer using 68 | a \verb{stat_*()} function, the \code{...} argument can be used to pass on 69 | parameters to the \code{geom} part of the layer. An example of this is 70 | \code{stat_density(geom = "area", outline.type = "both")}. The geom's 71 | documentation lists which parameters it can accept. 72 | \item Inversely, when constructing a layer using a 73 | \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters 74 | to the \code{stat} part of the layer. An example of this is 75 | \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation 76 | lists which parameters it can accept. 77 | \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through 78 | \code{...}. This can be one of the functions described as 79 | \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend. 80 | }} 81 | 82 | \item{binwidth}{The width of the bins. Can be specified as a numeric value 83 | or as a function that calculates width from unscaled x. Here, "unscaled x" 84 | refers to the original x values in the data, before application of any 85 | scale transformation. When specifying a function along with a grouping 86 | structure, the function will be called once per group. 87 | The default is to use the number of bins in \code{bins}, 88 | covering the range of the data. You should always override 89 | this value, exploring multiple widths to find the best to illustrate the 90 | stories in your data. 91 | 92 | The bin width of a date variable is the number of days in each time; the 93 | bin width of a time variable is the number of seconds.} 94 | 95 | \item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.} 96 | 97 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with 98 | a warning. If \code{TRUE}, missing values are silently removed.} 99 | 100 | \item{orientation}{The orientation of the layer. The default (\code{NA}) 101 | automatically determines the orientation from the aesthetic mapping. In the 102 | rare event that this fails it can be given explicitly by setting \code{orientation} 103 | to either \code{"x"} or \code{"y"}. See the \emph{Orientation} section for more detail.} 104 | 105 | \item{show.legend}{logical. Should this layer be included in the legends? 106 | \code{NA}, the default, includes if any aesthetics are mapped. 107 | \code{FALSE} never includes, and \code{TRUE} always includes. 108 | It can also be a named logical vector to finely select the aesthetics to 109 | display.} 110 | 111 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics, 112 | rather than combining with them. This is most useful for helper functions 113 | that define both data and aesthetics and shouldn't inherit behaviour from 114 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.} 115 | } 116 | \value{ 117 | a geom 118 | } 119 | \description{ 120 | Create mirrored histograms 121 | } 122 | \examples{ 123 | library(ggplot2) 124 | ggplot(nhefs_weights, aes(.fitted)) + 125 | geom_mirror_histogram( 126 | aes(group = qsmk), 127 | bins = 50 128 | ) + 129 | geom_mirror_histogram( 130 | aes(fill = qsmk, weight = w_ate), 131 | bins = 50, 132 | alpha = 0.5 133 | ) + 134 | scale_y_continuous(labels = abs) 135 | } 136 | -------------------------------------------------------------------------------- /man/nhefs_weights.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{nhefs_weights} 5 | \alias{nhefs_weights} 6 | \title{NHEFS with various propensity score weights} 7 | \format{ 8 | A data frame with 1566 rows and 14 variables: \describe{ 9 | \item{qsmk}{Quit smoking} \item{race}{Race} \item{age}{Age} \item{sex}{Sex} 10 | \item{education}{Education level} \item{smokeintensity}{Smoking intensity} 11 | \item{smokeyrs}{Number of smoke-years} \item{exercise}{Exercise level} 12 | \item{active}{Daily activity level} \item{wt71}{Participant weight in 1971 13 | (baseline)} \item{w_ate}{ATE weight} \item{w_att}{ATT weight} 14 | \item{w_atc}{ATC weight} \item{w_atm}{ATM weight} \item{w_ato}{ATO weight} 15 | \item{.fitted}{Propensity score} 16 | } 17 | } 18 | \usage{ 19 | nhefs_weights 20 | } 21 | \description{ 22 | A dataset containing various propensity score weights for 23 | \code{causaldata::nhefs_complete}. 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /man/reexports.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tidyselect-reexports.R, R/tidysmd-rexports.R 3 | \docType{import} 4 | \name{reexports} 5 | \alias{reexports} 6 | \alias{peek_vars} 7 | \alias{select_helpers} 8 | \alias{contains} 9 | \alias{ends_with} 10 | \alias{everything} 11 | \alias{matches} 12 | \alias{num_range} 13 | \alias{one_of} 14 | \alias{starts_with} 15 | \alias{last_col} 16 | \alias{geom_love} 17 | \alias{tidysmd} 18 | \alias{love_plot} 19 | \alias{tidy_smd} 20 | \alias{bind_matches} 21 | \title{Objects exported from other packages} 22 | \keyword{internal} 23 | \description{ 24 | These objects are imported from other packages. Follow the links 25 | below to see their documentation. 26 | 27 | \describe{ 28 | \item{tidyselect}{\code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{peek_vars}}, \code{\link[tidyselect]{starts_with}}} 29 | 30 | \item{tidysmd}{\code{\link[tidysmd]{bind_matches}}, \code{\link[tidysmd]{geom_love}}, \code{\link[tidysmd:geom_love]{love_plot}}, \code{\link[tidysmd]{tidy_smd}}} 31 | }} 32 | 33 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/favicon-96x96.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/web-app-manifest-512x512.png -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(halfmoon) 11 | 12 | test_check("halfmoon") 13 | -------------------------------------------------------------------------------- /tests/testthat/Rplots.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/tests/testthat/Rplots.pdf -------------------------------------------------------------------------------- /tests/testthat/_snaps/geom_ecdf/ecdf-no-weights.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 0.00 36 | 0.25 37 | 0.50 38 | 0.75 39 | 1.00 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 0 50 | 20 51 | 40 52 | 60 53 | Smoking Years 54 | Proportion <= x 55 | 56 | qsmk 57 | 58 | 59 | 60 | 61 | 0 62 | 1 63 | ecdf (no weights) 64 | 65 | 66 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/geom_ecdf/ecdf-weights.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 0.00 36 | 0.25 37 | 0.50 38 | 0.75 39 | 1.00 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 0 50 | 20 51 | 40 52 | 60 53 | Smoking Years 54 | Proportion <= x 55 | 56 | qsmk 57 | 58 | 59 | 60 | 61 | 0 62 | 1 63 | ecdf (weights) 64 | 65 | 66 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/geom_mirrored_histogram.md: -------------------------------------------------------------------------------- 1 | # geom_mirrored_histogram errors/warns correctly 2 | 3 | Computation failed in `stat_mirror_count()`. 4 | Caused by error in `abort()`: 5 | ! Groups of three or greater not supported in `geom_mirror_histogram()` 6 | 7 | --- 8 | 9 | Computation failed in `stat_mirror_count()`. 10 | Caused by error in `abort()`: 11 | ! No group detected. 12 | * Do you need to use `aes(group = ...)` with your grouping variable? 13 | 14 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/geom_mirrored_histogram/layered-weighted-and-unweighted.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 100 234 | 50 235 | 0 236 | 50 237 | 100 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 0.0 249 | 0.2 250 | 0.4 251 | 0.6 252 | 0.8 253 | .fitted 254 | count 255 | 256 | qsmk 257 | 258 | 259 | 260 | 261 | 0 262 | 1 263 | layered (weighted and unweighted) 264 | 265 | 266 | -------------------------------------------------------------------------------- /tests/testthat/helper-vdiffr.R: -------------------------------------------------------------------------------- 1 | expect_doppelganger <- function(title, fig, ...) { 2 | testthat::skip_if_not_installed("vdiffr") 3 | vdiffr::expect_doppelganger(title, fig, ...) 4 | } 5 | -------------------------------------------------------------------------------- /tests/testthat/test-add_ess_header.R: -------------------------------------------------------------------------------- 1 | suppressPackageStartupMessages(library(survey)) 2 | suppressPackageStartupMessages(library(gtsummary)) 3 | suppressPackageStartupMessages(library(dplyr)) 4 | 5 | # Create survey design and gtsummary tables. 6 | svy <- svydesign(~1, data = nhefs_weights, weights = ~ w_ate) 7 | tbl <- tbl_svysummary(svy, include = c(age, sex, smokeyrs)) 8 | tbl_by <- tbl_svysummary(svy, by = qsmk, include = c(age, sex, smokeyrs)) 9 | 10 | # Tests -------------------------------------------------------------------- 11 | 12 | test_that("Non-by case ESS values match ess() results", { 13 | res <- add_ess_header(tbl) 14 | 15 | # Compute expected ESS from the survey design weights. 16 | expected_ess <- ess(weights(svy)) 17 | 18 | # For a non-by table, the header has one row. 19 | expect_equal(res$table_styling$header$modify_stat_n[[1]], expected_ess) 20 | expect_equal(res$table_styling$header$modify_stat_N[[1]], expected_ess) 21 | expect_equal(res$table_styling$header$modify_stat_p[[1]], 1) 22 | 23 | # Verify that the ESS result stored in cards matches. 24 | expect_equal(res$cards$add_ess_header$stat[[1]], expected_ess) 25 | }) 26 | 27 | test_that("By case ESS values match ess() results", { 28 | res_by <- add_ess_header(tbl_by) 29 | header_tbl <- res_by$table_styling$header 30 | 31 | # In a by table, the header may include extra rows. 32 | # We'll restrict to rows with a non-missing group label. 33 | by_rows <- header_tbl |> filter(!is.na(modify_stat_level)) 34 | 35 | # Compute expected ESS by group. 36 | expected_by <- nhefs_weights |> 37 | group_by(qsmk) |> 38 | summarize(expected_ess = ess(w_ate), .groups = "drop") |> 39 | arrange(as.character(qsmk)) 40 | 41 | # Compare group labels. 42 | expect_equal(by_rows$modify_stat_level, as.character(expected_by$qsmk)) 43 | # Compare group ESS values. 44 | expect_equal(by_rows$modify_stat_n, expected_by$expected_ess) 45 | 46 | # Total ESS is the sum of the group ESS values. 47 | total_expected <- sum(expected_by$expected_ess) 48 | expect_equal(by_rows$modify_stat_N, rep(total_expected, nrow(by_rows))) 49 | 50 | # Compare proportions. 51 | expected_prop <- expected_by$expected_ess / total_expected 52 | expect_equal(by_rows$modify_stat_p, expected_prop) 53 | 54 | # The ESS results stored in cards may be a list-column; unlist before comparing. 55 | expect_equal(unlist(res_by$cards$add_ess_header$stat), expected_by$expected_ess) 56 | }) 57 | 58 | test_that("Error if `x` is not a tbl_svysummary", { 59 | expect_error( 60 | add_ess_header(1), 61 | regexp = "Argument `x` must be class ", 62 | fixed = TRUE 63 | ) 64 | }) 65 | 66 | test_that("Error if `header` is not a string", { 67 | expect_error( 68 | add_ess_header(tbl, header = 123), 69 | regexp = "Argument `header` must be a string.", 70 | fixed = TRUE 71 | ) 72 | }) 73 | 74 | 75 | -------------------------------------------------------------------------------- /tests/testthat/test-ess.R: -------------------------------------------------------------------------------- 1 | test_that("ess returns correct result for equal weights", { 2 | # 5 observations, each weight = 2 3 | wts_equal <- rep(2, 5) 4 | # ESS should be 5 5 | expect_equal(ess(wts_equal), 5) 6 | }) 7 | 8 | test_that("ess returns correct result for varied weights", { 9 | # 5 observations, each weight varies 10 | wts_equal <- runif(5, max = 5) 11 | # ESS should always be less than 5 12 | expect_lt(ess(wts_equal), 5) 13 | }) 14 | 15 | test_that("ess handles one large weight", { 16 | # 5 observations, 1 large weight 17 | wts_big <- c(1000, rep(0, 4)) 18 | # The sum is 1000, sum of squares is 1,000^2 = 1e6 19 | # ESS = (1000^2) / 1,000^2 = 1 20 | expect_equal(ess(wts_big), 1) 21 | }) 22 | 23 | test_that("ess gives `NaN` if all weights are 0", { 24 | wts_zero <- rep(0, 5) 25 | # sum(wts) = 0, sum(wts^2) = 0 -> 0/0 is NaN 26 | expect_true(is.nan(ess(wts_zero))) 27 | }) 28 | -------------------------------------------------------------------------------- /tests/testthat/test-geom_ecdf.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | test_that("geom_ecdf works", { 3 | p_no_wts <- ggplot( 4 | nhefs_weights, 5 | aes(x = smokeyrs, color = qsmk) 6 | ) + 7 | geom_ecdf() + 8 | xlab("Smoking Years") + 9 | ylab("Proportion <= x") 10 | 11 | p_wts <- ggplot( 12 | nhefs_weights, 13 | aes(x = smokeyrs, color = qsmk) 14 | ) + 15 | geom_ecdf(aes(weights = w_ato)) + 16 | xlab("Smoking Years") + 17 | ylab("Proportion <= x") 18 | 19 | expect_doppelganger("ecdf (no weights)", p_no_wts) 20 | expect_doppelganger("ecdf (weights)", p_wts) 21 | 22 | }) 23 | 24 | -------------------------------------------------------------------------------- /tests/testthat/test-geom_mirrored_histogram.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | test_that("geom_mirrored_histogram works", { 3 | p <- ggplot(nhefs_weights, aes(.fitted)) + 4 | geom_mirror_histogram( 5 | aes(group = qsmk), 6 | bins = 50 7 | ) + 8 | geom_mirror_histogram( 9 | aes(fill = qsmk, weight = w_ate), 10 | bins = 50, 11 | alpha = 0.5 12 | ) + 13 | scale_y_continuous(labels = abs) 14 | 15 | expect_doppelganger("layered (weighted and unweighted)", p) 16 | }) 17 | 18 | test_that("geom_mirrored_histogram errors/warns correctly", { 19 | # group of 3 or more 20 | edu_group <- ggplot(nhefs_weights, aes(.fitted)) + 21 | geom_mirror_histogram( 22 | aes(group = education), 23 | bins = 50 24 | ) 25 | expect_snapshot_warning(print(edu_group)) 26 | 27 | # no group 28 | no_group <- ggplot(nhefs_weights, aes(.fitted)) + 29 | geom_mirror_histogram(bins = 50) 30 | 31 | expect_snapshot_warning(print(no_group)) 32 | }) 33 | 34 | test_that("NO_GROUP is still -1", { 35 | skip_on_cran() 36 | expect_equal(asNamespace("ggplot2")$NO_GROUP, -1) 37 | }) 38 | --------------------------------------------------------------------------------