├── .Rbuildignore
├── .github
├── .gitignore
└── workflows
│ ├── R-CMD-check.yaml
│ ├── pkgdown.yaml
│ └── test-coverage.yaml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
├── add_ess_header.R
├── data.R
├── ess.R
├── geom_ecdf.R
├── geom_mirrored_histogram.R
├── tidyselect-reexports.R
├── tidysmd-rexports.R
└── utils.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── codecov.yml
├── data-raw
└── nhefs_weights.R
├── data
└── nhefs_weights.rda
├── halfmoon.Rproj
├── man
├── add_ess_header.Rd
├── ess.Rd
├── figures
│ ├── README-example-1.png
│ ├── README-example-2.png
│ ├── README-example-3.png
│ ├── README-pressure-1.png
│ ├── README-unnamed-chunk-3-1.png
│ ├── README-unnamed-chunk-5-1.png
│ ├── README-unnamed-chunk-6-1.png
│ └── logo.png
├── geom_ecdf.Rd
├── geom_mirror_histogram.Rd
├── nhefs_weights.Rd
└── reexports.Rd
├── pkgdown
└── favicon
│ ├── apple-touch-icon.png
│ ├── favicon-96x96.png
│ ├── favicon.ico
│ ├── favicon.svg
│ ├── site.webmanifest
│ ├── web-app-manifest-192x192.png
│ └── web-app-manifest-512x512.png
└── tests
├── testthat.R
└── testthat
├── Rplots.pdf
├── _snaps
├── geom_ecdf
│ ├── ecdf-no-weights.svg
│ └── ecdf-weights.svg
├── geom_mirrored_histogram.md
└── geom_mirrored_histogram
│ └── layered-weighted-and-unweighted.svg
├── helper-vdiffr.R
├── test-add_ess_header.R
├── test-ess.R
├── test-geom_ecdf.R
└── test-geom_mirrored_histogram.R
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^halfmoon\.Rproj$
2 | ^\.Rproj\.user$
3 | ^README\.Rmd$
4 | ^data-raw$
5 | ^LICENSE\.md$
6 | ^\.github$
7 | ^_pkgdown\.yml$
8 | ^docs$
9 | ^pkgdown$
10 | ^cran-comments\.md$
11 | ^rhub\.R$
12 | ^codecov\.yml$
13 | ^CRAN-SUBMISSION$
14 |
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: R-CMD-check
10 |
11 | jobs:
12 | R-CMD-check:
13 | runs-on: ${{ matrix.config.os }}
14 |
15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 |
17 | strategy:
18 | fail-fast: false
19 | matrix:
20 | config:
21 | - {os: macos-latest, r: 'release'}
22 | - {os: windows-latest, r: 'release'}
23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
24 | - {os: ubuntu-latest, r: 'release'}
25 | - {os: ubuntu-latest, r: 'oldrel-1'}
26 |
27 | env:
28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 | R_KEEP_PKG_SOURCE: yes
30 |
31 | steps:
32 | - uses: actions/checkout@v3
33 |
34 | - uses: r-lib/actions/setup-pandoc@v2
35 |
36 | - uses: r-lib/actions/setup-r@v2
37 | with:
38 | r-version: ${{ matrix.config.r }}
39 | http-user-agent: ${{ matrix.config.http-user-agent }}
40 | use-public-rspm: true
41 |
42 | - uses: r-lib/actions/setup-r-dependencies@v2
43 | with:
44 | extra-packages: any::rcmdcheck
45 | needs: check
46 |
47 | - uses: r-lib/actions/check-r-package@v2
48 | with:
49 | upload-snapshots: true
50 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 | release:
9 | types: [published]
10 | workflow_dispatch:
11 |
12 | name: pkgdown
13 |
14 | jobs:
15 | pkgdown:
16 | runs-on: ubuntu-latest
17 | # Only restrict concurrency for non-PR jobs
18 | concurrency:
19 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
20 | env:
21 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
22 | steps:
23 | - uses: actions/checkout@v3
24 |
25 | - uses: r-lib/actions/setup-pandoc@v2
26 |
27 | - uses: r-lib/actions/setup-r@v2
28 | with:
29 | use-public-rspm: true
30 |
31 | - uses: r-lib/actions/setup-r-dependencies@v2
32 | with:
33 | extra-packages: any::pkgdown, local::.
34 | needs: website
35 |
36 | - name: Build site
37 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
38 | shell: Rscript {0}
39 |
40 | - name: Deploy to GitHub pages 🚀
41 | if: github.event_name != 'pull_request'
42 | uses: JamesIves/github-pages-deploy-action@v4.4.1
43 | with:
44 | clean: false
45 | branch: gh-pages
46 | folder: docs
47 |
--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 |
8 | name: test-coverage.yaml
9 |
10 | permissions: read-all
11 |
12 | jobs:
13 | test-coverage:
14 | runs-on: ubuntu-latest
15 | env:
16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
17 |
18 | steps:
19 | - uses: actions/checkout@v4
20 |
21 | - uses: r-lib/actions/setup-r@v2
22 | with:
23 | use-public-rspm: true
24 |
25 | - uses: r-lib/actions/setup-r-dependencies@v2
26 | with:
27 | extra-packages: any::covr, any::xml2
28 | needs: coverage
29 |
30 | - name: Test coverage
31 | run: |
32 | cov <- covr::package_coverage(
33 | quiet = FALSE,
34 | clean = FALSE,
35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
36 | )
37 | covr::to_cobertura(cov)
38 | shell: Rscript {0}
39 |
40 | - uses: codecov/codecov-action@v4
41 | with:
42 | # Fail if error if not on PR, or if on PR and token is given
43 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
44 | file: ./cobertura.xml
45 | plugin: noop
46 | disable_search: true
47 | token: ${{ secrets.CODECOV_TOKEN }}
48 |
49 | - name: Show testthat output
50 | if: always()
51 | run: |
52 | ## --------------------------------------------------------------------
53 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
54 | shell: bash
55 |
56 | - name: Upload test results
57 | if: failure()
58 | uses: actions/upload-artifact@v4
59 | with:
60 | name: coverage-test-failures
61 | path: ${{ runner.temp }}/package
62 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .Rdata
4 | .httr-oauth
5 | .DS_Store
6 | docs
7 | cran-comments.md
8 | rhub.R
9 | CRAN-SUBMISSION
10 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: halfmoon
2 | Title: Techniques to Build Better Balance
3 | Version: 0.1.0.9000
4 | Authors@R:
5 | person("Malcolm", "Barrett", , "malcolmbarrett@gmail.com", role = c("aut", "cre", "cph"),
6 | comment = c(ORCID = "0000-0003-0299-5825"))
7 | Description: Build better balance in causal inference models. 'halfmoon'
8 | helps you assess propensity score models for balance between groups
9 | using metrics like standardized mean differences and visualization
10 | techniques like mirrored histograms. 'halfmoon' supports both
11 | weighting and matching techniques.
12 | License: MIT + file LICENSE
13 | URL: https://github.com/r-causal/halfmoon,
14 | https://r-causal.github.io/halfmoon/
15 | BugReports: https://github.com/r-causal/halfmoon/issues
16 | Depends:
17 | R (>= 2.10)
18 | Imports:
19 | cardx (>= 0.2.3),
20 | cli,
21 | ggplot2,
22 | gtsummary (>= 2.1.0),
23 | rlang,
24 | tidyselect,
25 | tidysmd (>= 0.2.0)
26 | Suggests:
27 | cards,
28 | covr,
29 | dplyr,
30 | survey,
31 | testthat (>= 3.0.0),
32 | vdiffr
33 | Config/testthat/edition: 3
34 | Encoding: UTF-8
35 | LazyData: true
36 | Roxygen: list(markdown = TRUE)
37 | RoxygenNote: 7.3.2
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2023
2 | COPYRIGHT HOLDER: halfmoon authors
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2023 halfmoon authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(add_ess_header)
4 | export(bind_matches)
5 | export(contains)
6 | export(ends_with)
7 | export(ess)
8 | export(everything)
9 | export(geom_ecdf)
10 | export(geom_love)
11 | export(geom_mirror_histogram)
12 | export(last_col)
13 | export(love_plot)
14 | export(matches)
15 | export(num_range)
16 | export(one_of)
17 | export(peek_vars)
18 | export(starts_with)
19 | export(tidy_smd)
20 | importFrom(rlang,.data)
21 | importFrom(rlang,.env)
22 | importFrom(tidyselect,contains)
23 | importFrom(tidyselect,ends_with)
24 | importFrom(tidyselect,everything)
25 | importFrom(tidyselect,last_col)
26 | importFrom(tidyselect,matches)
27 | importFrom(tidyselect,num_range)
28 | importFrom(tidyselect,one_of)
29 | importFrom(tidyselect,peek_vars)
30 | importFrom(tidyselect,starts_with)
31 | importFrom(tidysmd,bind_matches)
32 | importFrom(tidysmd,geom_love)
33 | importFrom(tidysmd,love_plot)
34 | importFrom(tidysmd,tidy_smd)
35 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # halfmoon 0.1.0.9000
2 |
3 | * Added a `NEWS.md` file to track changes to the package.
4 |
--------------------------------------------------------------------------------
/R/add_ess_header.R:
--------------------------------------------------------------------------------
1 | #' Add ESS Table Header
2 | #'
3 | #' This function replaces the counts in the default header of
4 | #' `gtsummary::tbl_svysummary()` tables to counts representing the
5 | #' Effective Sample Size (ESS). See [`ess()`] for details.
6 | #'
7 | #' @param x (`tbl_svysummary`)\cr
8 | #' Object of class `'tbl_svysummary'` typically created with `gtsummary::tbl_svysummary()`.
9 | #' @param header (`string`)\cr
10 | #' String specifying updated header.
11 | #' Review `gtsummary::modify_header()` for details on use.
12 | #'
13 | #' @returns a 'gtsummary' table
14 | #' @export
15 | #' @importFrom rlang .env .data
16 | #'
17 | #' @examplesIf rlang::is_installed(c("survey", "gtsummary", "cards", "cardx", "dplyr"))
18 | #' svy <- survey::svydesign(~1, data = nhefs_weights, weights = ~ w_ate)
19 | #'
20 | #' gtsummary::tbl_svysummary(svy, include = c(age, sex, smokeyrs)) |>
21 | #' add_ess_header()
22 | #' hdr <- paste0(
23 | #' "**{level}** \n",
24 | #' "N = {n_unweighted}; ESS = {format(n, digits = 1, nsmall = 1)}"
25 | #' )
26 | #' gtsummary::tbl_svysummary(svy, by = qsmk, include = c(age, sex, smokeyrs)) |>
27 | #' add_ess_header(header = hdr)
28 | add_ess_header <- function(x, header = "**{level}** \nESS = {format(n, digits = 1, nsmall = 1)}") {
29 | # check inputs ---------------------------------------------------------------
30 | rlang::check_installed(c("cards", "dplyr"))
31 | if (!inherits(x, "tbl_svysummary")) {
32 | cli::cli_abort("Argument {.arg x} must be class {.cls tbl_svysummary} and typically created with {.fun gtsummary::tbl_svysummary}.")
33 | }
34 | if (!rlang::is_string(header)) {
35 | cli::cli_abort("Argument {.arg header} must be a string.")
36 | }
37 | updated_call <- append(x$call_list, list(add_ess_header = match.call()))
38 |
39 | # calculate ARD with ESS counts ----------------------------------------------
40 | ard_ess <- ard_survey_ess(data = x$inputs$data, by = x$inputs$by)
41 | if ("add_overall" %in% names(x$call_list)) {
42 | ard_ess_overall <- ard_survey_ess(data = x$inputs$data)
43 | }
44 |
45 | # replace statistics in `x$table_styling$table_header` with ESS --------------
46 | # no `tbl_svysummary(by)` specified
47 | if (rlang::is_empty(x$inputs$by)) {
48 | x$table_styling$header$modify_stat_level <- "Overall"
49 | x$table_styling$header$modify_stat_N <- ard_ess$stat[[1]]
50 | x$table_styling$header$modify_stat_n <- ard_ess$stat[[1]]
51 | x$table_styling$header$modify_stat_p <- 1
52 | }
53 | # with a `tbl_svysummary(by)` value but no overall column
54 | else if (!"add_overall" %in% names(x$call_list)) {
55 | x$table_styling$header <-
56 | dplyr::rows_update(
57 | x$table_styling$header,
58 | ard_ess |>
59 | dplyr::mutate(
60 | column = paste0("stat_", seq_len(nrow(.env$ard_ess))),
61 | modify_stat_level = lapply(.data$group1_level, FUN = as.character) |> unlist(),
62 | modify_stat_n = unlist(.data$stat),
63 | modify_stat_N = sum(.data$modify_stat_n),
64 | modify_stat_p = .data$modify_stat_n / .data$modify_stat_N
65 | ) |>
66 | dplyr::select("column", tidyselect::starts_with("modify_stat_")),
67 | by = "column"
68 | ) |>
69 | dplyr::mutate(
70 | modify_stat_N = sum(.data$modify_stat_n, na.rm = TRUE)
71 | )
72 | }
73 | # with both a `tbl_svysummary(by)` value and an overall column
74 | else {
75 | x$table_styling$header <-
76 | dplyr::rows_update(
77 | x$table_styling$header,
78 | dplyr::bind_rows(ard_ess_overall, ard_ess) |>
79 | dplyr::mutate(
80 | column = paste0("stat_", dplyr::row_number() - 1L),
81 | modify_stat_level = lapply(.data$group1_level, FUN = \(x) as.character(x %||% "Overall")) |> unlist(),
82 | modify_stat_n = unlist(.data$stat)
83 | ) |>
84 | dplyr::select("column", tidyselect::starts_with("modify_stat_")),
85 | by = "column"
86 | ) |>
87 | dplyr::mutate(
88 | modify_stat_N = unlist(.env$ard_ess_overall$stat),
89 | modify_stat_p = .data$modify_stat_n / .data$modify_stat_N
90 | )
91 | }
92 |
93 | # update the header and return table -----------------------------------------
94 | x <- gtsummary::modify_header(x, gtsummary::all_stat_cols() ~ header) # replace header
95 | x$cards$add_ess_header <- ard_ess # add ESS ARD to results
96 | x$call_list <- updated_call # update the call list
97 |
98 | # add abbreviation -----------------------------------------------------------
99 | if (grepl(pattern = "ESS", x = header, fixed = TRUE)) {
100 | x <- gtsummary::modify_abbreviation(x, "ESS = Effective Sample Size")
101 | }
102 |
103 | x
104 | }
105 |
106 | # this is an ARD function in the style of the cardx::ard_survey_*() functions
107 | ard_survey_ess <- function(data, by = NULL) {
108 | # calculate ESS --------------------------------------------------------------
109 | cards::ard_continuous(
110 | data =
111 | data$variables |>
112 | dplyr::mutate(...cards_survey_design_weights_column... = stats::weights(data)),
113 | variables = "...cards_survey_design_weights_column...",
114 | by = {{ by }},
115 | statistic = everything() ~ list(ess = \(x) ess(x))
116 | ) |>
117 | dplyr::mutate(
118 | variable = "..ess..",
119 | context = "survey_ess",
120 | stat_label = "Effective Sample Size"
121 | )
122 | }
123 |
--------------------------------------------------------------------------------
/R/data.R:
--------------------------------------------------------------------------------
1 | #' NHEFS with various propensity score weights
2 | #'
3 | #' A dataset containing various propensity score weights for
4 | #' `causaldata::nhefs_complete`.
5 | #'
6 | #' @format A data frame with 1566 rows and 14 variables: \describe{
7 | #' \item{qsmk}{Quit smoking} \item{race}{Race} \item{age}{Age} \item{sex}{Sex}
8 | #' \item{education}{Education level} \item{smokeintensity}{Smoking intensity}
9 | #' \item{smokeyrs}{Number of smoke-years} \item{exercise}{Exercise level}
10 | #' \item{active}{Daily activity level} \item{wt71}{Participant weight in 1971
11 | #' (baseline)} \item{w_ate}{ATE weight} \item{w_att}{ATT weight}
12 | #' \item{w_atc}{ATC weight} \item{w_atm}{ATM weight} \item{w_ato}{ATO weight}
13 | #' \item{.fitted}{Propensity score}
14 | #' }
15 | "nhefs_weights"
16 |
--------------------------------------------------------------------------------
/R/ess.R:
--------------------------------------------------------------------------------
1 | #' Calculate the Effective Sample Size (ESS)
2 | #'
3 | #' This function computes the effective sample size (ESS) given a vector of
4 | #' weights, using the classical \eqn{(\sum w)^2 / \sum(w^2)} formula (sometimes
5 | #' referred to as "Kish's effective sample size").
6 | #'
7 | #' @param wts A numeric vector of weights (e.g., from survey or
8 | #' inverse-probability weighting).
9 | #'
10 | #' @return A single numeric value representing the effective sample size.
11 | #'
12 | #' @details The effective sample size (ESS) reflects how many observations you
13 | #' would have if all were equally weighted. If the weights vary substantially,
14 | #' the ESS can be much smaller than the actual number of observations.
15 | #' Formally:
16 | #'
17 | #' \deqn{
18 | #' \mathrm{ESS} = \frac{\left(\sum_i w_i\right)^2}{\sum_i w_i^2}.
19 | #' }
20 | #'
21 | #' **Diagnostic Value**:
22 | #' * **Indicator of Weight Concentration**: A large discrepancy between ESS
23 | #' and the actual sample size indicates that a few observations carry
24 | #' disproportionately large weights, effectively reducing the usable
25 | #' information in the dataset.
26 | #' * **Variance Inflation**: A small ESS signals that weighted estimates are
27 | #' more sensitive to a handful of observations, inflating the variance and
28 | #' standard errors.
29 | #' * **Practical Guidance**: If ESS is much lower than the total sample
30 | #' size, it is advisable to investigate why some weights are extremely large
31 | #' or small. Techniques like weight trimming or stabilized weights might be
32 | #' employed to mitigate the issue
33 | #'
34 | #' @examples
35 | #' # Suppose we have five observations with equal weights
36 | #' wts1 <- rep(1.2, 5)
37 | #' # returns 5, because all weights are equal
38 | #' ess(wts1)
39 | #'
40 | #' # If weights vary more, smaller than 5
41 | #' wts2 <- c(0.5, 2, 2, 0.1, 0.8)
42 | #' ess(wts2)
43 | #'
44 | #' @export
45 | ess <- function(wts) {
46 | sum(wts)^2 / sum(wts^2)
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/R/geom_ecdf.R:
--------------------------------------------------------------------------------
1 | #' Calculate weighted and unweighted empirical cumulative distributions
2 | #'
3 | #' The empirical cumulative distribution function (ECDF) provides an alternative
4 | #' visualization of distribution. `geom_ecdf()` is similar to
5 | #' [`ggplot2::stat_ecdf()`] but it can also calculate weighted ECDFs.
6 | #'
7 | #' @section Aesthetics: In addition to the aesthetics for
8 | #' [`ggplot2::stat_ecdf()`], `geom_ecdf()` also accepts: \itemize{ \item
9 | #' weights }
10 | #'
11 | #' @inheritParams ggplot2::stat_ecdf
12 | #'
13 | #' @return a geom
14 | #' @export
15 | #'
16 | #' @examples
17 | #' library(ggplot2)
18 | #'
19 | #' ggplot(
20 | #' nhefs_weights,
21 | #' aes(x = smokeyrs, color = qsmk)
22 | #' ) +
23 | #' geom_ecdf(aes(weights = w_ato)) +
24 | #' xlab("Smoking Years") +
25 | #' ylab("Proportion <= x")
26 | #'
27 | geom_ecdf <- function(mapping = NULL, data = NULL, geom = "step", position = "identity",
28 | ..., n = NULL, pad = TRUE, na.rm = FALSE, show.legend = NA,
29 | inherit.aes = TRUE) {
30 | ggplot2::layer(
31 | data = data, mapping = mapping, stat = StatWeightedECDF, geom = geom,
32 | position = position, show.legend = show.legend, inherit.aes = inherit.aes,
33 | params = list(n = n, pad = pad, na.rm = na.rm, ...)
34 | )
35 | }
36 |
37 | StatWeightedECDF <- ggplot2::ggproto(
38 | "StatWeightedECDF",
39 | ggplot2::StatEcdf,
40 | compute_group = function(data, scales, n = NULL, pad = NULL) {
41 | if ("weights" %in% names(data)) {
42 | data <- data[order(data$x), ]
43 | # ggplot2 3.4.1 changed this stat's name from `y` to `ecdf`
44 | if (packageVersion("ggplot2") >= "3.4.1") {
45 | data$ecdf <- cumsum(data$weights) / sum(data$weights)
46 | } else {
47 | data$y <- cumsum(data$weights) / sum(data$weights)
48 | }
49 | data
50 | } else {
51 | ggplot2::StatEcdf$compute_group(data, scales, n = n, pad = pad)
52 | }
53 | },
54 | required_aes = c("x"),
55 | optional_aes = "weights"
56 | )
57 |
--------------------------------------------------------------------------------
/R/geom_mirrored_histogram.R:
--------------------------------------------------------------------------------
1 | #' Create mirrored histograms
2 | #'
3 | #' @inheritParams ggplot2::geom_histogram
4 | #'
5 | #' @return a geom
6 | #' @export
7 | #'
8 | #' @examples
9 | #' library(ggplot2)
10 | #' ggplot(nhefs_weights, aes(.fitted)) +
11 | #' geom_mirror_histogram(
12 | #' aes(group = qsmk),
13 | #' bins = 50
14 | #' ) +
15 | #' geom_mirror_histogram(
16 | #' aes(fill = qsmk, weight = w_ate),
17 | #' bins = 50,
18 | #' alpha = 0.5
19 | #' ) +
20 | #' scale_y_continuous(labels = abs)
21 | geom_mirror_histogram <- function(
22 | mapping = NULL,
23 | data = NULL,
24 | position = "stack",
25 | ...,
26 | binwidth = NULL,
27 | bins = NULL,
28 | na.rm = FALSE,
29 | orientation = NA,
30 | show.legend = NA,
31 | inherit.aes = TRUE
32 | ) {
33 | ggplot2::geom_histogram(
34 | mapping = mapping,
35 | data = data,
36 | stat = StatMirrorCount,
37 | position = position,
38 | ...,
39 | binwidth = binwidth,
40 | bins = bins,
41 | na.rm = na.rm,
42 | orientation = orientation,
43 | show.legend = show.legend,
44 | inherit.aes = inherit.aes
45 | )
46 | }
47 |
48 | StatMirrorCount <- ggplot2::ggproto(
49 | "StatMirrorCount",
50 | ggplot2::StatBin,
51 | compute_group = function(data, scales, binwidth = NULL, bins = NULL,
52 | center = NULL, boundary = NULL,
53 | closed = c("right", "left"), pad = FALSE,
54 | breaks = NULL, flipped_aes = FALSE,
55 | origin = NULL, right = NULL, drop = NULL) {
56 | group <- unique(data$group)
57 | data <- ggplot2::StatBin$compute_group(data = data, scales = scales, binwidth = binwidth, bins = bins,
58 | center = center, boundary = boundary,
59 | closed = closed, pad = pad,
60 | breaks = breaks, flipped_aes = flipped_aes,
61 | origin = origin, right = right, drop = drop)
62 | if (group == 1) {
63 | data$count <- -data$count
64 | } else if (group > 2) {
65 | abort(
66 | "Groups of three or greater not supported in `geom_mirror_histogram()`"
67 | )
68 | } else if (group == -1) {
69 | abort(c(
70 | "No group detected.",
71 | "*" = "Do you need to use {.var aes(group = ...)} \\
72 | with your grouping variable?"
73 | ))
74 | }
75 | data
76 | }
77 | )
78 |
--------------------------------------------------------------------------------
/R/tidyselect-reexports.R:
--------------------------------------------------------------------------------
1 | # re-export functions from {tidyselect}
2 |
3 | #' @aliases select_helpers
4 | #' @importFrom tidyselect peek_vars
5 | #' @export
6 | tidyselect::peek_vars
7 | #' @aliases select_helpers
8 | #' @importFrom tidyselect contains
9 | #' @export
10 | tidyselect::contains
11 | #' @importFrom tidyselect ends_with
12 | #' @export
13 | tidyselect::ends_with
14 | #' @importFrom tidyselect everything
15 | #' @export
16 | tidyselect::everything
17 | #' @importFrom tidyselect matches
18 | #' @export
19 | tidyselect::matches
20 | #' @importFrom tidyselect num_range
21 | #' @export
22 | tidyselect::num_range
23 | #' @importFrom tidyselect one_of
24 | #' @export
25 | tidyselect::one_of
26 | #' @importFrom tidyselect starts_with
27 | #' @export
28 | tidyselect::starts_with
29 | #' @importFrom tidyselect last_col
30 | #' @export
31 | tidyselect::last_col
32 |
--------------------------------------------------------------------------------
/R/tidysmd-rexports.R:
--------------------------------------------------------------------------------
1 | # re-export functions from {tidysmd}
2 |
3 | #' @aliases tidysmd
4 | #' @importFrom tidysmd geom_love
5 | #' @export
6 | tidysmd::geom_love
7 | #' @aliases tidysmd
8 | #' @importFrom tidysmd love_plot
9 | #' @export
10 | tidysmd::love_plot
11 | #' @aliases tidysmd
12 | #' @importFrom tidysmd tidy_smd
13 | #' @export
14 | tidysmd::tidy_smd
15 | #' @aliases tidysmd
16 | #' @importFrom tidysmd bind_matches
17 | #' @export
18 | tidysmd::bind_matches
19 |
20 |
--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
1 | # future implementations for interface
2 | #
3 | # be_quiet <- function() {
4 | # getOption("halfmoon.quiet", default = FALSE)
5 | # }
6 | #
7 | # alert_info <- function(.message) {
8 | # if (!be_quiet()) {
9 | # cli::cli_alert_info(text = .message)
10 | # }
11 | # }
12 |
13 | abort <- function(.message) {
14 | cli::cli_abort(message = .message)
15 | }
16 |
17 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, include = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "man/figures/README-",
12 | out.width = "100%"
13 | )
14 | ```
15 |
16 | # halfmoon
17 |
18 |
19 | [](https://github.com/r-causal/halfmoon/actions/workflows/R-CMD-check.yaml)
20 | [](https://app.codecov.io/gh/malcolmbarrett/halfmoon?branch=main)
21 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental)
22 | [](https://CRAN.R-project.org/package=halfmoon)
23 |
24 |
25 | > Within light there is darkness,
26 | but do not try to understand that darkness.
27 | Within darkness there is light,
28 | but do not look for that light.
29 | Light and darkness are a pair
30 | like the foot before and the foot behind in walking.
31 |
32 | -- From the Zen teaching poem [Sandokai](https://en.wikipedia.org/wiki/Sandokai).
33 |
34 | The goal of halfmoon is to cultivate balance in propensity score models.
35 |
36 | ## Installation
37 |
38 | You can install the most recent version of halfmoon from CRAN with:
39 |
40 | ``` r
41 | install.packages("halfmoon")
42 | ```
43 |
44 | You can also install the development version of halfmoon from [GitHub](https://github.com/) with:
45 |
46 | ``` r
47 | # install.packages("devtools")
48 | devtools::install_github("malcolmbarrett/halfmoon")
49 | ```
50 |
51 | ## Example: Weighting
52 |
53 | halfmoon includes several techniques for assessing the balance created by propensity score weights.
54 |
55 | ```{r example}
56 | library(halfmoon)
57 | library(ggplot2)
58 |
59 | # weighted mirrored histograms
60 | ggplot(nhefs_weights, aes(.fitted)) +
61 | geom_mirror_histogram(
62 | aes(group = qsmk),
63 | bins = 50
64 | ) +
65 | geom_mirror_histogram(
66 | aes(fill = qsmk, weight = w_ate),
67 | bins = 50,
68 | alpha = 0.5
69 | ) + scale_y_continuous(labels = abs)
70 |
71 | # weighted ecdf
72 | ggplot(
73 | nhefs_weights,
74 | aes(x = smokeyrs, color = qsmk)
75 | ) +
76 | geom_ecdf(aes(weights = w_ato)) +
77 | xlab("Smoking Years") +
78 | ylab("Proportion <= x")
79 |
80 | # weighted SMDs
81 | plot_df <- tidy_smd(
82 | nhefs_weights,
83 | race:active,
84 | .group = qsmk,
85 | .wts = starts_with("w_")
86 | )
87 |
88 | ggplot(
89 | plot_df,
90 | aes(
91 | x = abs(smd),
92 | y = variable,
93 | group = method,
94 | color = method
95 | )
96 | ) +
97 | geom_love()
98 | ```
99 |
100 | ## Example: Matching
101 |
102 | halfmoon also has support for working with matched datasets. Consider these two objects from the [MatchIt](https://github.com/kosukeimai/MatchIt) documentation:
103 |
104 | ```{r}
105 | library(MatchIt)
106 | # Default: 1:1 NN PS matching w/o replacement
107 | m.out1 <- matchit(treat ~ age + educ + race + nodegree +
108 | married + re74 + re75, data = lalonde)
109 |
110 | # 1:1 NN Mahalanobis distance matching w/ replacement and
111 | # exact matching on married and race
112 | m.out2 <- matchit(treat ~ age + educ + race + nodegree +
113 | married + re74 + re75, data = lalonde,
114 | distance = "mahalanobis", replace = TRUE,
115 | exact = ~ married + race)
116 | ```
117 |
118 | One option is to just look at the matched dataset with halfmoon:
119 |
120 | ```{r}
121 | matched_data <- get_matches(m.out1)
122 |
123 | match_smd <- tidy_smd(
124 | matched_data,
125 | c(age, educ, race, nodegree, married, re74, re75),
126 | .group = treat
127 | )
128 |
129 | love_plot(match_smd)
130 | ```
131 |
132 | The downside here is that you can't compare multiple matching strategies to the observed dataset; the label on the plot is also wrong. halfmoon comes with a helper function, `bind_matches()`, that creates a dataset more appropriate for this task:
133 |
134 | ```{r}
135 | matches <- bind_matches(lalonde, m.out1, m.out2)
136 | head(matches)
137 | ```
138 |
139 | `matches` includes an binary variable for each `matchit` object which indicates if the row was included in the match or not. Since downweighting to 0 is equivalent to filtering the datasets to the matches, we can more easily compare multiple matched datasets with `.wts`:
140 |
141 | ```{r}
142 | many_matched_smds <- tidy_smd(
143 | matches,
144 | c(age, educ, race, nodegree, married, re74, re75),
145 | .group = treat,
146 | .wts = c(m.out1, m.out2)
147 | )
148 |
149 | love_plot(many_matched_smds)
150 | ```
151 |
152 | We can also extend the idea that matching indicators are weights to weighted mirrored histograms, giving us a good idea of the range of propensity scores that are being removed from the dataset.
153 |
154 | ```{r}
155 | # use the distance as the propensity score
156 | matches$ps <- m.out1$distance
157 |
158 | ggplot(matches, aes(ps)) +
159 | geom_mirror_histogram(
160 | aes(group = factor(treat)),
161 | bins = 50
162 | ) +
163 | geom_mirror_histogram(
164 | aes(fill = factor(treat), weight = m.out1),
165 | bins = 50,
166 | alpha = 0.5
167 | ) + scale_y_continuous(labels = abs)
168 | ```
169 |
170 |
171 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # halfmoon
5 |
6 |
7 |
8 | [](https://github.com/r-causal/halfmoon/actions/workflows/R-CMD-check.yaml)
9 | [](https://app.codecov.io/gh/malcolmbarrett/halfmoon?branch=main)
11 | [](https://lifecycle.r-lib.org/articles/stages.html#experimental)
13 | [](https://CRAN.R-project.org/package=halfmoon)
15 |
16 |
17 | > Within light there is darkness, but do not try to understand that
18 | > darkness. Within darkness there is light, but do not look for that
19 | > light. Light and darkness are a pair like the foot before and the foot
20 | > behind in walking.
21 |
22 | – From the Zen teaching poem
23 | [Sandokai](https://en.wikipedia.org/wiki/Sandokai).
24 |
25 | The goal of halfmoon is to cultivate balance in propensity score models.
26 |
27 | ## Installation
28 |
29 | You can install the most recent version of halfmoon from CRAN with:
30 |
31 | ``` r
32 | install.packages("halfmoon")
33 | ```
34 |
35 | You can also install the development version of halfmoon from
36 | [GitHub](https://github.com/) with:
37 |
38 | ``` r
39 | # install.packages("devtools")
40 | devtools::install_github("malcolmbarrett/halfmoon")
41 | ```
42 |
43 | ## Example: Weighting
44 |
45 | halfmoon includes several techniques for assessing the balance created
46 | by propensity score weights.
47 |
48 | ``` r
49 | library(halfmoon)
50 | library(ggplot2)
51 |
52 | # weighted mirrored histograms
53 | ggplot(nhefs_weights, aes(.fitted)) +
54 | geom_mirror_histogram(
55 | aes(group = qsmk),
56 | bins = 50
57 | ) +
58 | geom_mirror_histogram(
59 | aes(fill = qsmk, weight = w_ate),
60 | bins = 50,
61 | alpha = 0.5
62 | ) + scale_y_continuous(labels = abs)
63 | ```
64 |
65 |
66 |
67 | ``` r
68 |
69 | # weighted ecdf
70 | ggplot(
71 | nhefs_weights,
72 | aes(x = smokeyrs, color = qsmk)
73 | ) +
74 | geom_ecdf(aes(weights = w_ato)) +
75 | xlab("Smoking Years") +
76 | ylab("Proportion <= x")
77 | ```
78 |
79 |
80 |
81 | ``` r
82 |
83 | # weighted SMDs
84 | plot_df <- tidy_smd(
85 | nhefs_weights,
86 | race:active,
87 | .group = qsmk,
88 | .wts = starts_with("w_")
89 | )
90 |
91 | ggplot(
92 | plot_df,
93 | aes(
94 | x = abs(smd),
95 | y = variable,
96 | group = method,
97 | color = method
98 | )
99 | ) +
100 | geom_love()
101 | ```
102 |
103 |
104 |
105 | ## Example: Matching
106 |
107 | halfmoon also has support for working with matched datasets. Consider
108 | these two objects from the
109 | [MatchIt](https://github.com/kosukeimai/MatchIt) documentation:
110 |
111 | ``` r
112 | library(MatchIt)
113 | # Default: 1:1 NN PS matching w/o replacement
114 | m.out1 <- matchit(treat ~ age + educ + race + nodegree +
115 | married + re74 + re75, data = lalonde)
116 |
117 | # 1:1 NN Mahalanobis distance matching w/ replacement and
118 | # exact matching on married and race
119 | m.out2 <- matchit(treat ~ age + educ + race + nodegree +
120 | married + re74 + re75, data = lalonde,
121 | distance = "mahalanobis", replace = TRUE,
122 | exact = ~ married + race)
123 | ```
124 |
125 | One option is to just look at the matched dataset with halfmoon:
126 |
127 | ``` r
128 | matched_data <- get_matches(m.out1)
129 |
130 | match_smd <- tidy_smd(
131 | matched_data,
132 | c(age, educ, race, nodegree, married, re74, re75),
133 | .group = treat
134 | )
135 |
136 | love_plot(match_smd)
137 | ```
138 |
139 |
140 |
141 | The downside here is that you can’t compare multiple matching strategies
142 | to the observed dataset; the label on the plot is also wrong. halfmoon
143 | comes with a helper function, `bind_matches()`, that creates a dataset
144 | more appropriate for this task:
145 |
146 | ``` r
147 | matches <- bind_matches(lalonde, m.out1, m.out2)
148 | head(matches)
149 | #> treat age educ race married nodegree re74 re75 re78 m.out1 m.out2
150 | #> NSW1 1 37 11 black 1 1 0 0 9930.0460 1 1
151 | #> NSW2 1 22 9 hispan 0 1 0 0 3595.8940 1 1
152 | #> NSW3 1 30 12 black 0 0 0 0 24909.4500 1 1
153 | #> NSW4 1 27 11 black 0 1 0 0 7506.1460 1 1
154 | #> NSW5 1 33 8 black 0 1 0 0 289.7899 1 1
155 | #> NSW6 1 22 9 black 0 1 0 0 4056.4940 1 1
156 | ```
157 |
158 | `matches` includes an binary variable for each `matchit` object which
159 | indicates if the row was included in the match or not. Since
160 | downweighting to 0 is equivalent to filtering the datasets to the
161 | matches, we can more easily compare multiple matched datasets with
162 | `.wts`:
163 |
164 | ``` r
165 | many_matched_smds <- tidy_smd(
166 | matches,
167 | c(age, educ, race, nodegree, married, re74, re75),
168 | .group = treat,
169 | .wts = c(m.out1, m.out2)
170 | )
171 |
172 | love_plot(many_matched_smds)
173 | ```
174 |
175 |
176 |
177 | We can also extend the idea that matching indicators are weights to
178 | weighted mirrored histograms, giving us a good idea of the range of
179 | propensity scores that are being removed from the dataset.
180 |
181 | ``` r
182 | # use the distance as the propensity score
183 | matches$ps <- m.out1$distance
184 |
185 | ggplot(matches, aes(ps)) +
186 | geom_mirror_histogram(
187 | aes(group = factor(treat)),
188 | bins = 50
189 | ) +
190 | geom_mirror_histogram(
191 | aes(fill = factor(treat), weight = m.out1),
192 | bins = 50,
193 | alpha = 0.5
194 | ) + scale_y_continuous(labels = abs)
195 | ```
196 |
197 |
198 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://r-causal.github.io/halfmoon/
2 | template:
3 | math-rendering: mathjax
4 | bootstrap: 5
5 |
6 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | informational: true
10 | patch:
11 | default:
12 | target: auto
13 | threshold: 1%
14 | informational: true
15 |
--------------------------------------------------------------------------------
/data-raw/nhefs_weights.R:
--------------------------------------------------------------------------------
1 | ## code to prepare `nhefs_weights` dataset goes here
2 | ## code to prepare `nhefs_weights` dataset goes here
3 | library(tidyverse)
4 | library(broom)
5 | library(causaldata)
6 | propensity_model <- glm(
7 | qsmk ~ sex +
8 | race + age + I(age^2) + education +
9 | smokeintensity + I(smokeintensity^2) +
10 | smokeyrs + I(smokeyrs^2) + exercise + active +
11 | wt71 + I(wt71^2),
12 | family = binomial(),
13 | data = nhefs_complete
14 | )
15 |
16 | nhefs_weights <- propensity_model %>%
17 | augment(type.predict = "response", data = nhefs_complete) %>%
18 | mutate(
19 | wts = 1 / ifelse(qsmk == 0, 1 - .fitted, .fitted),
20 | w_ate = (qsmk / .fitted) +
21 | ((1 - qsmk) / (1 - .fitted)),
22 | w_att = ((.fitted * qsmk) / .fitted) +
23 | ((.fitted * (1 - qsmk)) / (1 - .fitted)),
24 | w_atc = (((1 - .fitted) * qsmk) / .fitted) +
25 | (((1 - .fitted) * (1 - qsmk)) / (1 - .fitted)),
26 | w_atm = pmin(.fitted, 1 - .fitted) /
27 | (qsmk * .fitted + (1 - qsmk) * (1 - .fitted)),
28 | w_ato = (1 - .fitted) * qsmk +
29 | .fitted * (1 - qsmk)
30 | ) %>%
31 | select(
32 | qsmk,
33 | race,
34 | age,
35 | sex,
36 | education,
37 | smokeintensity,
38 | smokeyrs,
39 | exercise,
40 | active,
41 | wt71,
42 | starts_with("w_"),
43 | .fitted
44 | ) %>%
45 | mutate(qsmk = factor(qsmk))
46 |
47 | usethis::use_data(nhefs_weights, overwrite = TRUE)
48 |
--------------------------------------------------------------------------------
/data/nhefs_weights.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/data/nhefs_weights.rda
--------------------------------------------------------------------------------
/halfmoon.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 | ProjectId: 69b122f4-ff94-42bb-90ff-53e4e8e8cf6a
3 |
4 | RestoreWorkspace: No
5 | SaveWorkspace: No
6 | AlwaysSaveHistory: Default
7 |
8 | EnableCodeIndexing: Yes
9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 |
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 |
16 | AutoAppendNewline: Yes
17 | StripTrailingWhitespace: Yes
18 | LineEndingConversion: Posix
19 |
20 | BuildType: Package
21 | PackageUseDevtools: Yes
22 | PackageInstallArgs: --no-multiarch --with-keep.source
23 | PackageRoxygenize: rd,collate,namespace
24 |
--------------------------------------------------------------------------------
/man/add_ess_header.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/add_ess_header.R
3 | \name{add_ess_header}
4 | \alias{add_ess_header}
5 | \title{Add ESS Table Header}
6 | \usage{
7 | add_ess_header(
8 | x,
9 | header = "**{level}** \\nESS = {format(n, digits = 1, nsmall = 1)}"
10 | )
11 | }
12 | \arguments{
13 | \item{x}{(\code{tbl_svysummary})\cr
14 | Object of class \code{'tbl_svysummary'} typically created with \code{gtsummary::tbl_svysummary()}.}
15 |
16 | \item{header}{(\code{string})\cr
17 | String specifying updated header.
18 | Review \code{gtsummary::modify_header()} for details on use.}
19 | }
20 | \value{
21 | a 'gtsummary' table
22 | }
23 | \description{
24 | This function replaces the counts in the default header of
25 | \code{gtsummary::tbl_svysummary()} tables to counts representing the
26 | Effective Sample Size (ESS). See \code{\link[=ess]{ess()}} for details.
27 | }
28 | \examples{
29 | \dontshow{if (rlang::is_installed(c("survey", "gtsummary", "cards", "cardx", "dplyr"))) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
30 | svy <- survey::svydesign(~1, data = nhefs_weights, weights = ~ w_ate)
31 |
32 | gtsummary::tbl_svysummary(svy, include = c(age, sex, smokeyrs)) |>
33 | add_ess_header()
34 | hdr <- paste0(
35 | "**{level}** \n",
36 | "N = {n_unweighted}; ESS = {format(n, digits = 1, nsmall = 1)}"
37 | )
38 | gtsummary::tbl_svysummary(svy, by = qsmk, include = c(age, sex, smokeyrs)) |>
39 | add_ess_header(header = hdr)
40 | \dontshow{\}) # examplesIf}
41 | }
42 |
--------------------------------------------------------------------------------
/man/ess.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ess.R
3 | \name{ess}
4 | \alias{ess}
5 | \title{Calculate the Effective Sample Size (ESS)}
6 | \usage{
7 | ess(wts)
8 | }
9 | \arguments{
10 | \item{wts}{A numeric vector of weights (e.g., from survey or
11 | inverse-probability weighting).}
12 | }
13 | \value{
14 | A single numeric value representing the effective sample size.
15 | }
16 | \description{
17 | This function computes the effective sample size (ESS) given a vector of
18 | weights, using the classical \eqn{(\sum w)^2 / \sum(w^2)} formula (sometimes
19 | referred to as "Kish's effective sample size").
20 | }
21 | \details{
22 | The effective sample size (ESS) reflects how many observations you
23 | would have if all were equally weighted. If the weights vary substantially,
24 | the ESS can be much smaller than the actual number of observations.
25 | Formally:
26 |
27 | \deqn{
28 | \mathrm{ESS} = \frac{\left(\sum_i w_i\right)^2}{\sum_i w_i^2}.
29 | }
30 |
31 | \strong{Diagnostic Value}:
32 | \itemize{
33 | \item \strong{Indicator of Weight Concentration}: A large discrepancy between ESS
34 | and the actual sample size indicates that a few observations carry
35 | disproportionately large weights, effectively reducing the usable
36 | information in the dataset.
37 | \item \strong{Variance Inflation}: A small ESS signals that weighted estimates are
38 | more sensitive to a handful of observations, inflating the variance and
39 | standard errors.
40 | \item \strong{Practical Guidance}: If ESS is much lower than the total sample
41 | size, it is advisable to investigate why some weights are extremely large
42 | or small. Techniques like weight trimming or stabilized weights might be
43 | employed to mitigate the issue
44 | }
45 | }
46 | \examples{
47 | # Suppose we have five observations with equal weights
48 | wts1 <- rep(1.2, 5)
49 | # returns 5, because all weights are equal
50 | ess(wts1)
51 |
52 | # If weights vary more, smaller than 5
53 | wts2 <- c(0.5, 2, 2, 0.1, 0.8)
54 | ess(wts2)
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/man/figures/README-example-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-example-1.png
--------------------------------------------------------------------------------
/man/figures/README-example-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-example-2.png
--------------------------------------------------------------------------------
/man/figures/README-example-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-example-3.png
--------------------------------------------------------------------------------
/man/figures/README-pressure-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-pressure-1.png
--------------------------------------------------------------------------------
/man/figures/README-unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-unnamed-chunk-3-1.png
--------------------------------------------------------------------------------
/man/figures/README-unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-unnamed-chunk-5-1.png
--------------------------------------------------------------------------------
/man/figures/README-unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/README-unnamed-chunk-6-1.png
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/man/figures/logo.png
--------------------------------------------------------------------------------
/man/geom_ecdf.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/geom_ecdf.R
3 | \name{geom_ecdf}
4 | \alias{geom_ecdf}
5 | \title{Calculate weighted and unweighted empirical cumulative distributions}
6 | \usage{
7 | geom_ecdf(
8 | mapping = NULL,
9 | data = NULL,
10 | geom = "step",
11 | position = "identity",
12 | ...,
13 | n = NULL,
14 | pad = TRUE,
15 | na.rm = FALSE,
16 | show.legend = NA,
17 | inherit.aes = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
22 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
23 | at the top level of the plot. You must supply \code{mapping} if there is no plot
24 | mapping.}
25 |
26 | \item{data}{The data to be displayed in this layer. There are three
27 | options:
28 |
29 | If \code{NULL}, the default, the data is inherited from the plot
30 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
31 |
32 | A \code{data.frame}, or other object, will override the plot
33 | data. All objects will be fortified to produce a data frame. See
34 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
35 |
36 | A \code{function} will be called with a single argument,
37 | the plot data. The return value must be a \code{data.frame}, and
38 | will be used as the layer data. A \code{function} can be created
39 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
40 |
41 | \item{geom}{The geometric object to use to display the data for this layer.
42 | When using a \verb{stat_*()} function to construct a layer, the \code{geom} argument
43 | can be used to override the default coupling between stats and geoms. The
44 | \code{geom} argument accepts the following:
45 | \itemize{
46 | \item A \code{Geom} ggproto subclass, for example \code{GeomPoint}.
47 | \item A string naming the geom. To give the geom as a string, strip the
48 | function name of the \code{geom_} prefix. For example, to use \code{geom_point()},
49 | give the geom as \code{"point"}.
50 | \item For more information and other ways to specify the geom, see the
51 | \link[ggplot2:layer_geoms]{layer geom} documentation.
52 | }}
53 |
54 | \item{position}{A position adjustment to use on the data for this layer. This
55 | can be used in various ways, including to prevent overplotting and
56 | improving the display. The \code{position} argument accepts the following:
57 | \itemize{
58 | \item The result of calling a position function, such as \code{position_jitter()}.
59 | This method allows for passing extra arguments to the position.
60 | \item A string naming the position adjustment. To give the position as a
61 | string, strip the function name of the \code{position_} prefix. For example,
62 | to use \code{position_jitter()}, give the position as \code{"jitter"}.
63 | \item For more information and other ways to specify the position, see the
64 | \link[ggplot2:layer_positions]{layer position} documentation.
65 | }}
66 |
67 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These
68 | arguments broadly fall into one of 4 categories below. Notably, further
69 | arguments to the \code{position} argument, or aesthetics that are required
70 | can \emph{not} be passed through \code{...}. Unknown arguments that are not part
71 | of the 4 categories below are ignored.
72 | \itemize{
73 | \item Static aesthetics that are not mapped to a scale, but are at a fixed
74 | value and apply to the layer as a whole. For example, \code{colour = "red"}
75 | or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics}
76 | section that lists the available options. The 'required' aesthetics
77 | cannot be passed on to the \code{params}. Please note that while passing
78 | unmapped aesthetics as vectors is technically possible, the order and
79 | required length is not guaranteed to be parallel to the input data.
80 | \item When constructing a layer using
81 | a \verb{stat_*()} function, the \code{...} argument can be used to pass on
82 | parameters to the \code{geom} part of the layer. An example of this is
83 | \code{stat_density(geom = "area", outline.type = "both")}. The geom's
84 | documentation lists which parameters it can accept.
85 | \item Inversely, when constructing a layer using a
86 | \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters
87 | to the \code{stat} part of the layer. An example of this is
88 | \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation
89 | lists which parameters it can accept.
90 | \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through
91 | \code{...}. This can be one of the functions described as
92 | \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend.
93 | }}
94 |
95 | \item{n}{if NULL, do not interpolate. If not NULL, this is the number
96 | of points to interpolate with.}
97 |
98 | \item{pad}{If \code{TRUE}, pad the ecdf with additional points (-Inf, 0)
99 | and (Inf, 1)}
100 |
101 | \item{na.rm}{If \code{FALSE} (the default), removes missing values with
102 | a warning. If \code{TRUE} silently removes missing values.}
103 |
104 | \item{show.legend}{logical. Should this layer be included in the legends?
105 | \code{NA}, the default, includes if any aesthetics are mapped.
106 | \code{FALSE} never includes, and \code{TRUE} always includes.
107 | It can also be a named logical vector to finely select the aesthetics to
108 | display.}
109 |
110 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
111 | rather than combining with them. This is most useful for helper functions
112 | that define both data and aesthetics and shouldn't inherit behaviour from
113 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
114 | }
115 | \value{
116 | a geom
117 | }
118 | \description{
119 | The empirical cumulative distribution function (ECDF) provides an alternative
120 | visualization of distribution. \code{geom_ecdf()} is similar to
121 | \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}} but it can also calculate weighted ECDFs.
122 | }
123 | \section{Aesthetics}{
124 | In addition to the aesthetics for
125 | \code{\link[ggplot2:stat_ecdf]{ggplot2::stat_ecdf()}}, \code{geom_ecdf()} also accepts: \itemize{ \item
126 | weights }
127 | }
128 |
129 | \examples{
130 | library(ggplot2)
131 |
132 | ggplot(
133 | nhefs_weights,
134 | aes(x = smokeyrs, color = qsmk)
135 | ) +
136 | geom_ecdf(aes(weights = w_ato)) +
137 | xlab("Smoking Years") +
138 | ylab("Proportion <= x")
139 |
140 | }
141 |
--------------------------------------------------------------------------------
/man/geom_mirror_histogram.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/geom_mirrored_histogram.R
3 | \name{geom_mirror_histogram}
4 | \alias{geom_mirror_histogram}
5 | \title{Create mirrored histograms}
6 | \usage{
7 | geom_mirror_histogram(
8 | mapping = NULL,
9 | data = NULL,
10 | position = "stack",
11 | ...,
12 | binwidth = NULL,
13 | bins = NULL,
14 | na.rm = FALSE,
15 | orientation = NA,
16 | show.legend = NA,
17 | inherit.aes = TRUE
18 | )
19 | }
20 | \arguments{
21 | \item{mapping}{Set of aesthetic mappings created by \code{\link[ggplot2:aes]{aes()}}. If specified and
22 | \code{inherit.aes = TRUE} (the default), it is combined with the default mapping
23 | at the top level of the plot. You must supply \code{mapping} if there is no plot
24 | mapping.}
25 |
26 | \item{data}{The data to be displayed in this layer. There are three
27 | options:
28 |
29 | If \code{NULL}, the default, the data is inherited from the plot
30 | data as specified in the call to \code{\link[ggplot2:ggplot]{ggplot()}}.
31 |
32 | A \code{data.frame}, or other object, will override the plot
33 | data. All objects will be fortified to produce a data frame. See
34 | \code{\link[ggplot2:fortify]{fortify()}} for which variables will be created.
35 |
36 | A \code{function} will be called with a single argument,
37 | the plot data. The return value must be a \code{data.frame}, and
38 | will be used as the layer data. A \code{function} can be created
39 | from a \code{formula} (e.g. \code{~ head(.x, 10)}).}
40 |
41 | \item{position}{A position adjustment to use on the data for this layer. This
42 | can be used in various ways, including to prevent overplotting and
43 | improving the display. The \code{position} argument accepts the following:
44 | \itemize{
45 | \item The result of calling a position function, such as \code{position_jitter()}.
46 | This method allows for passing extra arguments to the position.
47 | \item A string naming the position adjustment. To give the position as a
48 | string, strip the function name of the \code{position_} prefix. For example,
49 | to use \code{position_jitter()}, give the position as \code{"jitter"}.
50 | \item For more information and other ways to specify the position, see the
51 | \link[ggplot2:layer_positions]{layer position} documentation.
52 | }}
53 |
54 | \item{...}{Other arguments passed on to \code{\link[ggplot2:layer]{layer()}}'s \code{params} argument. These
55 | arguments broadly fall into one of 4 categories below. Notably, further
56 | arguments to the \code{position} argument, or aesthetics that are required
57 | can \emph{not} be passed through \code{...}. Unknown arguments that are not part
58 | of the 4 categories below are ignored.
59 | \itemize{
60 | \item Static aesthetics that are not mapped to a scale, but are at a fixed
61 | value and apply to the layer as a whole. For example, \code{colour = "red"}
62 | or \code{linewidth = 3}. The geom's documentation has an \strong{Aesthetics}
63 | section that lists the available options. The 'required' aesthetics
64 | cannot be passed on to the \code{params}. Please note that while passing
65 | unmapped aesthetics as vectors is technically possible, the order and
66 | required length is not guaranteed to be parallel to the input data.
67 | \item When constructing a layer using
68 | a \verb{stat_*()} function, the \code{...} argument can be used to pass on
69 | parameters to the \code{geom} part of the layer. An example of this is
70 | \code{stat_density(geom = "area", outline.type = "both")}. The geom's
71 | documentation lists which parameters it can accept.
72 | \item Inversely, when constructing a layer using a
73 | \verb{geom_*()} function, the \code{...} argument can be used to pass on parameters
74 | to the \code{stat} part of the layer. An example of this is
75 | \code{geom_area(stat = "density", adjust = 0.5)}. The stat's documentation
76 | lists which parameters it can accept.
77 | \item The \code{key_glyph} argument of \code{\link[ggplot2:layer]{layer()}} may also be passed on through
78 | \code{...}. This can be one of the functions described as
79 | \link[ggplot2:draw_key]{key glyphs}, to change the display of the layer in the legend.
80 | }}
81 |
82 | \item{binwidth}{The width of the bins. Can be specified as a numeric value
83 | or as a function that calculates width from unscaled x. Here, "unscaled x"
84 | refers to the original x values in the data, before application of any
85 | scale transformation. When specifying a function along with a grouping
86 | structure, the function will be called once per group.
87 | The default is to use the number of bins in \code{bins},
88 | covering the range of the data. You should always override
89 | this value, exploring multiple widths to find the best to illustrate the
90 | stories in your data.
91 |
92 | The bin width of a date variable is the number of days in each time; the
93 | bin width of a time variable is the number of seconds.}
94 |
95 | \item{bins}{Number of bins. Overridden by \code{binwidth}. Defaults to 30.}
96 |
97 | \item{na.rm}{If \code{FALSE}, the default, missing values are removed with
98 | a warning. If \code{TRUE}, missing values are silently removed.}
99 |
100 | \item{orientation}{The orientation of the layer. The default (\code{NA})
101 | automatically determines the orientation from the aesthetic mapping. In the
102 | rare event that this fails it can be given explicitly by setting \code{orientation}
103 | to either \code{"x"} or \code{"y"}. See the \emph{Orientation} section for more detail.}
104 |
105 | \item{show.legend}{logical. Should this layer be included in the legends?
106 | \code{NA}, the default, includes if any aesthetics are mapped.
107 | \code{FALSE} never includes, and \code{TRUE} always includes.
108 | It can also be a named logical vector to finely select the aesthetics to
109 | display.}
110 |
111 | \item{inherit.aes}{If \code{FALSE}, overrides the default aesthetics,
112 | rather than combining with them. This is most useful for helper functions
113 | that define both data and aesthetics and shouldn't inherit behaviour from
114 | the default plot specification, e.g. \code{\link[ggplot2:borders]{borders()}}.}
115 | }
116 | \value{
117 | a geom
118 | }
119 | \description{
120 | Create mirrored histograms
121 | }
122 | \examples{
123 | library(ggplot2)
124 | ggplot(nhefs_weights, aes(.fitted)) +
125 | geom_mirror_histogram(
126 | aes(group = qsmk),
127 | bins = 50
128 | ) +
129 | geom_mirror_histogram(
130 | aes(fill = qsmk, weight = w_ate),
131 | bins = 50,
132 | alpha = 0.5
133 | ) +
134 | scale_y_continuous(labels = abs)
135 | }
136 |
--------------------------------------------------------------------------------
/man/nhefs_weights.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{nhefs_weights}
5 | \alias{nhefs_weights}
6 | \title{NHEFS with various propensity score weights}
7 | \format{
8 | A data frame with 1566 rows and 14 variables: \describe{
9 | \item{qsmk}{Quit smoking} \item{race}{Race} \item{age}{Age} \item{sex}{Sex}
10 | \item{education}{Education level} \item{smokeintensity}{Smoking intensity}
11 | \item{smokeyrs}{Number of smoke-years} \item{exercise}{Exercise level}
12 | \item{active}{Daily activity level} \item{wt71}{Participant weight in 1971
13 | (baseline)} \item{w_ate}{ATE weight} \item{w_att}{ATT weight}
14 | \item{w_atc}{ATC weight} \item{w_atm}{ATM weight} \item{w_ato}{ATO weight}
15 | \item{.fitted}{Propensity score}
16 | }
17 | }
18 | \usage{
19 | nhefs_weights
20 | }
21 | \description{
22 | A dataset containing various propensity score weights for
23 | \code{causaldata::nhefs_complete}.
24 | }
25 | \keyword{datasets}
26 |
--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tidyselect-reexports.R, R/tidysmd-rexports.R
3 | \docType{import}
4 | \name{reexports}
5 | \alias{reexports}
6 | \alias{peek_vars}
7 | \alias{select_helpers}
8 | \alias{contains}
9 | \alias{ends_with}
10 | \alias{everything}
11 | \alias{matches}
12 | \alias{num_range}
13 | \alias{one_of}
14 | \alias{starts_with}
15 | \alias{last_col}
16 | \alias{geom_love}
17 | \alias{tidysmd}
18 | \alias{love_plot}
19 | \alias{tidy_smd}
20 | \alias{bind_matches}
21 | \title{Objects exported from other packages}
22 | \keyword{internal}
23 | \description{
24 | These objects are imported from other packages. Follow the links
25 | below to see their documentation.
26 |
27 | \describe{
28 | \item{tidyselect}{\code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{peek_vars}}, \code{\link[tidyselect]{starts_with}}}
29 |
30 | \item{tidysmd}{\code{\link[tidysmd]{bind_matches}}, \code{\link[tidysmd]{geom_love}}, \code{\link[tidysmd:geom_love]{love_plot}}, \code{\link[tidysmd]{tidy_smd}}}
31 | }}
32 |
33 |
--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/apple-touch-icon.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/favicon-96x96.png
--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/favicon.ico
--------------------------------------------------------------------------------
/pkgdown/favicon/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "",
3 | "short_name": "",
4 | "icons": [
5 | {
6 | "src": "/web-app-manifest-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png",
9 | "purpose": "maskable"
10 | },
11 | {
12 | "src": "/web-app-manifest-512x512.png",
13 | "sizes": "512x512",
14 | "type": "image/png",
15 | "purpose": "maskable"
16 | }
17 | ],
18 | "theme_color": "#ffffff",
19 | "background_color": "#ffffff",
20 | "display": "standalone"
21 | }
--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/web-app-manifest-192x192.png
--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/pkgdown/favicon/web-app-manifest-512x512.png
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # This file is part of the standard setup for testthat.
2 | # It is recommended that you do not modify it.
3 | #
4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in:
6 | # * https://r-pkgs.org/tests.html
7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files
8 |
9 | library(testthat)
10 | library(halfmoon)
11 |
12 | test_check("halfmoon")
13 |
--------------------------------------------------------------------------------
/tests/testthat/Rplots.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-causal/halfmoon/be787f213be3f159cb94863328e61f7254f074dd/tests/testthat/Rplots.pdf
--------------------------------------------------------------------------------
/tests/testthat/_snaps/geom_ecdf/ecdf-no-weights.svg:
--------------------------------------------------------------------------------
1 |
2 |
66 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/geom_ecdf/ecdf-weights.svg:
--------------------------------------------------------------------------------
1 |
2 |
66 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/geom_mirrored_histogram.md:
--------------------------------------------------------------------------------
1 | # geom_mirrored_histogram errors/warns correctly
2 |
3 | Computation failed in `stat_mirror_count()`.
4 | Caused by error in `abort()`:
5 | ! Groups of three or greater not supported in `geom_mirror_histogram()`
6 |
7 | ---
8 |
9 | Computation failed in `stat_mirror_count()`.
10 | Caused by error in `abort()`:
11 | ! No group detected.
12 | * Do you need to use `aes(group = ...)` with your grouping variable?
13 |
14 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/geom_mirrored_histogram/layered-weighted-and-unweighted.svg:
--------------------------------------------------------------------------------
1 |
2 |
266 |
--------------------------------------------------------------------------------
/tests/testthat/helper-vdiffr.R:
--------------------------------------------------------------------------------
1 | expect_doppelganger <- function(title, fig, ...) {
2 | testthat::skip_if_not_installed("vdiffr")
3 | vdiffr::expect_doppelganger(title, fig, ...)
4 | }
5 |
--------------------------------------------------------------------------------
/tests/testthat/test-add_ess_header.R:
--------------------------------------------------------------------------------
1 | suppressPackageStartupMessages(library(survey))
2 | suppressPackageStartupMessages(library(gtsummary))
3 | suppressPackageStartupMessages(library(dplyr))
4 |
5 | # Create survey design and gtsummary tables.
6 | svy <- svydesign(~1, data = nhefs_weights, weights = ~ w_ate)
7 | tbl <- tbl_svysummary(svy, include = c(age, sex, smokeyrs))
8 | tbl_by <- tbl_svysummary(svy, by = qsmk, include = c(age, sex, smokeyrs))
9 |
10 | # Tests --------------------------------------------------------------------
11 |
12 | test_that("Non-by case ESS values match ess() results", {
13 | res <- add_ess_header(tbl)
14 |
15 | # Compute expected ESS from the survey design weights.
16 | expected_ess <- ess(weights(svy))
17 |
18 | # For a non-by table, the header has one row.
19 | expect_equal(res$table_styling$header$modify_stat_n[[1]], expected_ess)
20 | expect_equal(res$table_styling$header$modify_stat_N[[1]], expected_ess)
21 | expect_equal(res$table_styling$header$modify_stat_p[[1]], 1)
22 |
23 | # Verify that the ESS result stored in cards matches.
24 | expect_equal(res$cards$add_ess_header$stat[[1]], expected_ess)
25 | })
26 |
27 | test_that("By case ESS values match ess() results", {
28 | res_by <- add_ess_header(tbl_by)
29 | header_tbl <- res_by$table_styling$header
30 |
31 | # In a by table, the header may include extra rows.
32 | # We'll restrict to rows with a non-missing group label.
33 | by_rows <- header_tbl |> filter(!is.na(modify_stat_level))
34 |
35 | # Compute expected ESS by group.
36 | expected_by <- nhefs_weights |>
37 | group_by(qsmk) |>
38 | summarize(expected_ess = ess(w_ate), .groups = "drop") |>
39 | arrange(as.character(qsmk))
40 |
41 | # Compare group labels.
42 | expect_equal(by_rows$modify_stat_level, as.character(expected_by$qsmk))
43 | # Compare group ESS values.
44 | expect_equal(by_rows$modify_stat_n, expected_by$expected_ess)
45 |
46 | # Total ESS is the sum of the group ESS values.
47 | total_expected <- sum(expected_by$expected_ess)
48 | expect_equal(by_rows$modify_stat_N, rep(total_expected, nrow(by_rows)))
49 |
50 | # Compare proportions.
51 | expected_prop <- expected_by$expected_ess / total_expected
52 | expect_equal(by_rows$modify_stat_p, expected_prop)
53 |
54 | # The ESS results stored in cards may be a list-column; unlist before comparing.
55 | expect_equal(unlist(res_by$cards$add_ess_header$stat), expected_by$expected_ess)
56 | })
57 |
58 | test_that("Error if `x` is not a tbl_svysummary", {
59 | expect_error(
60 | add_ess_header(1),
61 | regexp = "Argument `x` must be class ",
62 | fixed = TRUE
63 | )
64 | })
65 |
66 | test_that("Error if `header` is not a string", {
67 | expect_error(
68 | add_ess_header(tbl, header = 123),
69 | regexp = "Argument `header` must be a string.",
70 | fixed = TRUE
71 | )
72 | })
73 |
74 |
75 |
--------------------------------------------------------------------------------
/tests/testthat/test-ess.R:
--------------------------------------------------------------------------------
1 | test_that("ess returns correct result for equal weights", {
2 | # 5 observations, each weight = 2
3 | wts_equal <- rep(2, 5)
4 | # ESS should be 5
5 | expect_equal(ess(wts_equal), 5)
6 | })
7 |
8 | test_that("ess returns correct result for varied weights", {
9 | # 5 observations, each weight varies
10 | wts_equal <- runif(5, max = 5)
11 | # ESS should always be less than 5
12 | expect_lt(ess(wts_equal), 5)
13 | })
14 |
15 | test_that("ess handles one large weight", {
16 | # 5 observations, 1 large weight
17 | wts_big <- c(1000, rep(0, 4))
18 | # The sum is 1000, sum of squares is 1,000^2 = 1e6
19 | # ESS = (1000^2) / 1,000^2 = 1
20 | expect_equal(ess(wts_big), 1)
21 | })
22 |
23 | test_that("ess gives `NaN` if all weights are 0", {
24 | wts_zero <- rep(0, 5)
25 | # sum(wts) = 0, sum(wts^2) = 0 -> 0/0 is NaN
26 | expect_true(is.nan(ess(wts_zero)))
27 | })
28 |
--------------------------------------------------------------------------------
/tests/testthat/test-geom_ecdf.R:
--------------------------------------------------------------------------------
1 | library(ggplot2)
2 | test_that("geom_ecdf works", {
3 | p_no_wts <- ggplot(
4 | nhefs_weights,
5 | aes(x = smokeyrs, color = qsmk)
6 | ) +
7 | geom_ecdf() +
8 | xlab("Smoking Years") +
9 | ylab("Proportion <= x")
10 |
11 | p_wts <- ggplot(
12 | nhefs_weights,
13 | aes(x = smokeyrs, color = qsmk)
14 | ) +
15 | geom_ecdf(aes(weights = w_ato)) +
16 | xlab("Smoking Years") +
17 | ylab("Proportion <= x")
18 |
19 | expect_doppelganger("ecdf (no weights)", p_no_wts)
20 | expect_doppelganger("ecdf (weights)", p_wts)
21 |
22 | })
23 |
24 |
--------------------------------------------------------------------------------
/tests/testthat/test-geom_mirrored_histogram.R:
--------------------------------------------------------------------------------
1 | library(ggplot2)
2 | test_that("geom_mirrored_histogram works", {
3 | p <- ggplot(nhefs_weights, aes(.fitted)) +
4 | geom_mirror_histogram(
5 | aes(group = qsmk),
6 | bins = 50
7 | ) +
8 | geom_mirror_histogram(
9 | aes(fill = qsmk, weight = w_ate),
10 | bins = 50,
11 | alpha = 0.5
12 | ) +
13 | scale_y_continuous(labels = abs)
14 |
15 | expect_doppelganger("layered (weighted and unweighted)", p)
16 | })
17 |
18 | test_that("geom_mirrored_histogram errors/warns correctly", {
19 | # group of 3 or more
20 | edu_group <- ggplot(nhefs_weights, aes(.fitted)) +
21 | geom_mirror_histogram(
22 | aes(group = education),
23 | bins = 50
24 | )
25 | expect_snapshot_warning(print(edu_group))
26 |
27 | # no group
28 | no_group <- ggplot(nhefs_weights, aes(.fitted)) +
29 | geom_mirror_histogram(bins = 50)
30 |
31 | expect_snapshot_warning(print(no_group))
32 | })
33 |
34 | test_that("NO_GROUP is still -1", {
35 | skip_on_cran()
36 | expect_equal(asNamespace("ggplot2")$NO_GROUP, -1)
37 | })
38 |
--------------------------------------------------------------------------------