├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── actions.R ├── expose-helpers.R ├── expose.R ├── exposure.R ├── packs.R ├── ruler-package.R ├── rules.R ├── spread-groups.R ├── utils-pipe.R └── utils.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── cran-comments.md ├── docs ├── 404.html ├── LICENSE-text.html ├── LICENSE.html ├── articles │ ├── design-and-format.html │ ├── design-and-format_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ └── header-attrs-2.5 │ │ │ └── header-attrs.js │ ├── index.html │ ├── rule-packs.html │ ├── rule-packs_files │ │ ├── anchor-sections-1.0 │ │ │ ├── anchor-sections.css │ │ │ └── anchor-sections.js │ │ └── header-attrs-2.5 │ │ │ └── header-attrs.js │ ├── validation.html │ └── validation_files │ │ ├── anchor-sections-1.0 │ │ ├── anchor-sections.css │ │ └── anchor-sections.js │ │ └── header-attrs-2.5 │ │ └── header-attrs.js ├── authors.html ├── bootstrap-toc.css ├── bootstrap-toc.js ├── docsearch.css ├── docsearch.js ├── extra.css ├── extra.js ├── index.html ├── jquery.sticky-kit.min.js ├── link.svg ├── news │ └── index.html ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml ├── reference │ ├── Rplot001.png │ ├── act_after_exposure.html │ ├── add_pack_names.html │ ├── any_breaker.html │ ├── assert_any_breaker.html │ ├── bind_exposures.html │ ├── cell-pack.html │ ├── column-pack.html │ ├── data-pack.html │ ├── expose.html │ ├── expose_single.html │ ├── exposure.html │ ├── group-pack.html │ ├── index.html │ ├── inside_punct.html │ ├── pack_info.html │ ├── packs_info.html │ ├── pipe.html │ ├── reexports.html │ ├── row-pack.html │ ├── rule-packs.html │ ├── ruler-package.html │ ├── ruler-report.html │ ├── rules.html │ ├── single_exposure.html │ └── spread_groups.html └── sitemap.xml ├── inst └── WORDLIST ├── man ├── act_after_exposure.Rd ├── 
add_pack_names.Rd ├── any_breaker.Rd ├── assert_any_breaker.Rd ├── bind_exposures.Rd ├── cell-pack.Rd ├── column-pack.Rd ├── data-pack.Rd ├── expose.Rd ├── expose_single.Rd ├── exposure.Rd ├── group-pack.Rd ├── inside_punct.Rd ├── pack_info.Rd ├── packs_info.Rd ├── pipe.Rd ├── row-pack.Rd ├── rule-packs.Rd ├── ruler-package.Rd ├── ruler-report.Rd ├── rules.Rd ├── single_exposure.Rd └── spread_groups.Rd ├── pkgdown ├── extra.css └── extra.js ├── ruler.Rproj ├── tests ├── testthat.R └── testthat │ ├── helper-expose-data.R │ ├── test-actions.R │ ├── test-expose-helpers.R │ ├── test-expose.R │ ├── test-exposure.R │ ├── test-packs.R │ ├── test-rules.R │ ├── test-spread-groups.R │ └── test-utils.R └── vignettes ├── design-and-format.Rmd ├── rule-packs.Rmd └── validation.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^README\.Rmd$ 6 | ^README-.*\.png$ 7 | ^\.travis\.yml$ 8 | ^codecov\.yml$ 9 | ^docs$ 10 | ^_pkgdown\.yml$ 11 | ^pkgdown$ 12 | ^cran-comments\.md$ 13 | ^\.github$ 14 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'devel'} 22 | - {os: macOS-latest, r: 'release'} 23 | - {os: windows-latest, r: 'devel'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v3 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | test-coverage: 13 | runs-on: ubuntu-latest 14 | env: 15 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | 20 | - uses: r-lib/actions/setup-r@v2 21 | with: 22 | use-public-rspm: true 23 | 24 | - uses: r-lib/actions/setup-r-dependencies@v2 25 | with: 26 | extra-packages: any::covr 27 | needs: coverage 28 | 29 | - name: Test coverage 30 | run: | 31 | covr::codecov( 32 | quiet = FALSE, 33 | clean = FALSE, 34 | install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package") 35 | ) 36 | shell: Rscript {0} 37 | 38 | - name: Show testthat output 39 | if: always() 40 | run: | 41 | ## -------------------------------------------------------------------- 42 | find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true 43 | shell: bash 44 | 45 | - name: Upload test results 46 | if: failure() 47 | uses: actions/upload-artifact@v3 48 | with: 49 | name: coverage-test-failures 50 | path: ${{ runner.temp }}/package 51 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | .Rproj.user 4 | .Rhistory 5 | .RData 6 | .Ruserdata 7 | inst/doc 8 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: ruler 2 | Title: Tidy Data Validation Reports 3 | Version: 0.3.0.9000 4 | Authors@R: 5 | person(given = "Evgeni", 6 | family = "Chasnovski", 7 | role = c("aut", "cre"), 8 | email = "evgeni.chasnovski@gmail.com", 9 | comment = c(ORCID = "0000-0002-1617-4019")) 10 | Description: Tools for creating data validation pipelines and 11 | tidy reports. 
This package offers a framework for exploring and 12 | validating data frame like objects using 'dplyr' grammar of data 13 | manipulation. 14 | License: MIT + file LICENSE 15 | URL: https://echasnovski.github.io/ruler/, 16 | https://github.com/echasnovski/ruler 17 | BugReports: https://github.com/echasnovski/ruler/issues 18 | Depends: 19 | R (>= 3.4.0) 20 | Imports: 21 | dplyr (>= 0.8.0), 22 | keyholder, 23 | rlang, 24 | tibble, 25 | tidyr (>= 0.7.0), 26 | magrittr, 27 | purrr (>= 1.0.0) 28 | Suggests: 29 | covr, 30 | knitr, 31 | rmarkdown, 32 | testthat 33 | VignetteBuilder: 34 | knitr 35 | Encoding: UTF-8 36 | Roxygen: list(markdown = TRUE, old_usage = TRUE) 37 | RoxygenNote: 7.2.3 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2017 2 | COPYRIGHT HOLDER: Evgeni Chasnovski 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(expose_single,cell_pack) 4 | S3method(expose_single,col_pack) 5 | S3method(expose_single,data_pack) 6 | S3method(expose_single,default) 7 | S3method(expose_single,group_pack) 8 | S3method(expose_single,row_pack) 9 | S3method(print,cell_pack) 10 | S3method(print,col_pack) 11 | S3method(print,data_pack) 12 | S3method(print,exposure) 13 | S3method(print,group_pack) 14 | S3method(print,packs_info) 15 | S3method(print,row_pack) 16 | S3method(print,ruler_report) 17 | export("%>%") 18 | export(act_after_exposure) 19 | export(any_breaker) 20 | export(assert_any_breaker) 21 | export(bind_exposures) 22 | export(cell_packs) 23 | export(col_packs) 24 | export(data_packs) 25 | export(expose) 26 | export(get_exposure) 27 | export(get_packs_info) 28 | export(get_report) 29 | export(group_packs) 30 | export(inside_punct) 31 | 
export(is_exposure) 32 | export(is_packs_info) 33 | export(is_report) 34 | export(remove_exposure) 35 | export(row_packs) 36 | export(rules) 37 | export(spread_groups) 38 | import(dplyr) 39 | import(keyholder) 40 | importFrom(magrittr,"%>%") 41 | importFrom(rlang,"!!!") 42 | importFrom(rlang,"!!") 43 | importFrom(rlang,.data) 44 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # ruler (development version) 2 | 3 | # ruler 0.3.0 4 | 5 | * Update minimum `dplyr` version to be 0.8.0. 6 | * Introduce dependency on `purrr >= 1.0.0` as a reaction to soft deprecation of `rlang::squash()`. 7 | 8 | # ruler 0.2.4 9 | 10 | * Maintenance release in reaction to a planned update of `tibble`. 11 | 12 | # ruler 0.2.3 13 | 14 | * Reaction to `dplyr` 1.0.0. 15 | 16 | # ruler 0.2.2 17 | 18 | * Reaction to `tibble` 3.0.0. 19 | 20 | # ruler 0.2.1 21 | 22 | * Update logic of `rules()`: it now only converts bare expressions with `.` as input into formulas. 23 | 24 | # ruler 0.2.0 25 | 26 | This version is a reaction to changes in `dplyr` 0.8.0. 27 | 28 | * Breaking changes: 29 | * Name repair in `rules()` now uses `__` instead of `..` as separator for 30 | function position in input `...`. This is done because of new `dplyr` name 31 | repair rules which assume that `..{[0-9]}` at the end of the name can be 32 | removed. 33 | * `rules()` behaviour now depends on version of `dplyr`. For version less 34 | than 0.8.0 it is a direct wrapper for `dplyr::funs()` which does custom 35 | name repair. For newer versions it quotes elements in `...` (except explicit 36 | formulas) and repairs names of the output. 37 | 38 | # ruler 0.1.4 39 | 40 | * Reaction to `tibble` 2.0.0: ease some tests and adjust to new functionality. 41 | 42 | # ruler 0.1.3 43 | 44 | * Update for `dplyr` 0.7.5. 45 | 46 | # ruler 0.1.2 47 | 48 | * Update for `rlang` 0.2.0. 
49 | 50 | # ruler 0.1.1 51 | 52 | * Update some unnecessarily strict tests (for CRAN). 53 | 54 | # ruler 0.1.0 55 | 56 | * Initial release. 57 | -------------------------------------------------------------------------------- /R/actions.R: -------------------------------------------------------------------------------- 1 | # General act ------------------------------------------------------------- 2 | #' Act after exposure 3 | #' 4 | #' A wrapper for consistent application of some actions based on the data after 5 | #' exposure. 6 | #' @param .tbl Result of [exposure][expose], i.e. data frame with [exposure] 7 | #' attribute. 8 | #' @param .trigger Function which takes `.tbl` as argument and returns `TRUE` if 9 | #' some action needs to be performed. 10 | #' @param .actor Function which takes `.tbl` as argument and performs the 11 | #' action. 12 | #' 13 | #' @details Basically `act_after_exposure()` is doing the following: 14 | #' - Check that `.tbl` has a proper [exposure] attribute. 15 | #' - Compute whether to perform intended action by computing `.trigger(.tbl)`. 16 | #' - If trigger results in `TRUE` then `.actor(.tbl)` __is returned__. In other 17 | #' case `.tbl` is returned. 18 | #' 19 | #' It is a good idea that `.actor` should be doing one of two things: 20 | #' - Making side effects. For example throwing an error (if condition in 21 | #' `.trigger` is met), printing some information and so on. In this case it 22 | #' should return `.tbl` to be used properly inside a \link[magrittr:pipe]{pipe}. 23 | #' - Changing `.tbl` based on exposure information. In this case it should 24 | #' return the imputed version of `.tbl`. 25 | #' 26 | #' @seealso [any_breaker] for trigger which returns `TRUE` in case any rule 27 | #' breaker is found in exposure. 28 | #' 29 | #' [assert_any_breaker] for usage of `act_after_exposure()` in building data 30 | #' validation pipelines. 
31 | #' 32 | #' @examples 33 | #' exposure_printer <- function(.tbl) { 34 | #' print(get_exposure(.tbl)) 35 | #' .tbl 36 | #' } 37 | #' mtcars_exposed <- mtcars %>% 38 | #' expose(data_packs(. %>% dplyr::summarise(nrow_low = nrow(.) > 50))) %>% 39 | #' act_after_exposure(any_breaker, exposure_printer) 40 | #' @export 41 | act_after_exposure <- function(.tbl, .trigger, .actor) { 42 | tbl_exposure <- get_exposure(.tbl) 43 | 44 | if (identical(tbl_exposure, NULL)) { 45 | stop("act_after_exposure: Input object does not have exposure.") 46 | } 47 | 48 | if (!is_exposure(tbl_exposure)) { 49 | stop( 50 | "act_after_exposure: Extracted 'exposure' object is not a ", 51 | "proper exposure." 52 | ) 53 | } 54 | 55 | if (isTRUE(.trigger(.tbl))) { 56 | res <- .actor(.tbl) 57 | } else { 58 | res <- .tbl 59 | } 60 | 61 | res 62 | } 63 | 64 | 65 | # Assertions -------------------------------------------------------------- 66 | #' Assert presence of rule breaker 67 | #' 68 | #' Function to assert if [exposure][expose] resulted in [detecting][any_breaker] 69 | #' some rule breakers. 70 | #' 71 | #' @inheritParams act_after_exposure 72 | #' @param .type The type of assertion. Can be only one of "error", "warning" or 73 | #' "message". 74 | #' @param .silent If `TRUE` no printing of rule breaker information is done. 75 | #' @param ... Arguments for printing rule breaker information. 76 | #' 77 | #' @details In case of breaker presence this function does the following: 78 | #' - In case `.silent` is `FALSE` print rows from exposure 79 | #' [report][ruler-report] corresponding to rule breakers. 80 | #' - Make assertion of the chosen `.type` about breaker presence in exposure. 81 | #' - Return `.tbl` (for using inside a \link[magrittr:pipe]{pipe}). 82 | #' 83 | #' If there are no breakers only `.tbl` is returned. 84 | #' 85 | #' @seealso [any_breaker] for checking of breaker presence in exposure result. 86 | #' 87 | #' [act_after_exposure] for making general actions based on exposure result. 
88 | #' 89 | #' @examples 90 | #' \dontrun{ 91 | #' mtcars %>% 92 | #' expose(data_packs(. %>% dplyr::summarise(nrow_low = nrow(.) > 50))) %>% 93 | #' assert_any_breaker() 94 | #' } 95 | #' @export 96 | assert_any_breaker <- function(.tbl, .type = "error", .silent = FALSE, ...) { 97 | informer_fun <- switch( 98 | .type, 99 | message = message, 100 | warning = function(.msg) { 101 | warning(.msg, call. = FALSE) 102 | }, 103 | function(.msg) { 104 | stop(.msg, call. = FALSE) 105 | } 106 | ) 107 | breakers_informer <- generate_breakers_informer( 108 | informer_fun, 109 | "assert_any_breaker: Some breakers found in exposure.", 110 | .silent, 111 | ... 112 | ) 113 | 114 | act_after_exposure(.tbl, any_breaker, breakers_informer) 115 | } 116 | 117 | 118 | # Triggers ---------------------------------------------------------------- 119 | #' Is there any breaker in exposure? 120 | #' 121 | #' Function designed to be used as trigger in [act_after_exposure()]. Returns 122 | #' `TRUE` if [exposure] attribute of `.tbl` has any information about data units 123 | #' not obeying the rules, i.e. rule breakers. 124 | #' 125 | #' @inheritParams act_after_exposure 126 | #' 127 | #' @seealso [assert_any_breaker] for implicit usage of `any_breaker()`. 128 | #' 129 | #' @examples 130 | #' mtcars %>% 131 | #' expose(data_packs(. %>% dplyr::summarise(nrow_low = nrow(.) > 50))) %>% 132 | #' any_breaker() 133 | #' @export 134 | any_breaker <- function(.tbl) { 135 | input_exposure <- get_exposure(.tbl) 136 | if (!is_exposure(input_exposure)) { 137 | stop("any_breaker: Input object has not a proper exposure.") 138 | } 139 | 140 | report <- get_report(input_exposure) 141 | 142 | !all(is_obeyer(report[["value"]])) 143 | } 144 | 145 | 146 | # Actors --------------------------------------------------------------- 147 | generate_breakers_informer <- 148 | function(.fun = stop, .message = "Some breakers found in exposure.", 149 | .silent, ...) 
{ 150 | force(.fun) 151 | force(.message) 152 | force(.silent) 153 | 154 | function(.tbl) { 155 | report_breakers <- get_report(.tbl) %>% remove_obeyers(TRUE) 156 | 157 | if (!(.silent)) { 158 | cat(" Breakers report\n") 159 | print(report_breakers, ...) 160 | cat("\n") 161 | } 162 | .fun(.message) 163 | 164 | invisible(.tbl) 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /R/expose-helpers.R: -------------------------------------------------------------------------------- 1 | # General expose helpers -------------------------------------------------- 2 | guess_pack_type <- function(.pack_out, .rule_sep = inside_punct("\\._\\.")) { 3 | all_logical <- all(vapply(.pack_out, is.logical, TRUE)) 4 | n_rows_one <- nrow(.pack_out) == 1 5 | all_contain_sep <- all(grepl(pattern = .rule_sep, x = colnames(.pack_out))) 6 | 7 | if (!all_logical) { 8 | return("group_pack") 9 | } 10 | 11 | if (n_rows_one) { 12 | if (all_contain_sep) { 13 | return("col_pack") 14 | } else { 15 | return("data_pack") 16 | } 17 | } else { 18 | if (all_contain_sep) { 19 | return("cell_pack") 20 | } else { 21 | return("row_pack") 22 | } 23 | } 24 | } 25 | 26 | remove_obeyers <- function(.report, .do_remove) { 27 | if (!.do_remove) { 28 | return(.report) 29 | } else { 30 | .report %>% filter(!is_obeyer(.data[["value"]])) 31 | } 32 | } 33 | 34 | impute_exposure_pack_names <- function(.single_exposures, .exposure_ref) { 35 | pack_names <- rlang::names2(.single_exposures) 36 | is_empty_pack_names <- pack_names == "" 37 | if (sum(is_empty_pack_names) == 0) { 38 | return(.single_exposures) 39 | } 40 | 41 | # Collect data about imputed pack types 42 | pack_types <- vapply(.single_exposures, function(cur_single_exposure) { 43 | cur_single_exposure[["pack_info"]][["type"]][1] 44 | }, "chr") 45 | pack_types_table <- table(pack_types) 46 | unique_pack_types <- names(pack_types_table) 47 | 48 | start_ind_vec <- rep(1, length(unique_pack_types)) 49 | 
names(start_ind_vec) <- unique_pack_types 50 | 51 | # Account for reference pack types 52 | if (!identical(.exposure_ref, NULL)) { 53 | ref_pack_types <- .exposure_ref[["packs_info"]][["type"]] 54 | ref_pack_types_table <- table(ref_pack_types) 55 | common_pack_types <- intersect( 56 | unique_pack_types, 57 | names(ref_pack_types_table) 58 | ) 59 | 60 | start_ind_vec[common_pack_types] <- 61 | ref_pack_types_table[common_pack_types] + 1 62 | } 63 | 64 | # Impute 65 | def_names <- mapply( 66 | compute_def_names, 67 | .n = pack_types_table, .root = unique_pack_types, 68 | .start_ind = start_ind_vec, 69 | SIMPLIFY = FALSE 70 | ) %>% 71 | unsplit(f = pack_types) 72 | 73 | names(.single_exposures)[is_empty_pack_names] <- 74 | def_names[is_empty_pack_names] 75 | 76 | .single_exposures 77 | } 78 | 79 | #' Add pack names to single exposures 80 | #' 81 | #' Function to add pack names to single exposures. Converts list of 82 | #' [single exposures][single_exposure] to list of [exposures][exposure] without 83 | #' validating. 84 | #' 85 | #' @param .single_exposures List of [single exposures][single_exposure]. 
86 | #' 87 | #' @keywords internal 88 | add_pack_names <- function(.single_exposures) { 89 | pack_names <- names(.single_exposures) 90 | 91 | lapply(pack_names, function(pack_name) { 92 | single_exposure <- .single_exposures[[pack_name]] 93 | 94 | # Add pack name to report 95 | report <- single_exposure[["report"]] 96 | new_report <- report 97 | new_report[["pack"]] <- rep(pack_name, nrow(report)) 98 | new_report <- new_report[, c("pack", colnames(report))] %>% 99 | as_report(.validate = FALSE) 100 | 101 | # Add pack name to pack info and convert to `packs_info` 102 | packs_info <- single_exposure[["pack_info"]] 103 | packs_info[["name"]] <- rep(pack_name, nrow(packs_info)) 104 | packs_info <- 105 | packs_info[, c("name", colnames(single_exposure[["pack_info"]]))] %>% 106 | as_packs_info(.validate = FALSE) 107 | 108 | new_exposure(packs_info, new_report, .validate = FALSE) 109 | }) %>% 110 | rlang::set_names(pack_names) 111 | } 112 | 113 | 114 | # Binder ------------------------------------------------------------------ 115 | #' Bind exposures 116 | #' 117 | #' Function to bind several exposures into one. 118 | #' 119 | #' @param ... Exposures to bind. 120 | #' @param .validate_output Whether to validate with [is_exposure()] if the 121 | #' output is exposure. 122 | #' 123 | #' @details __Note__ that the output might not have names in list-column `fun` 124 | #' in [packs info][packs_info], which depends on version of 125 | #' [dplyr][dplyr::dplyr-package] package. 126 | #' 127 | #' @examples 128 | #' my_data_packs <- data_packs( 129 | #' data_dims = . %>% dplyr::summarise(nrow_low = nrow(.) < 10), 130 | #' data_sum = . %>% dplyr::summarise(sum = sum(.) 
< 1000) 131 | #' ) 132 | #' 133 | #' ref_exposure <- mtcars %>% 134 | #' expose(my_data_packs) %>% 135 | #' get_exposure() 136 | #' 137 | #' exposure_1 <- mtcars %>% 138 | #' expose(my_data_packs[1]) %>% 139 | #' get_exposure() 140 | #' exposure_2 <- mtcars %>% 141 | #' expose(my_data_packs[2]) %>% 142 | #' get_exposure() 143 | #' exposure_binded <- bind_exposures(exposure_1, exposure_2) 144 | #' 145 | #' exposure_pipe <- mtcars %>% 146 | #' expose(my_data_packs[1]) %>% 147 | #' expose(my_data_packs[2]) %>% 148 | #' get_exposure() 149 | #' 150 | #' identical(exposure_binded, ref_exposure) 151 | #' 152 | #' identical(exposure_pipe, ref_exposure) 153 | #' @export 154 | bind_exposures <- function(..., .validate_output = TRUE) { 155 | exposures <- rlang::dots_list(...) %>% 156 | squash() %>% 157 | filter_not_null() 158 | 159 | if (length(exposures) == 0) { 160 | return(NULL) 161 | } 162 | 163 | binded_packs_info <- lapply(exposures, `[[`, "packs_info") %>% 164 | bind_rows() %>% 165 | as_packs_info(.validate = FALSE) 166 | row.names(binded_packs_info) <- NULL 167 | 168 | binded_report <- lapply(exposures, `[[`, "report") %>% 169 | bind_rows() %>% 170 | as_report(.validate = FALSE) 171 | row.names(binded_report) <- NULL 172 | 173 | new_exposure(binded_packs_info, binded_report, .validate = .validate_output) 174 | } 175 | 176 | filter_not_null <- function(.x) { 177 | is_null_x <- vapply(.x, identical, FUN.VALUE = TRUE, y = NULL) 178 | 179 | .x[!is_null_x] 180 | } 181 | 182 | 183 | # Assertions for pack outputs --------------------------------------------- 184 | assert_pack_out_one_row <- function(.pack_out, .pack_type) { 185 | if (nrow(.pack_out) != 1) { 186 | stop(paste0("Some ", .pack_type, " has output with not 1 row.")) 187 | } 188 | 189 | TRUE 190 | } 191 | 192 | assert_pack_out_all_logical <- function(.pack_out, .pack_type) { 193 | is_lgl_col <- vapply(.pack_out, is.logical, TRUE) 194 | 195 | if (all(is_lgl_col)) { 196 | return(TRUE) 197 | } else { 198 | 
stop(paste0("Some ", .pack_type, " has not logical output column")) 199 | } 200 | } 201 | 202 | assert_pack_out_all_have_separator <- 203 | function(.pack_out, .pack_type, .rule_sep) { 204 | has_sep <- grepl(pattern = .rule_sep, x = colnames(.pack_out)) 205 | 206 | if (all(has_sep)) { 207 | return(TRUE) 208 | } else { 209 | stop(paste0( 210 | "In some ", .pack_type, " not all columns contain rule separator" 211 | )) 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /R/ruler-package.R: -------------------------------------------------------------------------------- 1 | #' ruler: Rule Your Data 2 | #' 3 | #' `ruler` offers a set of tools for creating tidy data validation reports using 4 | #' [dplyr](https://dplyr.tidyverse.org) grammar of data manipulation. It 5 | #' is designed to be flexible and extendable in terms of creating rules and 6 | #' using their output. 7 | #' 8 | #' The common workflow is: 9 | #' - Define dplyr-style [packs][rule-packs] of rules for basic data units (data, 10 | #' group, column, row, cell) to obey. 11 | #' - [Expose][expose] some data to those rules. The result is the same data with 12 | #' possibly created [exposure][exposure] attribute. Exposure contains 13 | #' information [about applied packs][packs_info] and [tidy data validation 14 | #' report][ruler-report]. 15 | #' - Use data and exposure to perform some [actions][act_after_exposure]: 16 | #' [assert about rule breakers][assert_any_breaker], impute data, remove 17 | #' outliers and so on. 18 | #' 19 | #' To learn more about `ruler` browse vignettes with `browseVignettes(package = 20 | #' "ruler")`. The preferred order is: 21 | #' 22 | #' 1. Design process and exposure format. 23 | #' 2. Rule packs. 24 | #' 3. Validation 25 | #' 26 | #' @import keyholder 27 | #' @import dplyr 28 | #' @importFrom rlang .data !! !!! 
29 | "_PACKAGE" 30 | -------------------------------------------------------------------------------- /R/rules.R: -------------------------------------------------------------------------------- 1 | #' Create a list of rules 2 | #' 3 | #' `rules()` is a function designed to create input for `.funs` argument of 4 | #' scoped `dplyr` "mutating" verbs (such as 5 | #' [summarise_all()][dplyr::summarise_all()] and 6 | #' [transmute_all()][dplyr::transmute_all()]). It converts bare expressions 7 | #' with `.` as input into formulas and repairs names of the output. 8 | #' 9 | #' @param ... Bare expression(s) with `.` as input. 10 | #' @param .prefix Prefix to be added to function names. 11 | #' 12 | #' @details `rules()` repairs names by the following algorithm: 13 | #' - Absent names are replaced with the 'rule__\\{ind\\}' where \\{ind\\} is the 14 | #' index of function position in the `...` . 15 | #' - `.prefix` is added at the beginning of all names. The default is `._.` . It 16 | #' is picked for its symbolism (it is the Morse code of letter 'R') and rare 17 | #' occurrence in names. In those rare cases it can be manually changed but 18 | #' this will not be tracked further. **Note** that it is a good idea for 19 | #' `.prefix` to be [syntactic][make.names()], as `dplyr` will force tibble 20 | #' names to be syntactic. To check if a string is "good", use it as input to 21 | #' `make.names()`: if output equals that string then it is a "good" choice. 
22 | #' 23 | #' @examples 24 | #' # `rules()` accepts bare expression calls with `.` as input, which is not 25 | #' # possible with advised `list()` approach of `dplyr` 26 | #' dplyr::summarise_all(mtcars[, 1:2], rules(sd, "sd", sd(.), ~ sd(.))) 27 | #' 28 | #' dplyr::summarise_all(mtcars[, 1:2], rules(sd, .prefix = "a_a_")) 29 | #' 30 | #' # Use `...` in `summarise_all()` to supply extra arguments 31 | #' dplyr::summarise_all(data.frame(x = c(1:2, NA)), rules(sd), na.rm = TRUE) 32 | #' @export 33 | rules <- function(..., .prefix = "._.") { 34 | dots <- quos(...) 35 | names(dots) <- enhance_names( 36 | .name = rlang::names2(dots), .prefix = .prefix, .root = "rule" 37 | ) 38 | 39 | lapply(dots, extract_funs_input) 40 | } 41 | 42 | extract_funs_input <- function(obj) { 43 | expr <- rlang::quo_get_expr(obj) 44 | obj_function <- quo_get_function(obj) 45 | 46 | if (!is.null(obj_function)) { 47 | obj_function 48 | } else if (rlang::is_formula(expr)) { 49 | # This seems to actually recreate the formula, meaning attaching different 50 | # environment. However, this shouldn't be a problem because of 51 | # "explicitness" of input formula. 52 | eval(expr) 53 | } else if (has_dot_symbol(expr)) { 54 | stats::as.formula( 55 | object = paste0("~", rlang::expr_text(expr)), 56 | env = rlang::quo_get_env(obj) 57 | ) 58 | } else if (is.character(expr) && (length(expr) == 1)) { 59 | expr 60 | } else { 61 | stop( 62 | "Wrong input `", rlang::expr_text(expr), "` to `rules()`.", 63 | call. 
= FALSE 64 | ) 65 | } 66 | } 67 | 68 | has_dot_symbol <- function(x) { 69 | x_parts <- vapply(squash_expr(x), rlang::expr_text, character(1)) 70 | 71 | any(x_parts == ".") 72 | } 73 | 74 | squash_expr <- function(x) { 75 | if (rlang::is_syntactic_literal(x) || rlang::is_symbol(x)) { 76 | return(x) 77 | } 78 | 79 | unlist(lapply(as.list(x), squash_expr)) 80 | } 81 | 82 | quo_get_function <- function(x) { 83 | get0( 84 | x = rlang::expr_text(rlang::quo_get_expr(x)), 85 | envir = rlang::quo_get_env(x), 86 | mode = "function" 87 | ) 88 | } 89 | -------------------------------------------------------------------------------- /R/spread-groups.R: -------------------------------------------------------------------------------- 1 | #' Spread grouping columns 2 | #' 3 | #' Function that is used during interpretation of [group pack][group-pack] 4 | #' output. It converts grouped [summary][dplyr::summarise] into [column 5 | #' pack][column-pack] format. 6 | #' 7 | #' @param .tbl Data frame with result of grouped summary. 8 | #' @param ... A selection of grouping columns (as in [tidyr::unite()]). 9 | #' @param .group_sep A string to be used as separator of grouping levels. 10 | #' @param .col_sep A string to be used as separator in column pack. 11 | #' 12 | #' @details Multiple grouping variables are converted to one with 13 | #' [tidyr::unite()] and separator `.group_sep`. New values are then treated as 14 | #' variable names which should be validated and which represent the group data 15 | #' as a whole. 16 | #' 17 | #' @return A data frame in [column pack][column-pack] format. 
18 | #' 19 | #' @examples 20 | #' mtcars_grouped_summary <- mtcars %>% 21 | #' dplyr::group_by(vs, am) %>% 22 | #' dplyr::summarise(n_low = dplyr::n() > 6, n_high = dplyr::n() < 10) 23 | #' 24 | #' spread_groups(mtcars_grouped_summary, vs, am) 25 | #' 26 | #' spread_groups(mtcars_grouped_summary, vs, am, .group_sep = "__") 27 | #' 28 | #' spread_groups(mtcars_grouped_summary, vs, am, .col_sep = "__") 29 | #' @export 30 | spread_groups <- function(.tbl, ..., .group_sep = ".", .col_sep = "._.") { 31 | tbl_ungrouped <- ungroup(.tbl) 32 | tbl_group_cols <- select(tbl_ungrouped, ...) 33 | 34 | # Check for presence of supplied group columns 35 | if (ncol(tbl_group_cols) == 0) { 36 | stop("spread_groups: No grouping columns are supplied.") 37 | } 38 | 39 | # Check if grouping columns have unique combined levels 40 | if (nrow(tbl_group_cols) != nrow(distinct(tbl_group_cols))) { 41 | stop("spread_groups: Grouping columns define non-unique levels.") 42 | } 43 | 44 | # Check for presence of rule columns 45 | rule_cols <- negate_select_cols(tbl_ungrouped, ...) 
46 | rule_syms <- rlang::syms(rule_cols) 47 | if (length(rule_cols) == 0) { 48 | stop("spread_groups: No rule columns are supplied.") 49 | } 50 | 51 | # Check if all rule columns are logical 52 | is_all_rules_lgl <- tbl_ungrouped %>% 53 | select(!!!rule_syms) %>% 54 | vapply(is.logical, TRUE) %>% 55 | all() 56 | if (!is_all_rules_lgl) { 57 | stop("spread_groups: Some rule columns are not logical.") 58 | } 59 | 60 | group_id_sym <- rlang::sym(keyholder::compute_id_name(rule_cols)) 61 | 62 | tbl_ungrouped %>% 63 | tidyr::unite(!!group_id_sym, ..., sep = .group_sep, remove = TRUE) %>% 64 | tidyr::gather(key = "rule_name", value = "value", !!!rule_syms) %>% 65 | tidyr::unite( 66 | col = "var_rule", 67 | !!group_id_sym, 68 | "rule_name", 69 | sep = .col_sep, 70 | remove = TRUE 71 | ) %>% 72 | # For preserving ordering by rule and then by variable 73 | mutate( 74 | var_rule = factor(.data$var_rule, levels = unique(.data$var_rule)) 75 | ) %>% 76 | tidyr::spread(key = "var_rule", value = "value") 77 | } 78 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | NULL 12 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # General ----------------------------------------------------------------- 2 | #' Inside punctuation regular expression 3 | #' 4 | #' Function to construct regular expression of form: 'non alpha-numeric 5 | #' characters' + 'some characters' + 'non alpha-numeric characters'. 6 | #' 7 | #' @param .x Middle characters to be put between non alpha-numeric characters. 
8 | #' 9 | #' @examples 10 | #' inside_punct() 11 | #' 12 | #' inside_punct("abc") 13 | #' @export 14 | inside_punct <- function(.x = "\\._\\.") { 15 | paste0("[^[:alnum:]]*", .x, "[^[:alnum:]]*") 16 | } 17 | 18 | negate_select_cols <- function(.tbl, ...) { 19 | selected_tbl <- select(.tbl, ...) 20 | 21 | setdiff(colnames(.tbl), colnames(selected_tbl)) 22 | } 23 | 24 | # Replicate deprecated `rlang::squash()` 25 | squash <- function(x) { 26 | out <- purrr::list_flatten(x) 27 | if (identical(out, x)) { 28 | return(out) 29 | } 30 | squash(out) 31 | } 32 | 33 | # General assertions ------------------------------------------------------ 34 | assert_positive_length <- function(.x, .name) { 35 | if (length(.x) == 0) { 36 | stop(.name, " should have positive length.", call. = FALSE) 37 | } 38 | 39 | invisible(.x) 40 | } 41 | 42 | assert_length <- function(.x, .length, .name) { 43 | if (length(.x) != .length) { 44 | stop(.name, " should have length ", .length, ".", call. = FALSE) 45 | } 46 | 47 | invisible(.x) 48 | } 49 | 50 | assert_character <- function(.x, .name) { 51 | if (!is.character(.x)) { 52 | stop(.name, " should be a character vector.", call. 
= FALSE) 53 | } 54 | 55 | invisible(.x) 56 | } 57 | 58 | 59 | # Class utilities --------------------------------------------------------- 60 | add_class <- function(.x, .class) { 61 | class(.x) <- c(.class, class(.x)) 62 | 63 | .x 64 | } 65 | 66 | add_class_cond <- function(.x, .class) { 67 | if (class(.x)[1] != .class) { 68 | class(.x) <- c(.class, class(.x)) 69 | } 70 | 71 | .x 72 | } 73 | 74 | remove_class_cond <- function(.x, .class) { 75 | if (class(.x)[1] == .class) { 76 | class(.x) <- class(.x)[-1] 77 | } 78 | 79 | .x 80 | } 81 | 82 | 83 | # Naming ------------------------------------------------------------------ 84 | compute_def_names <- function(.n = 1, .root = "", .start_ind = 1) { 85 | if (.n < 1) { 86 | return(character(0)) 87 | } else { 88 | paste0(.root, "__", seq_len(.n) + .start_ind - 1) 89 | } 90 | } 91 | 92 | enhance_names <- function(.name, .prefix = "", .root = "", .suffix = "", 93 | .start_ind = 1) { 94 | if (length(.name) == 0) { 95 | return(.name) 96 | } 97 | 98 | def_name <- compute_def_names(length(.name), .root, .start_ind) 99 | 100 | is_empty_name <- .name == "" 101 | .name[is_empty_name] <- def_name[is_empty_name] 102 | 103 | paste0(.prefix, .name, .suffix) 104 | } 105 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | title: ruler 2 | template: 3 | params: 4 | bootswatch: united 5 | 6 | reference: 7 | - title: Package description 8 | contents: 9 | - ruler-package 10 | - title: Data structures 11 | contents: 12 | - is_exposure 13 | - is_packs_info 14 | - is_report 15 | - title: Rules and packs 16 | contents: 17 | - rules 18 | - rule-packs 19 | - data-pack 20 | - group-pack 21 | - column-pack 22 | - row-pack 23 | - cell-pack 24 | - title: Validation 25 | contents: 26 | - expose 27 | - act_after_exposure 28 | - assert_any_breaker 29 | - title: Helpers 30 | contents: 31 | - any_breaker 32 | - bind_exposures 33 
| - inside_punct 34 | - spread_groups 35 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | 3 | coverage: 4 | status: 5 | project: 6 | default: 7 | target: auto 8 | threshold: 1% 9 | informational: true 10 | patch: 11 | default: 12 | target: auto 13 | threshold: 1% 14 | informational: true 15 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Submission details 2 | 3 | This is a maintenance update in reaction to CRAN errors and notes. 4 | 5 | ## Test environments 6 | 7 | * Local EndeavourOS 6.2.8-arch1-1 install, R 4.2.3 8 | * GitHub Action on x86_64-apple-darwin17.0 (64-bit), release and development version (2023-03-26 r84067) 9 | * GitHub Action on x86_64-w64-mingw32 (64-bit), release and development version (2023-03-27 r84084 ucrt) 10 | * win-builder, release and development version (2023-03-26 r84066 ucrt) 11 | 12 | ## R CMD check results 13 | 14 | 0 errors | 0 warnings | 0 notes 15 | 16 | --- 17 | 18 | ## Reverse dependencies 19 | 20 | There are no reverse dependencies. 21 | -------------------------------------------------------------------------------- /docs/404.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Page not found (404) • ruler 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 20 | 21 | 22 | 23 | 24 |
25 |
80 | 81 | 82 | 83 | 84 |
85 |
86 | 89 | 90 | Content not found. Please use links in the navbar. 91 | 92 |
93 | 94 | 98 | 99 |
100 | 101 | 102 | 103 | 114 |
115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /docs/LICENSE-text.html: -------------------------------------------------------------------------------- 1 | 2 | License • ruler 6 | 7 | 8 |
9 |
56 | 57 | 58 | 59 |
60 |
61 | 64 | 65 |
YEAR: 2017
66 | COPYRIGHT HOLDER: Evgeni Chasnovski
67 | 
68 | 69 |
70 | 71 | 74 | 75 |
76 | 77 | 78 | 79 |
88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | -------------------------------------------------------------------------------- /docs/LICENSE.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | License • ruler 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 35 | 36 | 37 | 38 | 39 | 40 |
41 |
42 | 97 | 98 | 99 |
100 | 101 |
102 |
103 | 106 | 107 |
YEAR: 2017
108 | COPYRIGHT HOLDER: Evgeni Chasnovski
109 | 
110 | 111 |
112 | 113 |
114 | 115 | 116 | 126 |
127 | 128 | 129 | 130 | -------------------------------------------------------------------------------- /docs/articles/design-and-format_files/anchor-sections-1.0/anchor-sections.css: -------------------------------------------------------------------------------- 1 | /* Styles for section anchors */ 2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} 3 | a.anchor-section::before {content: '#';} 4 | .hasAnchor:hover a.anchor-section {visibility: visible;} 5 | -------------------------------------------------------------------------------- /docs/articles/design-and-format_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? 
x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/design-and-format_files/header-attrs-2.5/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | Articles • ruler 6 | 7 | 8 |
9 |
56 | 57 | 58 | 59 |
60 |
61 | 64 | 65 |
66 |

All vignettes

67 |

68 | 69 |
Design Process and Exposure Format
70 |
71 |
Rule Packs
72 |
73 |
Validation
74 |
75 |
76 |
77 |
78 | 79 | 80 |
89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /docs/articles/rule-packs_files/anchor-sections-1.0/anchor-sections.css: -------------------------------------------------------------------------------- 1 | /* Styles for section anchors */ 2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} 3 | a.anchor-section::before {content: '#';} 4 | .hasAnchor:hover a.anchor-section {visibility: visible;} 5 | -------------------------------------------------------------------------------- /docs/articles/rule-packs_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? 
x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/rule-packs_files/header-attrs-2.5/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/articles/validation_files/anchor-sections-1.0/anchor-sections.css: -------------------------------------------------------------------------------- 1 | /* Styles for section anchors */ 2 | a.anchor-section {margin-left: 10px; visibility: hidden; color: inherit;} 3 | a.anchor-section::before {content: '#';} 4 | .hasAnchor:hover a.anchor-section {visibility: visible;} 5 | -------------------------------------------------------------------------------- /docs/articles/validation_files/anchor-sections-1.0/anchor-sections.js: -------------------------------------------------------------------------------- 1 | // Anchor sections v1.0 written by Atsushi Yasumoto on Oct 3rd, 2020. 
2 | document.addEventListener('DOMContentLoaded', function() { 3 | // Do nothing if AnchorJS is used 4 | if (typeof window.anchors === 'object' && anchors.hasOwnProperty('hasAnchorJSLink')) { 5 | return; 6 | } 7 | 8 | const h = document.querySelectorAll('h1, h2, h3, h4, h5, h6'); 9 | 10 | // Do nothing if sections are already anchored 11 | if (Array.from(h).some(x => x.classList.contains('hasAnchor'))) { 12 | return null; 13 | } 14 | 15 | // Use section id when pandoc runs with --section-divs 16 | const section_id = function(x) { 17 | return ((x.classList.contains('section') || (x.tagName === 'SECTION')) 18 | ? x.id : ''); 19 | }; 20 | 21 | // Add anchors 22 | h.forEach(function(x) { 23 | const id = x.id || section_id(x.parentElement); 24 | if (id === '') { 25 | return null; 26 | } 27 | let anchor = document.createElement('a'); 28 | anchor.href = '#' + id; 29 | anchor.classList = ['anchor-section']; 30 | x.classList.add('hasAnchor'); 31 | x.appendChild(anchor); 32 | }); 33 | }); 34 | -------------------------------------------------------------------------------- /docs/articles/validation_files/header-attrs-2.5/header-attrs.js: -------------------------------------------------------------------------------- 1 | // Pandoc 2.9 adds attributes on both header and div. We remove the former (to 2 | // be compatible with the behavior of Pandoc < 2.8). 3 | document.addEventListener('DOMContentLoaded', function(e) { 4 | var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); 5 | var i, h, a; 6 | for (i = 0; i < hs.length; i++) { 7 | h = hs[i]; 8 | if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 9 | a = h.attributes; 10 | while (a.length > 0) h.removeAttribute(a[0].name); 11 | } 12 | }); 13 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | Authors and Citation • ruler 6 | 7 | 8 |
9 |
56 | 57 | 58 | 59 |
60 |
61 |
62 | 65 | 66 | 67 |
  • 68 |

    Evgeni Chasnovski. Author, maintainer. 69 |

    70 |
  • 71 |
72 |
73 |
74 |

Citation

75 | Source: DESCRIPTION 76 |
77 |
78 | 79 | 80 |

Chasnovski E (2023). 81 | ruler: Tidy Data Validation Reports. 82 | https://echasnovski.github.io/ruler/, 83 | https://github.com/echasnovski/ruler. 84 |

85 |
@Manual{,
 86 |   title = {ruler: Tidy Data Validation Reports},
 87 |   author = {Evgeni Chasnovski},
 88 |   year = {2023},
 89 |   note = {https://echasnovski.github.io/ruler/,
 90 | https://github.com/echasnovski/ruler},
 91 | }
92 | 93 |
94 | 95 |
96 | 97 | 98 | 99 |
108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | 6 | /* modified from https://github.com/twbs/bootstrap/blob/94b4076dd2efba9af71f0b18d4ee4b163aa9e0dd/docs/assets/css/src/docs.css#L548-L601 */ 7 | 8 | /* All levels of nav */ 9 | nav[data-toggle='toc'] .nav > li > a { 10 | display: block; 11 | padding: 4px 20px; 12 | font-size: 13px; 13 | font-weight: 500; 14 | color: #767676; 15 | } 16 | nav[data-toggle='toc'] .nav > li > a:hover, 17 | nav[data-toggle='toc'] .nav > li > a:focus { 18 | padding-left: 19px; 19 | color: #563d7c; 20 | text-decoration: none; 21 | background-color: transparent; 22 | border-left: 1px solid #563d7c; 23 | } 24 | nav[data-toggle='toc'] .nav > .active > a, 25 | nav[data-toggle='toc'] .nav > .active:hover > a, 26 | nav[data-toggle='toc'] .nav > .active:focus > a { 27 | padding-left: 18px; 28 | font-weight: bold; 29 | color: #563d7c; 30 | background-color: transparent; 31 | border-left: 2px solid #563d7c; 32 | } 33 | 34 | /* Nav: second level (shown on .active) */ 35 | nav[data-toggle='toc'] .nav .nav { 36 | display: none; /* Hide by default, but at >768px, show it */ 37 | padding-bottom: 10px; 38 | } 39 | nav[data-toggle='toc'] .nav .nav > li > a { 40 | padding-top: 1px; 41 | padding-bottom: 1px; 42 | padding-left: 30px; 43 | font-size: 12px; 44 | font-weight: normal; 45 | } 46 | nav[data-toggle='toc'] .nav .nav > li > a:hover, 47 | nav[data-toggle='toc'] .nav .nav > li > a:focus { 48 | padding-left: 29px; 49 | } 50 | nav[data-toggle='toc'] .nav .nav > .active > a, 51 | nav[data-toggle='toc'] .nav .nav > .active:hover > a, 52 | 
nav[data-toggle='toc'] .nav .nav > .active:focus > a { 53 | padding-left: 28px; 54 | font-weight: 500; 55 | } 56 | 57 | /* from https://github.com/twbs/bootstrap/blob/e38f066d8c203c3e032da0ff23cd2d6098ee2dd6/docs/assets/css/src/docs.css#L631-L634 */ 58 | nav[data-toggle='toc'] .nav > .active > ul { 59 | display: block; 60 | } 61 | -------------------------------------------------------------------------------- /docs/bootstrap-toc.js: -------------------------------------------------------------------------------- 1 | /*! 2 | * Bootstrap Table of Contents v0.4.1 (http://afeld.github.io/bootstrap-toc/) 3 | * Copyright 2015 Aidan Feldman 4 | * Licensed under MIT (https://github.com/afeld/bootstrap-toc/blob/gh-pages/LICENSE.md) */ 5 | (function() { 6 | 'use strict'; 7 | 8 | window.Toc = { 9 | helpers: { 10 | // return all matching elements in the set, or their descendants 11 | findOrFilter: function($el, selector) { 12 | // http://danielnouri.org/notes/2011/03/14/a-jquery-find-that-also-finds-the-root-element/ 13 | // http://stackoverflow.com/a/12731439/358804 14 | var $descendants = $el.find(selector); 15 | return $el.filter(selector).add($descendants).filter(':not([data-toc-skip])'); 16 | }, 17 | 18 | generateUniqueIdBase: function(el) { 19 | var text = $(el).text(); 20 | var anchor = text.trim().toLowerCase().replace(/[^A-Za-z0-9]+/g, '-'); 21 | return anchor || el.tagName.toLowerCase(); 22 | }, 23 | 24 | generateUniqueId: function(el) { 25 | var anchorBase = this.generateUniqueIdBase(el); 26 | for (var i = 0; ; i++) { 27 | var anchor = anchorBase; 28 | if (i > 0) { 29 | // add suffix 30 | anchor += '-' + i; 31 | } 32 | // check if ID already exists 33 | if (!document.getElementById(anchor)) { 34 | return anchor; 35 | } 36 | } 37 | }, 38 | 39 | generateAnchor: function(el) { 40 | if (el.id) { 41 | return el.id; 42 | } else { 43 | var anchor = this.generateUniqueId(el); 44 | el.id = anchor; 45 | return anchor; 46 | } 47 | }, 48 | 49 | createNavList: function() { 50 | 
return $(''); 51 | }, 52 | 53 | createChildNavList: function($parent) { 54 | var $childList = this.createNavList(); 55 | $parent.append($childList); 56 | return $childList; 57 | }, 58 | 59 | generateNavEl: function(anchor, text) { 60 | var $a = $(''); 61 | $a.attr('href', '#' + anchor); 62 | $a.text(text); 63 | var $li = $('
  • '); 64 | $li.append($a); 65 | return $li; 66 | }, 67 | 68 | generateNavItem: function(headingEl) { 69 | var anchor = this.generateAnchor(headingEl); 70 | var $heading = $(headingEl); 71 | var text = $heading.data('toc-text') || $heading.text(); 72 | return this.generateNavEl(anchor, text); 73 | }, 74 | 75 | // Find the first heading level (`

    `, then `

    `, etc.) that has more than one element. Defaults to 1 (for `

    `). 76 | getTopLevel: function($scope) { 77 | for (var i = 1; i <= 6; i++) { 78 | var $headings = this.findOrFilter($scope, 'h' + i); 79 | if ($headings.length > 1) { 80 | return i; 81 | } 82 | } 83 | 84 | return 1; 85 | }, 86 | 87 | // returns the elements for the top level, and the next below it 88 | getHeadings: function($scope, topLevel) { 89 | var topSelector = 'h' + topLevel; 90 | 91 | var secondaryLevel = topLevel + 1; 92 | var secondarySelector = 'h' + secondaryLevel; 93 | 94 | return this.findOrFilter($scope, topSelector + ',' + secondarySelector); 95 | }, 96 | 97 | getNavLevel: function(el) { 98 | return parseInt(el.tagName.charAt(1), 10); 99 | }, 100 | 101 | populateNav: function($topContext, topLevel, $headings) { 102 | var $context = $topContext; 103 | var $prevNav; 104 | 105 | var helpers = this; 106 | $headings.each(function(i, el) { 107 | var $newNav = helpers.generateNavItem(el); 108 | var navLevel = helpers.getNavLevel(el); 109 | 110 | // determine the proper $context 111 | if (navLevel === topLevel) { 112 | // use top level 113 | $context = $topContext; 114 | } else if ($prevNav && $context === $topContext) { 115 | // create a new level of the tree and switch to it 116 | $context = helpers.createChildNavList($prevNav); 117 | } // else use the current $context 118 | 119 | $context.append($newNav); 120 | 121 | $prevNav = $newNav; 122 | }); 123 | }, 124 | 125 | parseOps: function(arg) { 126 | var opts; 127 | if (arg.jquery) { 128 | opts = { 129 | $nav: arg 130 | }; 131 | } else { 132 | opts = arg; 133 | } 134 | opts.$scope = opts.$scope || $(document.body); 135 | return opts; 136 | } 137 | }, 138 | 139 | // accepts a jQuery object, or an options object 140 | init: function(opts) { 141 | opts = this.helpers.parseOps(opts); 142 | 143 | // ensure that the data attribute is in place for styling 144 | opts.$nav.attr('data-toggle', 'toc'); 145 | 146 | var $topContext = this.helpers.createChildNavList(opts.$nav); 147 | var topLevel = 
this.helpers.getTopLevel(opts.$scope); 148 | var $headings = this.helpers.getHeadings(opts.$scope, topLevel); 149 | this.helpers.populateNav($topContext, topLevel, $headings); 150 | } 151 | }; 152 | 153 | $(function() { 154 | $('nav[data-toggle="toc"]').each(function(i, el) { 155 | var $nav = $(el); 156 | Toc.init($nav); 157 | }); 158 | }); 159 | })(); 160 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/extra.css: -------------------------------------------------------------------------------- 1 | .navbar-default { 2 | background-color: #008080; 3 | border-color: #008080; 4 | } 5 | 6 | #toc { 7 | font-size: 150%; 8 | } 9 | 10 | #toc .nav a { 11 | font-size: 100%; 12 | } 13 | 14 | pre { 15 | background-color: #ffffff; 16 | border-color: #000000; 17 | border-width: 1px; 18 | overflow-x: auto; 19 | } 20 | 21 | pre code { 22 | overflow-wrap: normal; 23 | white-space: pre; 24 | } 25 | 26 | /* Idea style */ 27 | .hljs { 28 | display: block; 29 | overflow-x: auto; 30 | padding: 0.5em; 31 | color: #000; 32 | background: #fff; 33 | } 34 | 35 | .hljs-subst, 36 | .hljs-title { 37 | font-weight: normal; 38 | color: #000; 39 | } 40 | 41 | .hljs-comment, 42 | .hljs-quote { 43 | color: #808080; 44 | font-style: italic; 45 | } 46 | 47 | .hljs-meta { 48 | color: #808000; 49 | } 50 | 51 | .hljs-tag { 52 | background: #efefef; 53 | } 54 | 55 | .hljs-section, 56 | .hljs-name, 57 | .hljs-literal, 58 | .hljs-keyword, 59 | .hljs-selector-tag, 60 | .hljs-type, 61 | .hljs-selector-id, 62 | .hljs-selector-class { 63 | font-weight: bold; 64 | color: #000080; 65 | } 66 | 67 | .hljs-attribute, 68 | .hljs-number, 69 | .hljs-regexp, 70 | .hljs-link { 71 | 
font-weight: bold; 72 | color: #0000ff; 73 | } 74 | 75 | .hljs-number, 76 | .hljs-regexp, 77 | .hljs-link { 78 | font-weight: normal; 79 | } 80 | 81 | .hljs-string { 82 | color: #008000; 83 | font-weight: bold; 84 | } 85 | 86 | .hljs-symbol, 87 | .hljs-bullet, 88 | .hljs-formula { 89 | color: #000; 90 | background: #d0eded; 91 | font-style: italic; 92 | } 93 | 94 | .hljs-doctag { 95 | text-decoration: underline; 96 | } 97 | 98 | .hljs-variable, 99 | .hljs-template-variable { 100 | color: #660e7a; 101 | } 102 | 103 | .hljs-addition { 104 | background: #baeeba; 105 | } 106 | 107 | .hljs-deletion { 108 | background: #ffc8bd; 109 | } 110 | 111 | .hljs-emphasis { 112 | font-style: italic; 113 | } 114 | 115 | .hljs-strong { 116 | font-weight: bold; 117 | } 118 | 119 | /* Custom highlighting */ 120 | 121 | .hljs-tag, .hljs-formula, .hljs-addition, .hljs-deletion { 122 | background: #ffffff; 123 | } 124 | 125 | /* Strings */ 126 | .hljs-string { 127 | color: #008000; 128 | font-weight: bold; 129 | } 130 | 131 | /* Comments */ 132 | .hljs-comment { 133 | color: #404080; 134 | font-style: normal; 135 | } 136 | 137 | .hljs-fun-param { 138 | color: #ff4000; 139 | } 140 | 141 | .hljs-pipe, .hljs-assign { 142 | font-weight: bold; 143 | } 144 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | */ 2 | /* 3 | Source: https://github.com/leafo/sticky-kit 4 | License: MIT 5 | */ 6 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var 
v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 7 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
    "))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 8 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 10 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 11 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n 2 | 3 | 5 | 8 | 12 | 13 | 
-------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $('.navbar-fixed-top').headroom(); 6 | 7 | $('body').css('padding-top', $('.navbar').height() + 10); 8 | $(window).resize(function(){ 9 | $('body').css('padding-top', $('.navbar').height() + 10); 10 | }); 11 | 12 | $('[data-toggle="tooltip"]').tooltip(); 13 | 14 | var cur_path = paths(location.pathname); 15 | var links = $("#navbar ul li a"); 16 | var max_length = -1; 17 | var pos = -1; 18 | for (var i = 0; i < links.length; i++) { 19 | if (links[i].getAttribute("href") === "#") 20 | continue; 21 | // Ignore external links 22 | if (links[i].host !== location.host) 23 | continue; 24 | 25 | var nav_path = paths(links[i].pathname); 26 | 27 | var length = prefix_length(nav_path, cur_path); 28 | if (length > max_length) { 29 | max_length = length; 30 | pos = i; 31 | } 32 | } 33 | 34 | // Add class to parent
  • , and enclosing
  • if in dropdown 35 | if (pos >= 0) { 36 | var menu_anchor = $(links[pos]); 37 | menu_anchor.parent().addClass("active"); 38 | menu_anchor.closest("li.dropdown").addClass("active"); 39 | } 40 | }); 41 | 42 | function paths(pathname) { 43 | var pieces = pathname.split("/"); 44 | pieces.shift(); // always starts with / 45 | 46 | var end = pieces[pieces.length - 1]; 47 | if (end === "index.html" || end === "") 48 | pieces.pop(); 49 | return(pieces); 50 | } 51 | 52 | // Returns -1 if not found 53 | function prefix_length(needle, haystack) { 54 | if (needle.length > haystack.length) 55 | return(-1); 56 | 57 | // Special case for length-0 haystack, since for loop won't run 58 | if (haystack.length === 0) { 59 | return(needle.length === 0 ? 0 : -1); 60 | } 61 | 62 | for (var i = 0; i < haystack.length; i++) { 63 | if (needle[i] != haystack[i]) 64 | return(i); 65 | } 66 | 67 | return(haystack.length); 68 | } 69 | 70 | /* Clipboard --------------------------*/ 71 | 72 | function changeTooltipMessage(element, msg) { 73 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 74 | element.setAttribute('data-original-title', msg); 75 | $(element).tooltip('show'); 76 | element.setAttribute('data-original-title', tooltipOriginalTitle); 77 | } 78 | 79 | if(ClipboardJS.isSupported()) { 80 | $(document).ready(function() { 81 | var copyButton = ""; 82 | 83 | $("div.sourceCode").addClass("hasCopyButton"); 84 | 85 | // Insert copy buttons: 86 | $(copyButton).prependTo(".hasCopyButton"); 87 | 88 | // Initialize tooltips: 89 | $('.btn-copy-ex').tooltip({container: 'body'}); 90 | 91 | // Initialize clipboard: 92 | var clipboardBtnCopies = new ClipboardJS('[data-clipboard-copy]', { 93 | text: function(trigger) { 94 | return trigger.parentNode.textContent.replace(/\n#>[^\n]*/g, ""); 95 | } 96 | }); 97 | 98 | clipboardBtnCopies.on('success', function(e) { 99 | changeTooltipMessage(e.trigger, 'Copied!'); 100 | e.clearSelection(); 101 | }); 102 | 103 | 
clipboardBtnCopies.on('error', function() { 104 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 105 | }); 106 | }); 107 | } 108 | })(window.jQuery || window.$) 109 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: '3.1' 2 | pkgdown: 2.0.7 3 | pkgdown_sha: ~ 4 | articles: 5 | design-and-format: design-and-format.html 6 | rule-packs: rule-packs.html 7 | validation: validation.html 8 | last_built: 2023-03-30T07:18Z 9 | 10 | -------------------------------------------------------------------------------- /docs/reference/Rplot001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/echasnovski/ruler/a17dd1313e07f2302fccc10b81432895b16653bd/docs/reference/Rplot001.png -------------------------------------------------------------------------------- /docs/reference/add_pack_names.html: -------------------------------------------------------------------------------- 1 | 2 | Add pack names to single exposures — add_pack_names • ruler 8 | 9 | 10 |
    11 |
    58 | 59 | 60 | 61 |
    62 |
    63 | 68 | 69 |
    70 |

    Function to add pack names to single exposures. Converts list of 71 | single exposures to list of exposures without 72 | validating.

    73 |
    74 | 75 |
    76 |
    add_pack_names(.single_exposures)
    77 |
    78 | 79 |
    80 |

    Arguments

    81 |
    .single_exposures
    82 |

    List of single exposures.

    83 | 84 |
    85 | 86 |
    87 | 90 |
    91 | 92 | 93 |
    96 | 97 |
    98 |

    Site built with pkgdown 2.0.7.

    99 |
    100 | 101 |
    102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /docs/reference/expose_single.html: -------------------------------------------------------------------------------- 1 | 2 | Expose data to single rule pack — expose_single • ruler 7 | 8 | 9 |
    10 |
    57 | 58 | 59 | 60 |
    61 |
    62 | 67 | 68 |
    69 |

    The workhorse generic function for doing exposure. The result is 70 | single_exposure.

    71 |
    72 | 73 |
    74 |
    expose_single(.tbl, .pack, .rule_sep, .remove_obeyers, ...)
    75 |
    76 | 77 |
    78 |

    Arguments

    79 |
    .tbl
    80 |

    Data frame of interest.

    81 | 82 | 83 |
    .pack
    84 |

    Rule pack function.

    85 | 86 | 87 |
    .rule_sep
    88 |

    Regular expression used as separator between column and 89 | rule names in col packs and cell packs.

    90 | 91 | 92 |
    .remove_obeyers
    93 |

    Whether to remove elements which obey rules from 94 | report.

    95 | 96 | 97 |
    ...
    98 |

    Further arguments passed to or from other methods.

    99 | 100 |
    101 | 102 |
    103 | 106 |
    107 | 108 | 109 |
    112 | 113 |
    114 |

    Site built with pkgdown 2.0.7.

    115 |
    116 | 117 |
    118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | -------------------------------------------------------------------------------- /docs/reference/inside_punct.html: -------------------------------------------------------------------------------- 1 | 2 | Inside punctuation regular expression — inside_punct • ruler 7 | 8 | 9 |
    10 |
    57 | 58 | 59 | 60 |
    61 |
    62 | 67 | 68 |
    69 |

    Function to construct regular expression of form: 'non alpha-numeric 70 | characters' + 'some characters' + 'non alpha-numeric characters'.

    71 |
    72 | 73 |
    74 |
    inside_punct(.x = "\\._\\.")
    75 |
    76 | 77 |
    78 |

    Arguments

    79 |
    .x
    80 |

    Middle characters to be put between non alpha-numeric characters.

    81 | 82 |
    83 | 84 |
    85 |

    Examples

    86 |
    inside_punct()
     87 | #> [1] "[^[:alnum:]]*\\._\\.[^[:alnum:]]*"
     88 | 
     89 | inside_punct("abc")
     90 | #> [1] "[^[:alnum:]]*abc[^[:alnum:]]*"
     91 | 
    92 |
    93 |
    94 | 97 |
    98 | 99 | 100 |
    103 | 104 |
    105 |

    Site built with pkgdown 2.0.7.

    106 |
    107 | 108 |
    109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | -------------------------------------------------------------------------------- /docs/reference/pack_info.html: -------------------------------------------------------------------------------- 1 | 2 | Pack info — pack_info • ruler 7 | 8 | 9 |
    10 |
    57 | 58 | 59 | 60 |
    61 |
    62 | 67 | 68 |
    69 |

    An S3 class pack_info to represent information about pack in single exposure. Its content is as in packs_info but without 70 | column 'name'.

    71 |
    72 | 73 |
    74 |
    new_pack_info(.pack, .remove_obeyers)
    75 |
    76 | 77 |
    78 |

    Arguments

    79 |
    .pack
    80 |

    Rule pack.

    81 | 82 | 83 |
    .remove_obeyers
    84 |

    Value of .remove_obeyers argument of expose() with 85 | which .pack was applied.

    86 | 87 |
    88 | 89 |
    90 | 93 |
    94 | 95 | 96 |
    99 | 100 |
    101 |

    Site built with pkgdown 2.0.7.

    102 |
    103 | 104 |
    105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | -------------------------------------------------------------------------------- /docs/reference/pipe.html: -------------------------------------------------------------------------------- 1 | 2 | Pipe operator — %>% • ruler 6 | 7 | 8 |
    9 |
    56 | 57 | 58 | 59 |
    60 |
    61 | 66 | 67 |
    68 |

    See magrittr::%>% for details.

    69 |
    70 | 71 |
    72 |
    lhs %>% rhs
    73 |
    74 | 75 | 76 |
    77 | 80 |
    81 | 82 | 83 |
    86 | 87 |
    88 |

    Site built with pkgdown 2.0.7.

    89 |
    90 | 91 |
    92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | -------------------------------------------------------------------------------- /docs/reference/reexports.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Objects exported from other packages — reexports • ruler 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 63 | 64 | 65 | 66 | 67 | 68 | 69 |
    70 |
    71 | 131 | 132 | 133 | 134 |
    135 | 136 |
    137 |
    138 | 143 | 144 |
    145 |

    These objects are imported from other packages. Follow the links 146 | below to see their documentation.

    147 | 148 |
    dplyr

    %>%

    149 | 150 | 151 |
    152 | 153 | 154 | 155 | 156 |
    157 | 162 |
    163 | 164 | 165 |
    166 | 169 | 170 |
    171 |

    Site built with pkgdown 1.5.1.

    172 |
    173 | 174 |
    175 |
    176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | -------------------------------------------------------------------------------- /docs/reference/single_exposure.html: -------------------------------------------------------------------------------- 1 | 2 | Single exposure — single_exposure • ruler 9 | 10 | 11 |
    12 |
    59 | 60 | 61 | 62 |
    63 |
    64 | 69 | 70 |
    71 |

    An S3 class single_exposure to represent exposure of data to one rule 72 | pack. It is a list of the following structure: pack_info - single 73 | pack_info object; report - tidy data validation report 74 | without column pack.

    75 |
    76 | 77 | 78 |
    79 |

    Details

    80 |

    Single exposure is implemented in order to encapsulate preliminary 81 | exposure data from single rule pack. It is needed to impute possibly missing 82 | pack names during exposure. That is why single_exposure doesn't 83 | contain pack name in any form.

    84 |
    85 | 86 |
    87 | 90 |
    91 | 92 | 93 |
    96 | 97 |
    98 |

    Site built with pkgdown 2.0.7.

    99 |
    100 | 101 |
    102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | /404.html 5 | 6 | 7 | /LICENSE-text.html 8 | 9 | 10 | /LICENSE.html 11 | 12 | 13 | /articles/design-and-format.html 14 | 15 | 16 | /articles/index.html 17 | 18 | 19 | /articles/rule-packs.html 20 | 21 | 22 | /articles/validation.html 23 | 24 | 25 | /authors.html 26 | 27 | 28 | /index.html 29 | 30 | 31 | /news/index.html 32 | 33 | 34 | /reference/act_after_exposure.html 35 | 36 | 37 | /reference/add_pack_names.html 38 | 39 | 40 | /reference/any_breaker.html 41 | 42 | 43 | /reference/assert_any_breaker.html 44 | 45 | 46 | /reference/bind_exposures.html 47 | 48 | 49 | /reference/cell-pack.html 50 | 51 | 52 | /reference/column-pack.html 53 | 54 | 55 | /reference/data-pack.html 56 | 57 | 58 | /reference/expose.html 59 | 60 | 61 | /reference/expose_single.html 62 | 63 | 64 | /reference/exposure.html 65 | 66 | 67 | /reference/group-pack.html 68 | 69 | 70 | /reference/index.html 71 | 72 | 73 | /reference/inside_punct.html 74 | 75 | 76 | /reference/pack_info.html 77 | 78 | 79 | /reference/packs_info.html 80 | 81 | 82 | /reference/pipe.html 83 | 84 | 85 | /reference/reexports.html 86 | 87 | 88 | /reference/row-pack.html 89 | 90 | 91 | /reference/rule-packs.html 92 | 93 | 94 | /reference/ruler-package.html 95 | 96 | 97 | /reference/ruler-report.html 98 | 99 | 100 | /reference/rules.html 101 | 102 | 103 | /reference/single_exposure.html 104 | 105 | 106 | /reference/spread_groups.html 107 | 108 | 109 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | ’s 2 | all’ 3 | assertr 4 | assertthat 5 | behaviour 6 | binded 7 | chr 8 | dplyr 9 | funs 10 | github 11 | integerish 12 | keyholder 13 | lgl 14 | 
magrittr 15 | name’ 16 | naniar 17 | obeyers 18 | pack’ 19 | README 20 | reproducibility 21 | sealr 22 | skimr 23 | summarised 24 | tibble 25 | tibbles 26 | tidyverse 27 | whole’ 28 | -------------------------------------------------------------------------------- /man/act_after_exposure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/actions.R 3 | \name{act_after_exposure} 4 | \alias{act_after_exposure} 5 | \title{Act after exposure} 6 | \usage{ 7 | act_after_exposure(.tbl, .trigger, .actor) 8 | } 9 | \arguments{ 10 | \item{.tbl}{Result of \link[=expose]{exposure}, i.e. data frame with \link{exposure} 11 | attribute.} 12 | 13 | \item{.trigger}{Function which takes \code{.tbl} as argument and returns \code{TRUE} if 14 | some action needs to be performed.} 15 | 16 | \item{.actor}{Function which takes \code{.tbl} as argument and performs the 17 | action.} 18 | } 19 | \description{ 20 | A wrapper for consistent application of some actions based on the data after 21 | exposure. 22 | } 23 | \details{ 24 | Basically \code{act_after_exposure()} is doing the following: 25 | \itemize{ 26 | \item Check that \code{.tbl} has a proper \link{exposure} attribute. 27 | \item Compute whether to perform intended action by computing \code{.trigger(.tbl)}. 28 | \item If trigger results in \code{TRUE} then \code{.actor(.tbl)} \strong{is returned}. In other 29 | case \code{.tbl} is returned. 30 | } 31 | 32 | It is a good idea that \code{.actor} should be doing one of two things: 33 | \itemize{ 34 | \item Making side effects. For example throwing an error (if condition in 35 | \code{.trigger} is met), printing some information and so on. In this case it 36 | should return \code{.tbl} to be used properly inside a \link[magrittr:pipe]{pipe}. 37 | \item Changing \code{.tbl} based on exposure information. 
In this case it should 38 | return the imputed version of \code{.tbl}. 39 | } 40 | } 41 | \examples{ 42 | exposure_printer <- function(.tbl) { 43 | print(get_exposure(.tbl)) 44 | .tbl 45 | } 46 | mtcars_exposed <- mtcars \%>\% 47 | expose(data_packs(. \%>\% dplyr::summarise(nrow_low = nrow(.) > 50))) \%>\% 48 | act_after_exposure(any_breaker, exposure_printer) 49 | } 50 | \seealso{ 51 | \link{any_breaker} for trigger which returns \code{TRUE} in case any rule 52 | breaker is found in exposure. 53 | 54 | \link{assert_any_breaker} for usage of \code{act_after_exposure()} in building data 55 | validation pipelines. 56 | } 57 | -------------------------------------------------------------------------------- /man/add_pack_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expose-helpers.R 3 | \name{add_pack_names} 4 | \alias{add_pack_names} 5 | \title{Add pack names to single exposures} 6 | \usage{ 7 | add_pack_names(.single_exposures) 8 | } 9 | \arguments{ 10 | \item{.single_exposures}{List of \link[=single_exposure]{single exposures}.} 11 | } 12 | \description{ 13 | Function to add pack names to single exposures. Converts list of 14 | \link[=single_exposure]{single exposures} to list of \link[=exposure]{exposures} without 15 | validating. 16 | } 17 | \keyword{internal} 18 | -------------------------------------------------------------------------------- /man/any_breaker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/actions.R 3 | \name{any_breaker} 4 | \alias{any_breaker} 5 | \title{Is there any breaker in exposure?} 6 | \usage{ 7 | any_breaker(.tbl) 8 | } 9 | \arguments{ 10 | \item{.tbl}{Result of \link[=expose]{exposure}, i.e. 
data frame with \link{exposure} 11 | attribute.} 12 | } 13 | \description{ 14 | Function designed to be used as trigger in \code{\link[=act_after_exposure]{act_after_exposure()}}. Returns 15 | \code{TRUE} if \link{exposure} attribute of \code{.tbl} has any information about data units 16 | not obeying the rules, i.e. rule breakers. 17 | } 18 | \examples{ 19 | mtcars \%>\% 20 | expose(data_packs(. \%>\% dplyr::summarise(nrow_low = nrow(.) > 50))) \%>\% 21 | any_breaker() 22 | } 23 | \seealso{ 24 | \link{assert_any_breaker} for implicit usage of \code{any_breaker()}. 25 | } 26 | -------------------------------------------------------------------------------- /man/assert_any_breaker.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/actions.R 3 | \name{assert_any_breaker} 4 | \alias{assert_any_breaker} 5 | \title{Assert presence of rule breaker} 6 | \usage{ 7 | assert_any_breaker(.tbl, .type = "error", .silent = FALSE, ...) 8 | } 9 | \arguments{ 10 | \item{.tbl}{Result of \link[=expose]{exposure}, i.e. data frame with \link{exposure} 11 | attribute.} 12 | 13 | \item{.type}{The type of assertion. Can be only one of "error", "warning" or 14 | "message".} 15 | 16 | \item{.silent}{If \code{TRUE} no printing of rule breaker information is done.} 17 | 18 | \item{...}{Arguments for printing rule breaker information.} 19 | } 20 | \description{ 21 | Function to assert if \link[=expose]{exposure} resulted in \link[=any_breaker]{detecting} 22 | some rule breakers. 23 | } 24 | \details{ 25 | In case of breaker presence this function does the following: 26 | \itemize{ 27 | \item In case \code{.silent} is \code{FALSE} print rows from exposure 28 | \link[=ruler-report]{report} corresponding to rule breakers. 29 | \item Make assertion of the chosen \code{.type} about breaker presence in exposure.
30 | \item Return \code{.tbl} (for using inside a \link[magrittr:pipe]{pipe}). 31 | } 32 | 33 | If there are no breakers only \code{.tbl} is returned. 34 | } 35 | \examples{ 36 | \dontrun{ 37 | mtcars \%>\% 38 | expose(data_packs(. \%>\% dplyr::summarise(nrow_low = nrow(.) > 50))) \%>\% 39 | assert_any_breaker() 40 | } 41 | } 42 | \seealso{ 43 | \link{any_breaker} for checking of breaker presence in exposure result. 44 | 45 | \link{act_after_exposure} for making general actions based on exposure result. 46 | } 47 | -------------------------------------------------------------------------------- /man/bind_exposures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expose-helpers.R 3 | \name{bind_exposures} 4 | \alias{bind_exposures} 5 | \title{Bind exposures} 6 | \usage{ 7 | bind_exposures(..., .validate_output = TRUE) 8 | } 9 | \arguments{ 10 | \item{...}{Exposures to bind.} 11 | 12 | \item{.validate_output}{Whether to validate with \code{\link[=is_exposure]{is_exposure()}} if the 13 | output is exposure.} 14 | } 15 | \description{ 16 | Function to bind several exposures into one. 17 | } 18 | \details{ 19 | \strong{Note} that the output might not have names in list-column \code{fun} 20 | in \link[=packs_info]{packs info}, which depends on version of 21 | \link[dplyr:dplyr-package]{dplyr} package. 22 | } 23 | \examples{ 24 | my_data_packs <- data_packs( 25 | data_dims = . \%>\% dplyr::summarise(nrow_low = nrow(.) < 10), 26 | data_sum = . \%>\% dplyr::summarise(sum = sum(.) 
< 1000) 27 | ) 28 | 29 | ref_exposure <- mtcars \%>\% 30 | expose(my_data_packs) \%>\% 31 | get_exposure() 32 | 33 | exposure_1 <- mtcars \%>\% 34 | expose(my_data_packs[1]) \%>\% 35 | get_exposure() 36 | exposure_2 <- mtcars \%>\% 37 | expose(my_data_packs[2]) \%>\% 38 | get_exposure() 39 | exposure_binded <- bind_exposures(exposure_1, exposure_2) 40 | 41 | exposure_pipe <- mtcars \%>\% 42 | expose(my_data_packs[1]) \%>\% 43 | expose(my_data_packs[2]) \%>\% 44 | get_exposure() 45 | 46 | identical(exposure_binded, ref_exposure) 47 | 48 | identical(exposure_pipe, ref_exposure) 49 | } 50 | -------------------------------------------------------------------------------- /man/cell-pack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/packs.R 3 | \name{cell-pack} 4 | \alias{cell-pack} 5 | \title{Cell rule pack} 6 | \description{ 7 | Cell rule pack is a \link[=rule-packs]{rule pack} which defines a set of rules for 8 | cells, i.e. functions which convert cells of interest to logical values. It 9 | should return a data frame with the following properties: 10 | \itemize{ 11 | \item Number of rows equals to \strong{number of rows for checked cells}. 12 | \item Column names should be treated as concatenation of 13 | \bold{'column name of check cell' + 'separator' + 'rule name'} 14 | \item Values indicate whether the \strong{cell} follows the rule. 15 | } 16 | } 17 | \details{ 18 | This format is inspired by \link[dplyr:mutate_all]{scoped variants of transmute()}. 19 | 20 | The most common way to define cell pack is by creating a 21 | \link[magrittr:pipe]{functional sequence} containing one of: 22 | \itemize{ 23 | \item \code{transmute_all(.funs = rules(...))}. 24 | \item \code{transmute_if(.predicate, .funs = rules(...))}. 25 | \item \code{transmute_at(.vars, .funs = rules(...))}. 
26 | } 27 | 28 | \strong{Note} that (as of \code{dplyr} version 0.7.4) when only one column is 29 | transmuted, names of the output don't have a necessary structure. The 'column 30 | name of check cell' is missing which results (after \link[=expose]{exposure}) 31 | into empty string in \code{var} column of \link[=ruler-report]{validation report}. The 32 | current way of dealing with this is to name the input column (see examples). 33 | } 34 | \section{Using rules()}{ 35 | 36 | Using \code{\link[=rules]{rules()}} to create list of functions for scoped \code{dplyr} "mutating" 37 | verbs (such as \link[dplyr:summarise_all]{summarise_all()} and 38 | \link[dplyr:mutate_all]{transmute_all()}) is recommended because: 39 | \itemize{ 40 | \item It is a convenient way to ensure consistent naming of rules without manual 41 | name. 42 | \item It adds a common prefix to all rule names. This helps in defining 43 | separator as prefix surrounded by any number of non-alphanumeric values. 44 | } 45 | } 46 | 47 | \section{Note about rearranging rows}{ 48 | 49 | \strong{Note} that during exposure packs are applied to \link[keyholder:keys-set]{keyed object} with \link[keyholder:keyholder-id]{id key}. So they 50 | can rearrange rows as long as it is done with \link[keyholder:keyholder-supported-funs]{functions supported by keyholder}. Rows will be tracked and 51 | recognized as in the original data frame of interest. 52 | } 53 | 54 | \examples{ 55 | cell_outlier_rules <- . \%>\% dplyr::transmute_at( 56 | c("disp", "qsec"), 57 | rules(z_score = abs(. - mean(.)) / sd(.) > 1) 58 | ) 59 | 60 | cell_packs(outlier = cell_outlier_rules) 61 | 62 | # Dealing with one column edge case 63 | improper_pack <- . \%>\% dplyr::transmute_at( 64 | dplyr::vars(vs), 65 | rules(improper_is_neg = . < 0) 66 | ) 67 | 68 | proper_pack <- . \%>\% dplyr::transmute_at( 69 | dplyr::vars(vs = vs), 70 | rules(proper_is_neg = . 
< 0) 71 | ) 72 | 73 | mtcars[1:2, ] \%>\% 74 | expose(cell_packs(improper_pack, proper_pack)) \%>\% 75 | get_report() 76 | } 77 | \seealso{ 78 | \link[=data-pack]{Data pack}, \link[=group-pack]{group pack}, \link[=column-pack]{column pack}, \link[=row-pack]{row pack}. 79 | } 80 | -------------------------------------------------------------------------------- /man/column-pack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/packs.R 3 | \name{column-pack} 4 | \alias{column-pack} 5 | \title{Column rule pack} 6 | \description{ 7 | Column rule pack is a \link[=rule-packs]{rule pack} which defines a set of rules 8 | for columns as a whole, i.e. functions which convert columns of interest to 9 | logical values. It should return a data frame with the following properties: 10 | \itemize{ 11 | \item Number of rows equals to \strong{one}. 12 | \item Column names should be treated as concatenation of 13 | \bold{'check column name' + 'separator' + 'rule name'}. 14 | \item Values indicate whether the \strong{column as a whole} follows the rule. 15 | } 16 | } 17 | \details{ 18 | This format is inspired by \code{dplyr}'s 19 | \link[dplyr:summarise_all]{scoped variants of summarise()} applied to non-grouped 20 | data. 21 | 22 | The most common way to define column pack is by creating a 23 | \link[magrittr:pipe]{functional sequence} with no grouping and ending with 24 | one of: 25 | \itemize{ 26 | \item \code{summarise_all(.funs = rules(...))}. 27 | \item \code{summarise_if(.predicate, .funs = rules(...))}. 28 | \item \code{summarise_at(.vars, .funs = rules(...))}. 29 | } 30 | 31 | \strong{Note} that (as of \code{dplyr} version 0.7.4) when only one column is 32 | summarised, names of the output don't have a necessary structure. 
The 'check 33 | column name' is missing which results (after \link[=expose]{exposure}) into empty 34 | string in \code{var} column of \link[=ruler-report]{validation report}. The current way 35 | of dealing with this is to name the input column (see examples). 36 | } 37 | \section{Using rules()}{ 38 | 39 | Using \code{\link[=rules]{rules()}} to create list of functions for scoped \code{dplyr} "mutating" 40 | verbs (such as \link[dplyr:summarise_all]{summarise_all()} and 41 | \link[dplyr:mutate_all]{transmute_all()}) is recommended because: 42 | \itemize{ 43 | \item It is a convenient way to ensure consistent naming of rules without manual 44 | name. 45 | \item It adds a common prefix to all rule names. This helps in defining 46 | separator as prefix surrounded by any number of non-alphanumeric values. 47 | } 48 | } 49 | 50 | \examples{ 51 | # Validating present columns 52 | numeric_column_rules <- . \%>\% dplyr::summarise_if( 53 | is.numeric, 54 | rules(mean(.) > 5, sd(.) < 10) 55 | ) 56 | character_column_rules <- . \%>\% dplyr::summarise_if( 57 | is.character, 58 | rules(. \%in\% letters[1:4]) 59 | ) 60 | 61 | col_packs( 62 | num_col = numeric_column_rules, 63 | chr_col = character_column_rules 64 | ) 65 | 66 | # Dealing with one column edge case 67 | improper_pack <- . \%>\% dplyr::summarise_at( 68 | dplyr::vars(vs), 69 | rules(improper_is_chr = is.character) 70 | ) 71 | 72 | proper_pack <- . \%>\% dplyr::summarise_at( 73 | dplyr::vars(vs = vs), 74 | rules(proper_is_chr = is.character) 75 | ) 76 | 77 | mtcars \%>\% 78 | expose(col_packs(improper_pack, proper_pack)) \%>\% 79 | get_report() 80 | } 81 | \seealso{ 82 | \link[=data-pack]{Data pack}, \link[=group-pack]{group pack}, \link[=row-pack]{row pack}, \link[=cell-pack]{cell pack}. 
83 | } 84 | -------------------------------------------------------------------------------- /man/data-pack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/packs.R 3 | \name{data-pack} 4 | \alias{data-pack} 5 | \title{Data rule pack} 6 | \description{ 7 | Data rule pack is a \link[=rule-packs]{rule pack} which defines a set of rules for 8 | data as a whole, i.e. functions which convert data to logical values. It 9 | should return a data frame with the following properties: 10 | \itemize{ 11 | \item Number of rows equals to \strong{one}. 12 | \item Column names should be treated as \strong{rule names}. 13 | \item Values indicate whether the \strong{data as a whole} follows the rule. 14 | } 15 | } 16 | \details{ 17 | This format is inspired by \code{dplyr}'s \link[dplyr:summarise]{summarise()} applied 18 | to non-grouped data. 19 | 20 | The most common way to define data pack is by creating a 21 | \link[magrittr:pipe]{functional sequence} with no grouping and ending with 22 | \code{summarise(...)}. 23 | } 24 | \examples{ 25 | data_dims_rules <- . \%>\% 26 | dplyr::summarise( 27 | nrow_low = nrow(.) > 10, 28 | nrow_up = nrow(.) < 20, 29 | ncol_low = ncol(.) > 5, 30 | ncol_up = ncol(.) < 10 31 | ) 32 | data_na_rules <- . \%>\% 33 | dplyr::summarise(all_not_na = Negate(anyNA)(.)) 34 | 35 | data_packs( 36 | data_nrow = data_dims_rules, 37 | data_na = data_na_rules 38 | ) 39 | } 40 | \seealso{ 41 | \link[=group-pack]{Group pack}, \link[=column-pack]{Column pack}, \link[=row-pack]{row pack}, \link[=cell-pack]{cell pack}. 
42 | } 43 | -------------------------------------------------------------------------------- /man/expose.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expose.R 3 | \name{expose} 4 | \alias{expose} 5 | \title{Expose data to rule packs} 6 | \usage{ 7 | expose(.tbl, ..., .rule_sep = inside_punct("\\\\._\\\\."), 8 | .remove_obeyers = TRUE, .guess = TRUE) 9 | } 10 | \arguments{ 11 | \item{.tbl}{Data frame of interest.} 12 | 13 | \item{...}{Rule packs. They can be in pure form or inside a list 14 | (at any depth).} 15 | 16 | \item{.rule_sep}{Regular expression used as separator between column and 17 | rule names in \link[=column-pack]{col packs} and \link[=cell-pack]{cell packs}.} 18 | 19 | \item{.remove_obeyers}{Whether to remove elements which obey rules from 20 | report.} 21 | 22 | \item{.guess}{Whether to guess type of unsupported rule pack type (see 23 | Details).} 24 | } 25 | \value{ 26 | A \code{.tbl} with possibly added 'exposure' attribute containing the 27 | resulting \link{exposure}. If \code{.tbl} already contains 'exposure' attribute then 28 | the result is binded with it. 29 | } 30 | \description{ 31 | Function for applying rule packs to data. 32 | } 33 | \details{ 34 | \code{expose()} applies all supplied rule packs to data, creates an 35 | \link{exposure} object based on results and stores it to attribute 'exposure'. 36 | It is guaranteed that \code{.tbl} is not modified in any other way in order to 37 | use \code{expose()} inside a \code{\link[magrittr]{pipe}}. 38 | 39 | It is a good idea to name all rule packs: explicitly in \code{...} (if they are 40 | supplied not inside list) or during creation with respective rule pack 41 | function. In case of missing name it is imputed based on possibly existing 42 | exposure attribute in \code{.tbl} and supplied rule packs. 
Imputation is similar 43 | to one in \code{\link[=rules]{rules()}} but applied to every pack type separately. 44 | 45 | Default value for \code{.rule_sep} is the regular expression \verb{characters ._. surrounded by non alphanumeric characters}. It is picked to be used 46 | smoothly with \code{dplyr}'s \link[dplyr:scoped]{scoped verbs} and \code{\link[=rules]{rules()}} instead 47 | of pure list. In most cases it shouldn't be changed but if needed it 48 | should align with \code{.prefix} in \code{\link[=rules]{rules()}}. 49 | } 50 | \section{Guessing}{ 51 | 52 | To work properly in some edge cases one should specify pack types with 53 | \link[=rule-packs]{appropriate function}. However with \code{.guess} equal to \code{TRUE} 54 | \code{expose} will guess the pack type based on its output after applying to 55 | \code{.tbl}. It uses the following features: 56 | \itemize{ 57 | \item Presence of non-logical columns: if present then the guess is \link[=group-pack]{group pack}. Grouping columns are guessed as all non-logical. This 58 | works incorrectly if some grouping column is logical: it will be guessed as 59 | result of applying the rule. \strong{Note} that on most occasions this edge case 60 | will produce error about grouping columns defining non-unique levels. 61 | \item Combination of whether number of rows equals 1 (\code{n_rows_one}) and 62 | presence of \code{.rule_sep} in all column names (\code{all_contain_sep}). Guesses 63 | are: 64 | \itemize{ 65 | \item \link[=data-pack]{Data pack} if \code{n_rows_one == TRUE} and \code{all_contain_sep == FALSE}. 66 | \item \link[=column-pack]{Column pack} if \code{n_rows_one == TRUE} and 67 | \code{all_contain_sep == TRUE}. 68 | \item \link[=row-pack]{Row pack} if \code{n_rows_one == FALSE} and \code{all_contain_sep == FALSE}. This works incorrectly if output has one row which is checked. 69 | In this case it will be guessed as data pack.
70 | \item \link[=cell-pack]{Cell pack} if \code{n_rows_one == FALSE} and \code{all_contain_sep == TRUE}. This works incorrectly if output has one row in which cells 71 | are checked. In this case it will be guessed as column pack. 72 | } 73 | } 74 | } 75 | 76 | \examples{ 77 | my_rule_pack <- . \%>\% dplyr::summarise(nrow_neg = nrow(.) < 0) 78 | my_data_packs <- data_packs(my_data_pack_1 = my_rule_pack) 79 | 80 | # These pipes give identical results 81 | mtcars \%>\% 82 | expose(my_data_packs) \%>\% 83 | get_report() 84 | 85 | mtcars \%>\% 86 | expose(my_data_pack_1 = my_rule_pack) \%>\% 87 | get_report() 88 | 89 | # This throws an error because no pack type is specified for my_rule_pack 90 | \dontrun{ 91 | mtcars \%>\% expose(my_data_pack_1 = my_rule_pack, .guess = FALSE) 92 | } 93 | 94 | # Edge cases against using 'guess = TRUE' for robust code 95 | group_rule_pack <- . \%>\% 96 | dplyr::mutate(vs_one = vs == 1) \%>\% 97 | dplyr::group_by(vs_one, am) \%>\% 98 | dplyr::summarise(n_low = dplyr::n() > 10) 99 | group_rule_pack_dummy <- . \%>\% 100 | dplyr::mutate(vs_one = vs == 1) \%>\% 101 | dplyr::group_by(mpg, vs_one, wt) \%>\% 102 | dplyr::summarise(n_low = dplyr::n() > 10) 103 | row_rule_pack <- . \%>\% dplyr::transmute(neg_row_sum = rowSums(.) < 0) 104 | cell_rule_pack <- . \%>\% dplyr::transmute_all(rules(neg_value = . < 0)) 105 | 106 | # Only column 'am' is guessed as grouping which defines non-unique levels. 107 | \dontrun{ 108 | mtcars \%>\% 109 | expose(group_rule_pack, .remove_obeyers = FALSE, .guess = TRUE) \%>\% 110 | get_report() 111 | } 112 | 113 | # Values in `var` should contain combination of three grouping columns but 114 | # column 'vs_one' is guessed as rule. No error is thrown because the guessed 115 | # grouping column define unique levels. 116 | mtcars \%>\% 117 | expose(group_rule_pack_dummy, .remove_obeyers = FALSE, .guess = TRUE) \%>\% 118 | get_report() 119 | 120 | # Results should have in column 'id' value 1 and not 0. 
121 | mtcars \%>\% 122 | dplyr::slice(1) \%>\% 123 | expose(row_rule_pack) \%>\% 124 | get_report() 125 | 126 | mtcars \%>\% 127 | dplyr::slice(1) \%>\% 128 | expose(cell_rule_pack) \%>\% 129 | get_report() 130 | } 131 | -------------------------------------------------------------------------------- /man/expose_single.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/expose.R 3 | \name{expose_single} 4 | \alias{expose_single} 5 | \title{Expose data to single rule pack} 6 | \usage{ 7 | expose_single(.tbl, .pack, .rule_sep, .remove_obeyers, ...) 8 | } 9 | \arguments{ 10 | \item{.tbl}{Data frame of interest.} 11 | 12 | \item{.pack}{Rule pack function.} 13 | 14 | \item{.rule_sep}{Regular expression used as separator between column and 15 | rule names in \link[=column-pack]{col packs} and \link[=cell-pack]{cell packs}.} 16 | 17 | \item{.remove_obeyers}{Whether to remove elements which obey rules from 18 | report.} 19 | 20 | \item{...}{Further arguments passed to or from other methods.} 21 | } 22 | \description{ 23 | The workhorse generic function for doing exposure. The result is 24 | \link{single_exposure}. 
25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/exposure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exposure.R 3 | \name{exposure} 4 | \alias{exposure} 5 | \alias{is_exposure} 6 | \alias{get_exposure} 7 | \alias{remove_exposure} 8 | \title{Exposure} 9 | \usage{ 10 | is_exposure(.x) 11 | 12 | get_exposure(.object) 13 | 14 | remove_exposure(.object) 15 | } 16 | \arguments{ 17 | \item{.x}{Object to test.} 18 | 19 | \item{.object}{Object to get or remove \code{exposure} attribute from.} 20 | } 21 | \value{ 22 | \code{get_exposure()} returns \code{object} if it is exposure and its attribute 23 | 'exposure' otherwise. 24 | 25 | \code{remove_exposure()} returns \code{object} with removed attribute 'exposure'. 26 | } 27 | \description{ 28 | Exposure is a result of \link[=expose]{exposing} data to rules. It is 29 | implemented with S3 class \code{exposure} which is a list of the following 30 | structure: \code{packs_info} - a \link{packs_info} object; \code{report} - 31 | \link[=ruler-report]{tidy data validation report}. 32 | } 33 | \examples{ 34 | my_col_packs <- col_packs( 35 | col_sum_props = . \%>\% dplyr::summarise_all( 36 | rules( 37 | col_sum_low = sum(.) > 100, 38 | col_sum_high = sum(.)
< 1000 39 | ) 40 | ) 41 | ) 42 | mtcars_exposed <- mtcars \%>\% expose(my_col_packs) 43 | mtcars_exposure <- mtcars_exposed \%>\% get_exposure() 44 | 45 | is_exposure(mtcars_exposure) 46 | 47 | identical(remove_exposure(mtcars_exposed), mtcars) 48 | 49 | identical(get_exposure(mtcars_exposure), mtcars_exposure) 50 | } 51 | -------------------------------------------------------------------------------- /man/group-pack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/packs.R 3 | \name{group-pack} 4 | \alias{group-pack} 5 | \title{Group rule pack} 6 | \description{ 7 | Group rule pack is a \link[=rule-packs]{rule pack} which defines a set of rules 8 | for groups of rows as a whole, i.e. functions which convert groups of 9 | interest to logical values. It should return a data frame with the following 10 | properties: 11 | \itemize{ 12 | \item There should be present some columns which combined values \strong{uniquely} 13 | describe group. They should be defined during creation with 14 | \link[=rule-packs]{group_packs()}. 15 | \item Number of rows equals to \strong{number of checked groups}. 16 | \item Names of non-grouping columns should be treated as \strong{rule names}. 17 | \item Values indicate whether the \strong{group as a whole} follows the rule. 18 | } 19 | } 20 | \details{ 21 | This format is inspired by \code{dplyr}'s \link[dplyr:summarise]{summarise()} applied 22 | to grouped data. 23 | 24 | The most common way to define group pack is by creating a 25 | \link[magrittr:pipe]{functional sequence} with grouping and ending with 26 | \code{summarise(...)}. 27 | } 28 | \section{Interpretation}{ 29 | 30 | Group pack output is interpreted in the following way: 31 | \itemize{ 32 | \item All grouping columns are \link[tidyr:unite]{united} with delimiter \code{.group_sep} 33 | (which is an argument of \code{group_packs()}).
34 | \item Levels of the resulting column are treated as names of some new variables 35 | which should be exposed as a whole. Names of non-grouping columns are treated 36 | as rule names. They are transformed in \link[=column-pack]{column pack} format and 37 | interpreted accordingly. 38 | } 39 | 40 | Exposure result of group pack is different from others in a way that column 41 | \code{var} in \link[=ruler-report]{exposure report} doesn't represent the actual column 42 | in data. 43 | } 44 | 45 | \examples{ 46 | vs_am_rules <- . \%>\% 47 | dplyr::group_by(vs, am) \%>\% 48 | dplyr::summarise( 49 | nrow_low = n(.) > 10, 50 | nrow_up = n(.) < 20, 51 | rowmeans_low = rowMeans(.) > 19 52 | ) 53 | 54 | group_packs(vs_am = vs_am_rules, .group_vars = c("vs", "am")) 55 | } 56 | \seealso{ 57 | \link[=data-pack]{Data pack}, \link[=column-pack]{Column pack}, \link[=row-pack]{row pack}, \link[=cell-pack]{cell pack}. 58 | } 59 | -------------------------------------------------------------------------------- /man/inside_punct.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{inside_punct} 4 | \alias{inside_punct} 5 | \title{Inside punctuation regular expression} 6 | \usage{ 7 | inside_punct(.x = "\\\\._\\\\.") 8 | } 9 | \arguments{ 10 | \item{.x}{Middle characters to be put between non alpha-numeric characters.} 11 | } 12 | \description{ 13 | Function to construct regular expression of form: 'non alpha-numeric 14 | characters' + 'some characters' + 'non alpha-numeric characters'. 
15 | } 16 | \examples{ 17 | inside_punct() 18 | 19 | inside_punct("abc") 20 | } 21 | -------------------------------------------------------------------------------- /man/pack_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exposure.R 3 | \name{pack_info} 4 | \alias{pack_info} 5 | \alias{new_pack_info} 6 | \title{Pack info} 7 | \usage{ 8 | new_pack_info(.pack, .remove_obeyers) 9 | } 10 | \arguments{ 11 | \item{.pack}{\link[=rule-packs]{Rule pack}.} 12 | 13 | \item{.remove_obeyers}{Value of \code{.remove_obeyers} argument of \code{\link[=expose]{expose()}} with 14 | which \code{.pack} was applied.} 15 | } 16 | \description{ 17 | An S3 class \code{pack_info} to represent information about pack in \link[=single_exposure]{single exposure}. Its content is as in \link{packs_info} but without 18 | column 'name'. 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/packs_info.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exposure.R 3 | \name{packs_info} 4 | \alias{packs_info} 5 | \alias{is_packs_info} 6 | \alias{get_packs_info} 7 | \title{Packs info} 8 | \usage{ 9 | is_packs_info(.x, .skip_class = FALSE) 10 | 11 | get_packs_info(.object) 12 | } 13 | \arguments{ 14 | \item{.x}{Object to test.} 15 | 16 | \item{.skip_class}{Whether to skip checking inheritance from \code{packs_info}.} 17 | 18 | \item{.object}{Object to get \code{packs_info} value from \code{exposure} attribute.} 19 | } 20 | \value{ 21 | \code{get_packs_info()} returns \code{packs_info} attribute of \code{object} if it 22 | is exposure and of its 'exposure' attribute otherwise. 23 | } 24 | \description{ 25 | An S3 class \code{packs_info} to represent information about packs in \link{exposure}. 
26 | It is a tibble with the following structure: 27 | \itemize{ 28 | \item \strong{name} \verb{<chr>} : Name of the pack. 29 | \item \strong{type} \verb{<chr>} : \link[=rule-packs]{Pack type}. 30 | \item \strong{fun} \verb{<list>} : List (preferably unnamed) of rule pack functions. 31 | \item \strong{remove_obeyers} \verb{<lgl>} : value of \code{.remove_obeyers} argument of 32 | \code{\link[=expose]{expose()}} with which pack was applied. 33 | } 34 | } 35 | \details{ 36 | To avoid possible confusion it is preferred (but not required) that 37 | list-column \code{fun} doesn't have names. Names of packs are stored in \code{name} 38 | column. During \link[=expose]{exposure} \code{fun} is always created without names. 39 | } 40 | \examples{ 41 | my_row_packs <- row_packs( 42 | row_mean_props = . \%>\% dplyr::transmute(row_mean = rowMeans(.)) \%>\% 43 | dplyr::transmute( 44 | row_mean_low = row_mean > 20, 45 | row_mean_high = row_mean < 60 46 | ), 47 | row_outlier = . \%>\% dplyr::transmute(row_sum = rowSums(.)) \%>\% 48 | dplyr::transmute( 49 | not_row_outlier = abs(row_sum - mean(row_sum)) / sd(row_sum) < 1.5 50 | ) 51 | ) 52 | my_data_packs <- data_packs( 53 | data_dims = . \%>\% dplyr::summarise( 54 | nrow = nrow(.) == 32, 55 | ncol = ncol(.) == 5 56 | ) 57 | ) 58 | 59 | mtcars_exposed <- mtcars \%>\% 60 | expose(my_data_packs, .remove_obeyers = FALSE) \%>\% 61 | expose(my_row_packs) 62 | 63 | mtcars_exposed \%>\% get_packs_info() 64 | 65 | mtcars_exposed \%>\% 66 | get_packs_info() \%>\% 67 | is_packs_info() 68 | } 69 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \description{ 10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
11 | } 12 | \keyword{internal} 13 | -------------------------------------------------------------------------------- /man/row-pack.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/packs.R 3 | \name{row-pack} 4 | \alias{row-pack} 5 | \title{Row rule pack} 6 | \description{ 7 | Row rule pack is a \link[=rule-packs]{rule pack} which defines a set of rules for 8 | rows as a whole, i.e. functions which convert rows of interest to logical 9 | values. It should return a data frame with the following properties: 10 | \itemize{ 11 | \item Number of rows equals to \strong{number of checked rows}. 12 | \item Column names should be treated as \strong{rule names}. 13 | \item Values indicate whether the \strong{row as a whole} follows the rule. 14 | } 15 | } 16 | \details{ 17 | This format is inspired by \code{dplyr}'s \link[dplyr:transmute]{transmute()}. 18 | 19 | The most common way to define row pack is by creating a 20 | \link[magrittr:pipe]{functional sequence} containing \code{transmute(...)}. 21 | } 22 | \section{Note about rearranging rows}{ 23 | 24 | \strong{Note} that during exposure packs are applied to \link[keyholder:keys-set]{keyed object} with \link[keyholder:keyholder-id]{id key}. So they 25 | can rearrange rows as long as it is done with \link[keyholder:keyholder-supported-funs]{functions supported by keyholder}. Rows will be tracked and 26 | recognized as in the original data frame of interest. 27 | } 28 | 29 | \examples{ 30 | some_row_mean_rules <- . \%>\% 31 | dplyr::slice(1:3) \%>\% 32 | dplyr::mutate(row_mean = rowMeans(.)) \%>\% 33 | dplyr::transmute( 34 | row_mean_low = row_mean > 10, 35 | row_mean_up = row_mean < 20 36 | ) 37 | all_row_sum_rules <- . 
\%>\% 38 | dplyr::mutate(row_sum = rowSums(.)) \%>\% 39 | dplyr::transmute(row_sum_low = row_sum > 30) 40 | 41 | row_packs( 42 | some_row_mean_rules, 43 | all_row_sum_rules 44 | ) 45 | } 46 | \seealso{ 47 | \link[=data-pack]{Data pack}, \link[=group-pack]{group pack}, \link[=column-pack]{column pack}, \link[=cell-pack]{cell pack}. 48 | } 49 | -------------------------------------------------------------------------------- /man/rule-packs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/packs.R 3 | \name{rule-packs} 4 | \alias{rule-packs} 5 | \alias{packs} 6 | \alias{data_packs} 7 | \alias{group_packs} 8 | \alias{col_packs} 9 | \alias{row_packs} 10 | \alias{cell_packs} 11 | \title{Create rule packs} 12 | \usage{ 13 | data_packs(...) 14 | 15 | group_packs(..., .group_vars, .group_sep = ".") 16 | 17 | col_packs(...) 18 | 19 | row_packs(...) 20 | 21 | cell_packs(...) 22 | } 23 | \arguments{ 24 | \item{...}{Functions which define packs. They can be in pure form or inside a 25 | list (at any depth).} 26 | 27 | \item{.group_vars}{Character vector of names of grouping variables.} 28 | 29 | \item{.group_sep}{String to be used as separator when uniting grouping 30 | levels for \code{var} column in \link[=ruler-report]{exposure report}.} 31 | } 32 | \value{ 33 | \code{data_packs()} returns a list of what should be \link[=data-pack]{data rule packs}, \code{group_packs()} - \link[=group-pack]{group rule packs}, 34 | \code{col_packs()} - \link[=column-pack]{column rule packs}, \code{row_packs()} - \link[=row-pack]{row rule packs}, \code{cell_packs()} - \link[=cell-pack]{cell rule packs}. 35 | } 36 | \description{ 37 | Functions for creating different kinds of rule packs. \strong{Rule} is a function 38 | which converts data unit of interest (data, group, column, row, cell) to 39 | logical value indicating whether this object satisfies certain condition. 
40 | \strong{Rule pack} is a function which combines several rules into one functional 41 | block. It takes a data frame of interest and returns a data frame with 42 | certain structure and column naming scheme. Types of packs differ in 43 | interpretation of their output. 44 | } 45 | \details{ 46 | These functions convert \code{...} to list, apply \code{rlang}'s 47 | \link[rlang:flatten]{squash()} and add appropriate classes (\code{group_packs()} also 48 | adds necessary attributes). Also they are only constructors and do not check 49 | for validity of certain pack. \strong{Note} that it is allowed for elements of 50 | \code{...} to not have names: they will be computed during exposure. However it is 51 | a good idea to manually name packs. 52 | } 53 | -------------------------------------------------------------------------------- /man/ruler-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ruler-package.R 3 | \docType{package} 4 | \name{ruler-package} 5 | \alias{ruler} 6 | \alias{ruler-package} 7 | \title{ruler: Rule Your Data} 8 | \description{ 9 | \code{ruler} offers a set of tools for creating tidy data validation reports using 10 | \href{https://dplyr.tidyverse.org}{dplyr} grammar of data manipulation. It 11 | is designed to be flexible and extendable in terms of creating rules and 12 | using their output. 13 | } 14 | \details{ 15 | The common workflow is: 16 | \itemize{ 17 | \item Define dplyr-style \link[=rule-packs]{packs} of rules for basic data units (data, 18 | group, column, row, cell) to obey. 19 | \item \link[=expose]{Expose} some data to those rules. The result is the same data with 20 | possibly created \link{exposure} attribute. Exposure contains 21 | information \link[=packs_info]{about applied packs} and \link[=ruler-report]{tidy data validation report}. 
22 | \item Use data and exposure to perform some \link[=act_after_exposure]{actions}: 23 | \link[=assert_any_breaker]{assert about rule breakers}, impute data, remove 24 | outliers and so on. 25 | } 26 | 27 | To learn more about \code{ruler} browse vignettes with \code{browseVignettes(package = "ruler")}. The preferred order is: 28 | \enumerate{ 29 | \item Design process and exposure format. 30 | \item Rule packs. 31 | \item Validation 32 | } 33 | } 34 | \seealso{ 35 | Useful links: 36 | \itemize{ 37 | \item \url{https://echasnovski.github.io/ruler/} 38 | \item \url{https://github.com/echasnovski/ruler} 39 | \item Report bugs at \url{https://github.com/echasnovski/ruler/issues} 40 | } 41 | 42 | } 43 | \author{ 44 | \strong{Maintainer}: Evgeni Chasnovski \email{evgeni.chasnovski@gmail.com} (\href{https://orcid.org/0000-0002-1617-4019}{ORCID}) 45 | 46 | } 47 | -------------------------------------------------------------------------------- /man/ruler-report.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exposure.R 3 | \name{ruler-report} 4 | \alias{ruler-report} 5 | \alias{is_report} 6 | \alias{get_report} 7 | \title{Tidy data validation report} 8 | \usage{ 9 | is_report(.x, .skip_class = FALSE) 10 | 11 | get_report(.object) 12 | } 13 | \arguments{ 14 | \item{.x}{Object to test.} 15 | 16 | \item{.skip_class}{Whether to skip checking inheritance from \code{ruler_report}.} 17 | 18 | \item{.object}{Object to get \code{report} value from \code{exposure} attribute.} 19 | } 20 | \value{ 21 | \code{get_report()} returns \code{report} element of \code{object} if it is 22 | exposure and of its 'exposure' attribute otherwise. 
23 | } 24 | \description{ 25 | A tibble representing the data validation result of certain data units in 26 | tidy way: 27 | \itemize{ 28 | \item \strong{pack} \verb{<chr>} : Name of rule pack from column 'name' of corresponding 29 | \link{packs_info} object. 30 | \item \strong{rule} \verb{<chr>} : Name of the rule defined in rule pack. 31 | \item \strong{var} \verb{<chr>} : Name of the variable which validation result is reported. 32 | Value '.all' is reserved and interpreted as 'all columns as a whole'. 33 | \strong{Note} that \code{var} doesn't always represent the actual column in data frame 34 | (see \link[=group-pack]{group packs}). 35 | \item \strong{id} \verb{<int>} : Index of the row in tested data frame which validation 36 | result is reported. Value 0 is reserved and interpreted as 'all rows as a 37 | whole'. 38 | \item \strong{value} \verb{<lgl>} : Whether the described data unit obeys the rule. 39 | } 40 | } 41 | \details{ 42 | There are four basic combinations of \code{var} and \code{id} values which 43 | define five basic data units: 44 | \itemize{ 45 | \item \code{var == '.all'} and \code{id == 0}: Data as a whole. 46 | \item \code{var != '.all'} and \code{id == 0}: Group (\code{var} shouldn't be an actual column 47 | name) or column (\code{var} should be an actual column name) as a whole. 48 | \item \code{var == '.all'} and \code{id != 0}: Row as a whole. 49 | \item \code{var != '.all'} and \code{id != 0}: Described cell. 50 | } 51 | } 52 | \examples{ 53 | my_row_packs <- row_packs( 54 | row_mean_props = . \%>\% dplyr::transmute(row_mean = rowMeans(.)) \%>\% 55 | dplyr::transmute( 56 | row_mean_low = row_mean > 20, 57 | row_mean_high = row_mean < 60 58 | ), 59 | row_outlier = . \%>\% dplyr::transmute(row_sum = rowSums(.)) \%>\% 60 | dplyr::transmute( 61 | not_row_outlier = abs(row_sum - mean(row_sum)) / sd(row_sum) < 1.5 62 | ) 63 | ) 64 | my_data_packs <- data_packs( 65 | data_dims = . \%>\% dplyr::summarise( 66 | nrow = nrow(.) == 32, 67 | ncol = ncol(.)
== 5 68 | ) 69 | ) 70 | 71 | mtcars_exposed <- mtcars \%>\% 72 | expose(my_data_packs, .remove_obeyers = FALSE) \%>\% 73 | expose(my_row_packs) 74 | 75 | mtcars_exposed \%>\% get_report() 76 | 77 | mtcars_exposed \%>\% 78 | get_report() \%>\% 79 | is_report() 80 | } 81 | -------------------------------------------------------------------------------- /man/rules.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/rules.R 3 | \name{rules} 4 | \alias{rules} 5 | \title{Create a list of rules} 6 | \usage{ 7 | rules(..., .prefix = "._.") 8 | } 9 | \arguments{ 10 | \item{...}{Bare expression(s) with \code{.} as input.} 11 | 12 | \item{.prefix}{Prefix to be added to function names.} 13 | } 14 | \description{ 15 | \code{rules()} is a function designed to create input for \code{.funs} argument of 16 | scoped \code{dplyr} "mutating" verbs (such as 17 | \link[dplyr:summarise_all]{summarise_all()} and 18 | \link[dplyr:mutate_all]{transmute_all()}). It converts bare expressions 19 | with \code{.} as input into formulas and repairs names of the output. 20 | } 21 | \details{ 22 | \code{rules()} repairs names by the following algorithm: 23 | \itemize{ 24 | \item Absent names are replaced with the 'rule__\\{ind\\}' where \\{ind\\} is the 25 | index of function position in the \code{...} . 26 | \item \code{.prefix} is added at the beginning of all names. The default is \code{._.} . It 27 | is picked for its symbolism (it is the Morse code of letter 'R') and rare 28 | occurrence in names. In those rare cases it can be manually changed but 29 | this will not be tracked further. \strong{Note} that it is a good idea for 30 | \code{.prefix} to be \link[=make.names]{syntactic}, as \code{dplyr} will force tibble 31 | names to be syntactic. To check if string is "good", use it as input to 32 | \code{make.names()}: if output equals that string then it is a "good" choice.
33 | } 34 | } 35 | \examples{ 36 | # `rules()` accepts bare expression calls with `.` as input, which is not 37 | # possible with advised `list()` approach of `dplyr` 38 | dplyr::summarise_all(mtcars[, 1:2], rules(sd, "sd", sd(.), ~ sd(.))) 39 | 40 | dplyr::summarise_all(mtcars[, 1:2], rules(sd, .prefix = "a_a_")) 41 | 42 | # Use `...` in `summarise_all()` to supply extra arguments 43 | dplyr::summarise_all(data.frame(x = c(1:2, NA)), rules(sd), na.rm = TRUE) 44 | } 45 | -------------------------------------------------------------------------------- /man/single_exposure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/exposure.R 3 | \name{single_exposure} 4 | \alias{single_exposure} 5 | \title{Single exposure} 6 | \description{ 7 | An S3 class \code{single_exposure} to represent exposure of data to \strong{one} rule 8 | pack. It is a list of the following structure: \code{pack_info} - single 9 | \link{pack_info} object; \code{report} - \link[=ruler-report]{tidy data validation report} 10 | without column \code{pack}. 11 | } 12 | \details{ 13 | Single exposure is implemented in order to encapsulate preliminary 14 | exposure data from single rule pack. It is needed to impute possibly missing 15 | pack names during \link[=expose]{exposure}. That is why \code{single_exposure} doesn't 16 | contain pack name in any form. 
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spread_groups.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spread-groups.R 3 | \name{spread_groups} 4 | \alias{spread_groups} 5 | \title{Spread grouping columns} 6 | \usage{ 7 | spread_groups(.tbl, ..., .group_sep = ".", .col_sep = "._.") 8 | } 9 | \arguments{ 10 | \item{.tbl}{Data frame with result of grouped summary.} 11 | 12 | \item{...}{A selection of grouping columns (as in \code{\link[tidyr:unite]{tidyr::unite()}}).} 13 | 14 | \item{.group_sep}{A string to be used as separator of grouping levels.} 15 | 16 | \item{.col_sep}{A string to be used as separator in column pack.} 17 | } 18 | \value{ 19 | A data frame in \link[=column-pack]{column pack} format. 20 | } 21 | \description{ 22 | Function that is used during interpretation of \link[=group-pack]{group pack} 23 | output. It converts grouped \link[dplyr:summarise]{summary} into \link[=column-pack]{column pack} format. 24 | } 25 | \details{ 26 | Multiple grouping variables are converted to one with 27 | \code{\link[tidyr:unite]{tidyr::unite()}} and separator \code{.group_sep}. New values are then treated as 28 | variable names which should be validated and which represent the group data 29 | as a whole. 
30 | } 31 | \examples{ 32 | mtcars_grouped_summary <- mtcars \%>\% 33 | dplyr::group_by(vs, am) \%>\% 34 | dplyr::summarise(n_low = dplyr::n() > 6, n_high = dplyr::n() < 10) 35 | 36 | spread_groups(mtcars_grouped_summary, vs, am) 37 | 38 | spread_groups(mtcars_grouped_summary, vs, am, .group_sep = "__") 39 | 40 | spread_groups(mtcars_grouped_summary, vs, am, .col_sep = "__") 41 | } 42 | -------------------------------------------------------------------------------- /pkgdown/extra.css: -------------------------------------------------------------------------------- 1 | .navbar-default { 2 | background-color: #008080; 3 | border-color: #008080; 4 | } 5 | 6 | #toc { 7 | font-size: 150%; 8 | } 9 | 10 | #toc .nav a { 11 | font-size: 100%; 12 | } 13 | 14 | pre { 15 | background-color: #ffffff; 16 | border-color: #000000; 17 | border-width: 1px; 18 | overflow-x: auto; 19 | } 20 | 21 | pre code { 22 | overflow-wrap: normal; 23 | white-space: pre; 24 | } 25 | 26 | /* Idea style */ 27 | .hljs { 28 | display: block; 29 | overflow-x: auto; 30 | padding: 0.5em; 31 | color: #000; 32 | background: #fff; 33 | } 34 | 35 | .hljs-subst, 36 | .hljs-title { 37 | font-weight: normal; 38 | color: #000; 39 | } 40 | 41 | .hljs-comment, 42 | .hljs-quote { 43 | color: #808080; 44 | font-style: italic; 45 | } 46 | 47 | .hljs-meta { 48 | color: #808000; 49 | } 50 | 51 | .hljs-tag { 52 | background: #efefef; 53 | } 54 | 55 | .hljs-section, 56 | .hljs-name, 57 | .hljs-literal, 58 | .hljs-keyword, 59 | .hljs-selector-tag, 60 | .hljs-type, 61 | .hljs-selector-id, 62 | .hljs-selector-class { 63 | font-weight: bold; 64 | color: #000080; 65 | } 66 | 67 | .hljs-attribute, 68 | .hljs-number, 69 | .hljs-regexp, 70 | .hljs-link { 71 | font-weight: bold; 72 | color: #0000ff; 73 | } 74 | 75 | .hljs-number, 76 | .hljs-regexp, 77 | .hljs-link { 78 | font-weight: normal; 79 | } 80 | 81 | .hljs-string { 82 | color: #008000; 83 | font-weight: bold; 84 | } 85 | 86 | .hljs-symbol, 87 | .hljs-bullet, 88 | 
.hljs-formula { 89 | color: #000; 90 | background: #d0eded; 91 | font-style: italic; 92 | } 93 | 94 | .hljs-doctag { 95 | text-decoration: underline; 96 | } 97 | 98 | .hljs-variable, 99 | .hljs-template-variable { 100 | color: #660e7a; 101 | } 102 | 103 | .hljs-addition { 104 | background: #baeeba; 105 | } 106 | 107 | .hljs-deletion { 108 | background: #ffc8bd; 109 | } 110 | 111 | .hljs-emphasis { 112 | font-style: italic; 113 | } 114 | 115 | .hljs-strong { 116 | font-weight: bold; 117 | } 118 | 119 | /* Custom highlighting */ 120 | 121 | .hljs-tag, .hljs-formula, .hljs-addition, .hljs-deletion { 122 | background: #ffffff; 123 | } 124 | 125 | /* Strings */ 126 | .hljs-string { 127 | color: #008000; 128 | font-weight: bold; 129 | } 130 | 131 | /* Comments */ 132 | .hljs-comment { 133 | color: #404080; 134 | font-style: normal; 135 | } 136 | 137 | .hljs-fun-param { 138 | color: #ff4000; 139 | } 140 | 141 | .hljs-pipe, .hljs-assign { 142 | font-weight: bold; 143 | } 144 | -------------------------------------------------------------------------------- /ruler.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(dplyr) 3 | library(rlang) 4 | library(ruler) 5 | 6 | test_check("ruler") 7 | 
-------------------------------------------------------------------------------- /tests/testthat/helper-expose-data.R: -------------------------------------------------------------------------------- 1 | # Results of some packs --------------------------------------------------- 2 | input_data_pack_out <- tibble::tibble("rule__1" = TRUE, "nrow" = FALSE) 3 | input_group_pack_out <- tibble::tibble( 4 | "vs" = c(0, 0, 1, 1), "am" = c(0, 1, 0, 1), 5 | "n_low" = c(TRUE, FALSE, FALSE, FALSE), 6 | "n_high" = c(TRUE, TRUE, TRUE, TRUE) 7 | ) 8 | input_col_pack_out <- tibble::tibble( 9 | "vs_._.rule__1" = TRUE, "am_._.rule__1" = FALSE, 10 | "cyl_._.not_outlier" = TRUE, "vs_._.not_outlier" = TRUE 11 | ) 12 | input_row_pack_out <- tibble::tibble( 13 | "row_rule__1" = rep(TRUE, 2), 14 | "._.rule__2" = c(TRUE, FALSE) 15 | ) %>% keyholder::assign_keys(tibble::tibble(.id = c(1, 3))) 16 | input_cell_pack_out <- tibble::tibble( 17 | "vs_._.rule__1" = rep(TRUE, 2), "am_._.rule__1" = rep(FALSE, 2), 18 | "cyl_._.not_outlier" = c(TRUE, FALSE), "vs_._.not_outlier" = c(TRUE, FALSE) 19 | ) %>% keyholder::assign_keys(tibble::tibble(.id = c(1, 4))) 20 | 21 | 22 | # Exposure data ----------------------------------------------------------- 23 | input_packs <- list( 24 | data = data_packs( 25 | . %>% dplyr::summarise( 26 | nrow_low = nrow(.) > 10, nrow_high = nrow(.) < 20, 27 | ncol_low = ncol(.) > 5, ncol_high = ncol(.) < 10 28 | ) 29 | )[[1]], 30 | group = group_packs( 31 | . %>% dplyr::group_by(vs, am) %>% 32 | dplyr::summarise(n_low = dplyr::n() > 10, n_high = dplyr::n() < 15) %>% 33 | dplyr::ungroup(), 34 | .group_vars = c("vs", "am"), .group_sep = "." 35 | )[[1]], 36 | col = col_packs( 37 | . %>% dplyr::summarise_if( 38 | rlang::is_integerish, 39 | rules(tot_sum = sum(.) > 100) 40 | ) 41 | )[[1]], 42 | row = row_packs( 43 | . 
%>% dplyr::transmute(row_sum = rowSums(.)) %>% 44 | dplyr::transmute( 45 | outlier_sum = abs(row_sum - mean(row_sum)) / sd(row_sum) < 1 46 | ) %>% 47 | dplyr::slice(15:1) 48 | )[[1]], 49 | cell = cell_packs( 50 | . %>% dplyr::transmute_if( 51 | Negate(rlang::is_integerish), 52 | rules(abs(. - mean(.)) / sd(.) < 2) 53 | ) 54 | )[[1]], 55 | col_other = col_packs( 56 | . %>% dplyr::summarise_if( 57 | rlang::is_integerish, 58 | rules( 59 | tot_sum = sum(.) > 100, 60 | .prefix = "_._" 61 | ) 62 | ) 63 | )[[1]], 64 | cell_other = cell_packs( 65 | . %>% dplyr::transmute_if( 66 | Negate(rlang::is_integerish), 67 | rules(abs(. - mean(.)) / sd(.) < 2, 68 | .prefix = "_._" 69 | ) 70 | ) 71 | )[[1]] 72 | ) 73 | input_remove_obeyers <- c( 74 | data = TRUE, group = FALSE, col = FALSE, 75 | row = TRUE, cell = TRUE 76 | ) 77 | input_reports <- list( 78 | data = tibble::tibble( 79 | rule = c("nrow_high", "ncol_high"), 80 | var = rep(".all", 2), 81 | id = rep(0L, 2), 82 | value = rep(FALSE, 2) 83 | ), 84 | group = tibble::tibble( 85 | rule = rep(c("n_low", "n_high"), each = 4), 86 | var = rep(c("0.0", "0.1", "1.0", "1.1"), times = 2), 87 | id = rep(0L, 8), 88 | value = c(TRUE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE) 89 | ), 90 | col = tibble::tibble( 91 | rule = rep("tot_sum", 6), 92 | var = c("cyl", "hp", "vs", "am", "gear", "carb"), 93 | id = rep(0L, 6), 94 | value = c(TRUE, TRUE, FALSE, FALSE, TRUE, FALSE) 95 | ), 96 | row = tibble::tibble( 97 | rule = rep("outlier_sum", 2), 98 | var = rep(".all", 2), 99 | id = c(15L, 7L), 100 | value = rep(FALSE, 2) 101 | ), 102 | cell = tibble::tibble( 103 | rule = rep("rule__1", 7), 104 | var = c("mpg", "mpg", "drat", "wt", "wt", "wt", "qsec"), 105 | id = c(18L, 20L, 19L, 15L, 16L, 17L, 9L), 106 | value = rep(FALSE, 7) 107 | ) 108 | ) 109 | 110 | # Construction of exposure data 111 | add_pack_name_to_single_report <- function(.report, .pack_name) { 112 | res <- .report 113 | res[["pack"]] <- rep(.pack_name, nrow(.report)) 114 | 115 | 
res[, c("pack", colnames(.report))] %>% add_class("ruler_report") 116 | } 117 | 118 | single_exposure_inds <- c( 119 | "data", "cell", "col", "col", "data", "row", "data", 120 | "group" 121 | ) 122 | exposure_names <- c( 123 | "data_dims", "cell_not_outlier", "col_proper_sums", 124 | "new_col_proper_sums", "new_data_pack", "row_not_outlier", 125 | "another_data_pack", "first_group_pack" 126 | ) 127 | 128 | input_single_exposures <- mapply( 129 | new_single_exposure, 130 | # `unname()` is needed to ensure that input vectors have no names. Otherwise 131 | # there can be issues with `dplyr::bind_rows()` (powered by 132 | # `vctrs::vec_rbind()`) removing those names but 'tibble'>=3.0.0 keeping them. 133 | unname(input_packs[single_exposure_inds]), 134 | unname(input_remove_obeyers[single_exposure_inds]), 135 | unname(input_reports[single_exposure_inds]), 136 | SIMPLIFY = FALSE 137 | ) %>% 138 | setNames(exposure_names) 139 | 140 | input_exposures <- mapply( 141 | new_exposure, 142 | mapply( 143 | new_packs_info, 144 | exposure_names, 145 | # `unname()` is needed to ensure that input vectors have no names 146 | lapply(unname(input_packs[single_exposure_inds]), list), 147 | unname(input_remove_obeyers[single_exposure_inds]), 148 | SIMPLIFY = FALSE 149 | ), 150 | mapply( 151 | add_pack_name_to_single_report, 152 | # `unname()` is needed to ensure that input vectors have no names 153 | unname(input_reports[single_exposure_inds]), 154 | exposure_names, 155 | SIMPLIFY = FALSE 156 | ), 157 | SIMPLIFY = FALSE 158 | ) %>% 159 | setNames(exposure_names) 160 | 161 | exposure_ref_inds <- c("col", "col", "cell", "data", "data", "row", "group") 162 | exposure_ref_pack_names <- c( 163 | "col_pack_n1", "col_pack_n2", "cell_pack_n1", 164 | "data_pack_n1", "data_pack_n2", "row_pack_n1", 165 | "group_pack_n1" 166 | ) 167 | input_exposure_ref <- new_exposure( 168 | new_packs_info( 169 | exposure_ref_pack_names, 170 | # `unname()` is needed to ensure that input vectors have no names 171 | 
unname(input_packs[exposure_ref_inds]), 172 | unname(input_remove_obeyers[exposure_ref_inds]) 173 | ), 174 | mapply( 175 | add_pack_name_to_single_report, 176 | # `unname()` is needed to ensure that input vectors have no names 177 | unname(input_reports[exposure_ref_inds]), 178 | exposure_ref_pack_names, 179 | SIMPLIFY = FALSE 180 | ) %>% 181 | dplyr::bind_rows() %>% 182 | as_report(.validate = FALSE) 183 | ) 184 | -------------------------------------------------------------------------------- /tests/testthat/test-actions.R: -------------------------------------------------------------------------------- 1 | context("actions") 2 | 3 | 4 | # Helper functions -------------------------------------------------------- 5 | # Taken from https://github.com/harrelfe/Hmisc/blob/master/R/regexpEscape.s 6 | escape_regex <- function(string) { 7 | gsub("([.|()\\^{}+$*?]|\\[|\\])", "\\\\\\1", string) # backslash-escape regex metacharacters so printed output can be matched literally 8 | } 9 | 10 | 11 | # Input data -------------------------------------------------------------- 12 | mtcars_exposed <- mtcars %>% set_exposure(input_exposure_ref) 13 | rule_breakers <- input_exposure_ref %>% 14 | get_report() %>% 15 | filter(!(value %in% TRUE)) 16 | 17 | trigger_nrow_30 <- function(.tbl) { 18 | nrow(get_report(.tbl)) > 40 # NOTE(review): the name says 30 but the threshold is 40; the reference report appears to have 33 rows (6+6+7+2+2+2+8), so a literal 30 would fire - consider renaming 19 | } 20 | trigger_nrow_10 <- function(.tbl) { 21 | nrow(get_report(.tbl)) > 10 22 | } 23 | actor_print <- function(.tbl) { 24 | print(get_exposure(.tbl)) # printing is the only side effect; the input table is returned as is below 25 | 26 | .tbl 27 | } 28 | 29 | assert_text <- "assert_any_breaker: Some breakers found in exposure." 
30 | 31 | exposure_no_breakers <- input_exposure_ref 32 | exposure_no_breakers$packs_info <- exposure_no_breakers$packs_info %>% 33 | slice(1) %>% 34 | as_packs_info() 35 | exposure_no_breakers$report <- exposure_no_breakers$report %>% 36 | slice(c(1, 2, 5)) %>% 37 | as_report() 38 | 39 | mtcars_exposed_no_breakers <- set_exposure(mtcars, exposure_no_breakers) 40 | 41 | 42 | # Custom expectations ----------------------------------------------------- 43 | expect_asserts <- function(input, type, silent = FALSE, result = input, 44 | output_name = "Breakers report\n", 45 | output_report, 46 | warnings = character(0), 47 | messages = character(0), 48 | ...) { 49 | assert_evaluation <- evaluate_promise( 50 | assert_any_breaker(input, type, silent, ...) 51 | ) 52 | 53 | expect_identical(assert_evaluation$result, result) 54 | expect_match(assert_evaluation$output, output_name) 55 | expect_match(assert_evaluation$output, output_report) 56 | expect_identical(assert_evaluation$warnings, warnings) 57 | expect_identical(assert_evaluation$messages, messages) 58 | } 59 | 60 | 61 | # act_after_exposure ------------------------------------------------------ 62 | test_that("act_after_exposure works", { 63 | expect_error( 64 | act_after_exposure(mtcars, trigger_nrow_30, actor_print), 65 | "act_after_exposure:.*not.*have" 66 | ) 67 | 68 | input_bad <- mtcars 69 | attr(input_bad, "exposure") <- "a" 70 | 71 | expect_error( 72 | act_after_exposure(input_bad, trigger_nrow_30, actor_print), 73 | "act_after_exposure:.*not.*proper.*exposure" 74 | ) 75 | 76 | expect_silent( 77 | output_1 <- act_after_exposure( 78 | mtcars_exposed, trigger_nrow_30, 79 | actor_print 80 | ) 81 | ) 82 | expect_identical(output_1, mtcars_exposed) 83 | 84 | output_ref <- capture_output(print(input_exposure_ref)) 85 | 86 | expect_output( 87 | output_2 <- act_after_exposure( 88 | mtcars_exposed, trigger_nrow_10, 89 | actor_print 90 | ), 91 | output_ref, 92 | fixed = TRUE 93 | ) 94 | expect_identical(output_2, 
mtcars_exposed) 95 | }) 96 | 97 | 98 | # assert_any_breaker ------------------------------------------------------ 99 | test_that("assert_any_breaker works", { 100 | output_ref <- escape_regex(capture_output(print(rule_breakers))) 101 | 102 | # Error assertions 103 | expect_error( 104 | expect_output(assert_any_breaker(mtcars_exposed), output_ref), 105 | assert_text 106 | ) 107 | expect_error( 108 | expect_output(assert_any_breaker(mtcars_exposed, "error"), output_ref), 109 | assert_text 110 | ) 111 | expect_error( 112 | expect_output(assert_any_breaker(mtcars_exposed, "error", TRUE), ""), 113 | assert_text 114 | ) 115 | 116 | # Warning and message assertions 117 | expect_asserts( 118 | mtcars_exposed, 119 | "warning", 120 | output_report = output_ref, 121 | warnings = assert_text 122 | ) 123 | expect_asserts( 124 | mtcars_exposed, 125 | "message", 126 | output_report = output_ref, 127 | messages = paste0(assert_text, "\n") 128 | ) 129 | 130 | # Absence of printing 131 | expect_asserts( 132 | mtcars_exposed, 133 | "warning", 134 | silent = TRUE, 135 | output_name = "", 136 | output_report = "", 137 | warnings = assert_text 138 | ) 139 | expect_asserts( 140 | mtcars_exposed, 141 | "message", 142 | silent = TRUE, 143 | output_name = "", 144 | output_report = "", 145 | messages = paste0(assert_text, "\n") 146 | ) 147 | 148 | # Absence of assertions 149 | expect_asserts( 150 | mtcars_exposed_no_breakers, 151 | "error", 152 | output_name = "", 153 | output_report = "" 154 | ) 155 | expect_asserts( 156 | mtcars_exposed_no_breakers, 157 | "warning", 158 | output_name = "", 159 | output_report = "" 160 | ) 161 | expect_asserts( 162 | mtcars_exposed_no_breakers, 163 | "message", 164 | output_name = "", 165 | output_report = "" 166 | ) 167 | }) 168 | 169 | test_that("assert_any_breaker accounts for printing options", { 170 | output_ref <- escape_regex(capture_output(print(rule_breakers, n = 3))) 171 | 172 | expect_error( 173 | expect_output( 174 | 
assert_any_breaker(mtcars_exposed, "error", n = 3), 175 | output_ref 176 | ), 177 | assert_text 178 | ) 179 | expect_asserts( 180 | mtcars_exposed, 181 | "warning", 182 | output_report = output_ref, 183 | warnings = assert_text, 184 | n = 3 185 | ) 186 | expect_asserts( 187 | mtcars_exposed, 188 | "message", 189 | output_report = output_ref, 190 | messages = paste0(assert_text, "\n"), 191 | n = 3 192 | ) 193 | }) 194 | 195 | 196 | # any_breaker ------------------------------------------------------------- 197 | test_that("any_breaker works", { 198 | expect_error(any_breaker("a"), "any_breaker:.*not.*proper.*exposure") 199 | expect_true(any_breaker(input_exposure_ref)) 200 | expect_false(any_breaker(exposure_no_breakers)) 201 | }) 202 | 203 | 204 | # generate_breakers_informer ---------------------------------------------- 205 | test_that("generate_breakers_informer works", { 206 | custom_assert_text <- "Custom" 207 | informer <- generate_breakers_informer( 208 | .fun = warning, 209 | .message = custom_assert_text, 210 | .silent = FALSE 211 | ) 212 | 213 | expect_is(informer, "function") 214 | 215 | output <- evaluate_promise(informer(.tbl = mtcars_exposed)) 216 | 217 | expect_identical(output$result, mtcars_exposed) 218 | expect_match( 219 | output$output, 220 | escape_regex(capture_output(print(rule_breakers))) 221 | ) 222 | expect_identical(output$warnings, custom_assert_text) 223 | expect_identical(output$messages, character(0)) 224 | }) 225 | -------------------------------------------------------------------------------- /tests/testthat/test-expose-helpers.R: -------------------------------------------------------------------------------- 1 | context("expose-helpers") 2 | 3 | 4 | # guess_pack_type --------------------------------------------------------- 5 | test_that("guess_pack_type works", { 6 | expect_identical(guess_pack_type(input_data_pack_out), "data_pack") 7 | expect_identical(guess_pack_type(input_group_pack_out), "group_pack") 8 | 
expect_identical(guess_pack_type(input_col_pack_out), "col_pack") 9 | expect_identical(guess_pack_type(input_row_pack_out), "row_pack") 10 | expect_identical(guess_pack_type(input_cell_pack_out), "cell_pack") 11 | 12 | input_col_pack_out_1 <- input_col_pack_out 13 | names(input_col_pack_out_1) <- 14 | gsub("\\._\\.", "\\.___\\.", names(input_col_pack_out_1)) 15 | 16 | expect_identical( 17 | guess_pack_type( 18 | input_col_pack_out_1, 19 | inside_punct("\\.___\\.") 20 | ), 21 | "col_pack" 22 | ) 23 | }) 24 | 25 | 26 | # remove_obeyers ---------------------------------------------------------- 27 | test_that("remove_obeyers works", { 28 | input_report <- tibble::tibble( 29 | pack = rep("data_pack", 4), rule = paste0("rule__", 1:4), 30 | var = rep(".all", 4), id = rep(0L, 4), 31 | value = c(TRUE, FALSE, TRUE, NA) 32 | ) 33 | 34 | expect_identical(remove_obeyers(input_report, FALSE), input_report) 35 | expect_identical(remove_obeyers(input_report, TRUE), input_report[c(2, 4), ]) 36 | }) 37 | 38 | 39 | # impute_exposure_pack_names ---------------------------------------------- 40 | test_that("impute_exposure_pack_names works with NULL reference exposure", { 41 | expect_identical( 42 | impute_exposure_pack_names(input_single_exposures, input_exposure_ref), 43 | input_single_exposures 44 | ) 45 | 46 | cur_input_single_exposures <- input_single_exposures 47 | names_remove_inds <- c(1, 2, 3, 5, 6, 8) 48 | names(cur_input_single_exposures)[names_remove_inds] <- 49 | rep("", length(names_remove_inds)) 50 | 51 | expect_identical( 52 | names(impute_exposure_pack_names(cur_input_single_exposures, NULL)), 53 | c( 54 | "data_pack__1", "cell_pack__1", "col_pack__1", "new_col_proper_sums", 55 | "data_pack__2", "row_pack__1", "another_data_pack", "group_pack__1" 56 | ) 57 | ) 58 | }) 59 | 60 | test_that("impute_exposure_pack_names works with not NULL reference exposure", { 61 | cur_input_single_exposures <- input_single_exposures 62 | names_remove_inds <- c(1, 2, 3, 5, 6, 8) 63 | 
names(cur_input_single_exposures)[names_remove_inds] <- 64 | rep("", length(names_remove_inds)) 65 | 66 | expect_identical( 67 | names(impute_exposure_pack_names( 68 | cur_input_single_exposures, 69 | input_exposure_ref 70 | )), 71 | c( 72 | "data_pack__3", "cell_pack__2", "col_pack__3", "new_col_proper_sums", 73 | "data_pack__4", "row_pack__2", "another_data_pack", "group_pack__2" 74 | ) 75 | ) 76 | }) 77 | 78 | 79 | # add_pack_names ---------------------------------------------------------- 80 | test_that("add_pack_names works", { 81 | expect_identical( 82 | add_pack_names(input_single_exposures), 83 | input_exposures 84 | ) 85 | }) 86 | 87 | 88 | # bind_exposures ---------------------------------------------------------- 89 | test_that("bind_exposures works", { 90 | expect_identical( 91 | bind_exposures(list(input_exposure_ref, NULL)), 92 | input_exposure_ref 93 | ) 94 | expect_identical( 95 | bind_exposures(list(NULL, NULL)), 96 | NULL 97 | ) 98 | 99 | output_ref <- new_exposure( 100 | .packs_info = new_packs_info( 101 | rep(input_exposure_ref$packs_info$name, 2), 102 | c(input_exposure_ref$packs_info$fun, input_exposure_ref$packs_info$fun), 103 | rep(input_exposure_ref$packs_info$remove_obeyers, 2) 104 | ), 105 | .report = bind_rows( 106 | input_exposure_ref$report, 107 | input_exposure_ref$report 108 | ) %>% 109 | add_class_cond("ruler_report") 110 | ) 111 | 112 | expect_identical( 113 | bind_exposures(list(input_exposure_ref, input_exposure_ref)), 114 | output_ref 115 | ) 116 | expect_identical( 117 | bind_exposures(input_exposure_ref, input_exposure_ref), 118 | output_ref 119 | ) 120 | }) 121 | 122 | 123 | # filter_not_null --------------------------------------------------------- 124 | test_that("filter_not_null works", { 125 | input <- list(NULL, 1, list(2), NULL, "a", "b", list(NULL)) 126 | output_ref <- input[-c(1, 4)] 127 | 128 | expect_identical(filter_not_null(input), output_ref) 129 | }) 130 | 131 | 132 | # assert_pack_out_one_row 
------------------------------------------------- 133 | test_that("assert_pack_out_one_row works", { 134 | expect_silent(assert_pack_out_one_row(input_data_pack_out, "data_pack")) 135 | expect_error( 136 | assert_pack_out_one_row(input_row_pack_out, "row_pack"), 137 | "row_pack.*not.*row" 138 | ) 139 | }) 140 | 141 | 142 | # assert_pack_out_all_logical --------------------------------------------- 143 | test_that("assert_pack_out_all_logical works", { 144 | expect_silent(assert_pack_out_all_logical(input_data_pack_out, "data_pack")) 145 | 146 | input_bad <- tibble::tibble(good = c(TRUE, FALSE), bad = 1:2) 147 | 148 | expect_error( 149 | assert_pack_out_all_logical(input_bad, "cell_pack"), 150 | "cell_pack.*not.*logical" 151 | ) 152 | }) 153 | 154 | 155 | # assert_pack_out_all_have_separator -------------------------------------- 156 | test_that("assert_pack_out_all_have_separator works", { 157 | expect_silent( 158 | assert_pack_out_all_have_separator( 159 | input_col_pack_out, "col_pack", inside_punct("\\._\\.") 160 | ) 161 | ) 162 | expect_error( 163 | assert_pack_out_all_have_separator( 164 | input_data_pack_out, "data_pack", inside_punct("\\._\\.") 165 | ), 166 | "data_pack.*not.*separator" 167 | ) 168 | expect_error( 169 | assert_pack_out_all_have_separator( 170 | input_col_pack_out, "col_pack", inside_punct("\\.___\\.") 171 | ), 172 | "col_pack.*not.*separator" 173 | ) 174 | }) 175 | -------------------------------------------------------------------------------- /tests/testthat/test-packs.R: -------------------------------------------------------------------------------- 1 | context("packs") 2 | 3 | 4 | # Input data -------------------------------------------------------------- 5 | input <- list(1, dot2 = "a", mean, list(new = 2, 3)) 6 | 7 | compute_output_ref <- function(.extra_class) { 8 | list( 9 | structure(1, class = c(.extra_class, "rule_pack", "numeric")), 10 | dot2 = structure("a", class = c(.extra_class, "rule_pack", "character")), 11 | 
structure(mean, class = c(.extra_class, "rule_pack", "function")), 12 | new = structure(2, class = c(.extra_class, "rule_pack", "numeric")), 13 | structure(3, class = c(.extra_class, "rule_pack", "numeric")) 14 | ) 15 | } 16 | 17 | 18 | # data_packs -------------------------------------------------------------- 19 | test_that("data_packs works", { 20 | output <- data_packs(!!!input) 21 | output_ref <- compute_output_ref(.extra_class = "data_pack") 22 | 23 | expect_identical(output, output_ref) 24 | }) 25 | 26 | 27 | # group_packs ------------------------------------------------------------- 28 | test_that("group_packs works", { 29 | output_1 <- group_packs(!!!input, .group_vars = c("x", "y")) 30 | output_2 <- group_packs( 31 | !!!input, 32 | .group_vars = c("x", "y"), 33 | .group_sep = "+" 34 | ) 35 | output_ref <- compute_output_ref(.extra_class = "group_pack") %>% 36 | lapply(`attr<-`, which = "group_vars", value = c("x", "y")) 37 | output_ref_1 <- lapply(output_ref, `attr<-`, which = "group_sep", value = ".") 38 | output_ref_2 <- lapply(output_ref, `attr<-`, which = "group_sep", value = "+") 39 | 40 | expect_identical(output_1, output_ref_1) 41 | expect_identical(output_2, output_ref_2) 42 | }) 43 | 44 | test_that("group_packs throws errors", { 45 | expect_error(group_packs(!!!input, .group_vars = character(0))) 46 | expect_error(group_packs(!!!input, .group_vars = 1:2)) 47 | 48 | expect_error(group_packs(!!!input, .group_vars = "a", .group_sep = 1)) 49 | expect_error( 50 | group_packs(!!!input, .group_vars = "a", .group_sep = c("+", "-")) 51 | ) 52 | }) 53 | 54 | 55 | # col_packs --------------------------------------------------------------- 56 | test_that("col_packs works", { 57 | output <- col_packs(!!!input) 58 | output_ref <- compute_output_ref(.extra_class = "col_pack") 59 | 60 | expect_identical(output, output_ref) 61 | }) 62 | 63 | 64 | # row_packs --------------------------------------------------------------- 65 | test_that("row_packs works", { 66 | 
output <- row_packs(!!!input) 67 | output_ref <- compute_output_ref(.extra_class = "row_pack") 68 | 69 | expect_identical(output, output_ref) 70 | }) 71 | 72 | 73 | # cell_packs -------------------------------------------------------------- 74 | test_that("cell_packs works", { 75 | output <- cell_packs(!!!input) 76 | output_ref <- compute_output_ref(.extra_class = "cell_pack") 77 | 78 | expect_identical(output, output_ref) 79 | }) 80 | 81 | 82 | # squash_dots_rule_pack --------------------------------------------------- 83 | test_that("squash_dots_rule_pack returns a list", { 84 | output <- squash_dots_rule_pack(1, .extra_class = "extra") 85 | names(output) <- NULL 86 | output_ref <- list(structure(1, class = c("extra", "rule_pack", "numeric"))) 87 | 88 | expect_identical(output, output_ref) 89 | }) 90 | 91 | test_that("squash_dots_rule_pack returns a named list", { 92 | output <- squash_dots_rule_pack(!!!input[1:3], .extra_class = "extra") 93 | output_ref <- compute_output_ref(.extra_class = "extra")[1:3] 94 | 95 | expect_identical(output, output_ref) 96 | }) 97 | 98 | test_that("squash_dots_rule_pack squashes", { 99 | output <- squash_dots_rule_pack( 100 | list(list(1L), list(2L, list(3L))), 101 | list(list(list(4L)), list(5L, list(6L))), 102 | .extra_class = "extra" 103 | ) 104 | names(output) <- NULL 105 | output_ref <- lapply( 106 | 1:6, 107 | structure, 108 | class = c("extra", "rule_pack", "integer") 109 | ) 110 | 111 | expect_identical(output, output_ref) 112 | }) 113 | 114 | 115 | # print.data_pack --------------------------------------------------------- 116 | test_that("print.data_pack works", { 117 | expect_output(print(data_packs(!!!input)[[1]]), "Data.*ule.*ack") 118 | }) 119 | 120 | 121 | # print.group_pack -------------------------------------------------------- 122 | test_that("print.group_pack works", { 123 | expect_output( 124 | print(group_packs(!!!input, .group_vars = "a")[[1]]), 125 | "Group.*ule.*ack" 126 | ) 127 | }) 128 | 129 | 130 | # 
print.col_pack ---------------------------------------------------------- 131 | test_that("print.col_pack works", { 132 | expect_output(print(col_packs(!!!input)[[1]]), "Column.*ule.*ack") 133 | }) 134 | 135 | 136 | # print.row_pack ---------------------------------------------------------- 137 | test_that("print.row_pack works", { 138 | expect_output(print(row_packs(!!!input)[[1]]), "Row.*ule.*ack") 139 | }) 140 | 141 | 142 | # print.cell_pack --------------------------------------------------------- 143 | test_that("print.cell_pack works", { 144 | expect_output(print(cell_packs(!!!input)[[1]]), "Cell.*ule.*ack") 145 | }) 146 | -------------------------------------------------------------------------------- /tests/testthat/test-rules.R: -------------------------------------------------------------------------------- 1 | context("rules") 2 | 3 | 4 | # rules ------------------------------------------------------------------- 5 | test_that("rules works", { 6 | output_1 <- rules(mean, "mean", mean(.), ~ mean(.)) 7 | output_ref_1 <- list( 8 | ._.rule__1 = mean, 9 | ._.rule__2 = "mean", 10 | ._.rule__3 = ~ mean(.), 11 | ._.rule__4 = output_1[[4]] 12 | ) 13 | 14 | expect_identical(output_1, output_ref_1) 15 | 16 | output_2 <- rules(~ mean(.), .prefix = "a_a_") 17 | output_ref_2 <- list(a_a_rule__1 = output_2[[1]]) 18 | 19 | expect_identical(output_2, output_ref_2) 20 | 21 | expect_error(rules(mean2), "`mean2`") 22 | }) 23 | 24 | 25 | # extract_funs_input ------------------------------------------------------ 26 | # Tested in `rules()` 27 | 28 | 29 | # has_dot_symbol ---------------------------------------------------------- 30 | # Tested in `rules()` 31 | 32 | 33 | # squash_expr ------------------------------------------------------------- 34 | # Tested in `rules()` 35 | 36 | 37 | # quo_get_function -------------------------------------------------------- 38 | # Tested in `rules()` 39 | -------------------------------------------------------------------------------- 
/tests/testthat/test-spread-groups.R: -------------------------------------------------------------------------------- 1 | context("spread-groups") 2 | 3 | 4 | # Input data -------------------------------------------------------------- 5 | input_grouped_summary <- mtcars %>% 6 | group_by(vs, am) %>% 7 | summarise(n_low = dplyr::n() > 6, n_high = dplyr::n() < 10) 8 | 9 | 10 | # spread_groups ----------------------------------------------------------- 11 | test_that("spread_groups works", { 12 | output_ref_1 <- tibble::tibble( 13 | "0.0._.n_low" = TRUE, "0.1._.n_low" = FALSE, 14 | "1.0._.n_low" = TRUE, "1.1._.n_low" = TRUE, 15 | "0.0._.n_high" = FALSE, "0.1._.n_high" = TRUE, 16 | "1.0._.n_high" = TRUE, "1.1._.n_high" = TRUE 17 | ) 18 | 19 | expect_identical( 20 | spread_groups(input_grouped_summary, vs, am), 21 | output_ref_1 22 | ) 23 | 24 | output_ref_2 <- output_ref_1 25 | colnames(output_ref_2) <- gsub("^(.)\\.", "\\1__", colnames(output_ref_2)) 26 | 27 | expect_identical( 28 | spread_groups(input_grouped_summary, vs, am, .group_sep = "__"), 29 | output_ref_2 30 | ) 31 | 32 | output_ref_3 <- output_ref_1 33 | colnames(output_ref_3) <- gsub("\\._\\.", "___", colnames(output_ref_3)) 34 | 35 | expect_identical( 36 | spread_groups(input_grouped_summary, vs, am, .col_sep = "___"), 37 | output_ref_3 38 | ) 39 | }) 40 | 41 | test_that("spread_groups throws errors", { 42 | expect_error( 43 | spread_groups(input_grouped_summary), 44 | "spread_groups: No group.*column" 45 | ) 46 | expect_error( 47 | spread_groups(input_grouped_summary, ends_with("Absent")), 48 | "spread_groups: No group.*column" 49 | ) 50 | expect_error( 51 | spread_groups(input_grouped_summary, vs), 52 | "spread_groups:.*non-unique" 53 | ) 54 | expect_error( 55 | spread_groups(input_grouped_summary, everything()), 56 | "spread_groups: No rule.*column" 57 | ) 58 | expect_error( 59 | input_grouped_summary %>% 60 | ungroup() %>% 61 | mutate(vs = 1:4) %>% 62 | spread_groups(vs), 63 | "spread_groups:.*logical" 
64 | ) 65 | }) 66 | -------------------------------------------------------------------------------- /tests/testthat/test-utils.R: -------------------------------------------------------------------------------- 1 | context("utils") 2 | 3 | 4 | # Input data -------------------------------------------------------------- 5 | df <- mtcars 6 | 7 | 8 | # inside_punct ------------------------------------------------------------ 9 | test_that("inside_punct works", { 10 | input1 <- c( 11 | "._.", "._.a", "a._.", "a._.a", 12 | "a_._.a", "a._._a", "a_._._a", 13 | "a__._._a", "a_._.__a", "a__._.__a", 14 | "._.*_.", "._.._.", 15 | "__.a", ".__a", "...", "a_a" 16 | ) 17 | 18 | expect_identical(grep(inside_punct(), input1), 1:12) 19 | 20 | input2 <- c( 21 | "a", "_a", "a_", "_a_", 22 | "__a", "a__", "__a__", 23 | "_" 24 | ) 25 | 26 | expect_identical(grep(inside_punct("a"), input2), 1:7) 27 | }) 28 | 29 | 30 | # negate_select_cols ------------------------------------------------------ 31 | test_that("negate_select_cols works", { 32 | output_1 <- negate_select_cols(mtcars, vs, am) 33 | output_ref_1 <- setdiff(colnames(mtcars), c("vs", "am")) 34 | 35 | expect_identical(output_1, output_ref_1) 36 | 37 | output_2 <- negate_select_cols(mtcars, one_of("vs", "am")) 38 | output_ref_2 <- output_ref_1 39 | 40 | expect_identical(output_2, output_ref_2) 41 | 42 | output_3 <- negate_select_cols(mtcars, dplyr::matches("p|a")) 43 | output_ref_3 <- c("cyl", "wt", "qsec", "vs") 44 | 45 | expect_identical(output_3, output_ref_3) 46 | 47 | output_4 <- negate_select_cols(mtcars, cyl:am) 48 | output_ref_4 <- c("mpg", "gear", "carb") 49 | 50 | expect_identical(output_4, output_ref_4) 51 | 52 | output_5 <- negate_select_cols(mtcars, -(cyl:am)) 53 | output_ref_5 <- c("cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am") 54 | 55 | expect_identical(output_5, output_ref_5) 56 | }) 57 | 58 | 59 | # assert_positive_length -------------------------------------------------- 60 | 
test_that("assert_positive_length works", { 61 | expect_error( 62 | assert_positive_length(list(), "Some name"), 63 | "^Some name.*positive.*length" 64 | ) 65 | 66 | expect_identical(assert_positive_length(1:2, "Some name"), 1:2) 67 | expect_identical(assert_positive_length(list(1:2), "Some name"), list(1:2)) 68 | }) 69 | 70 | 71 | # assert_length ----------------------------------------------------------- 72 | test_that("assert_length works", { 73 | expect_error( 74 | assert_length(c("a", "b"), 1, "New name"), 75 | "^New name.*length.*1" 76 | ) 77 | expect_error( 78 | assert_length(1, 2, "New name"), 79 | "^New name.*length.*2" 80 | ) 81 | 82 | expect_identical(assert_length(list("c"), 1, "New name"), list("c")) 83 | }) 84 | 85 | 86 | # assert_character -------------------------------------------------------- 87 | test_that("assert_character works", { 88 | expect_error( 89 | assert_character(1L, "Tmp name"), 90 | "Tmp name.*character" 91 | ) 92 | expect_error( 93 | assert_character(list("a"), "Tmp name"), 94 | "Tmp name.*character" 95 | ) 96 | 97 | expect_identical(assert_character(c("a", "A"), "Tmp name"), c("a", "A")) # success path: valid character input should come back unchanged 98 | }) 99 | 100 | 101 | # add_class --------------------------------------------------------------- 102 | test_that("add_class works", { 103 | expect_equal(class(add_class(df, "some")), c("some", "data.frame")) 104 | }) 105 | 106 | 107 | # add_class_cond ---------------------------------------------------------- 108 | test_that("add_class_cond works", { 109 | expect_equal(class(add_class_cond(df, "data.frame")), "data.frame") 110 | expect_equal(class(add_class_cond(df, "some")), c("some", "data.frame")) 111 | }) 112 | 113 | 114 | # remove_class_cond ------------------------------------------------------- 115 | test_that("remove_class_cond works", { 116 | input <- structure(1, class = c("a", "b")) 117 | 118 | expect_equal(remove_class_cond(input, "a"), structure(1, class = "b")) 119 | expect_equal(remove_class_cond(input, "b"), input) 120 | 
}) 121 | 122 | 123 | # compute_def_names ------------------------------------------------------- 124 | test_that("compute_def_names works", { 125 | expect_identical(compute_def_names(0), character(0)) 126 | expect_identical(compute_def_names(10), paste0("__", seq_len(10))) 127 | expect_identical(compute_def_names(10, "base"), paste0("base__", seq_len(10))) 128 | expect_identical( 129 | compute_def_names(10, .start_ind = 4), 130 | paste0("__", seq_len(10) + 3) 131 | ) 132 | expect_identical( 133 | compute_def_names(10, "base", 4), 134 | paste0("base__", seq_len(10) + 3) 135 | ) 136 | }) 137 | 138 | 139 | # enhance_names ----------------------------------------------------------- 140 | test_that("enhance_names works", { 141 | expect_identical(enhance_names(character(0)), character(0)) 142 | 143 | input <- c("", "name", "", "name", "var") 144 | output_ref_1 <- c("__1", "name", "__3", "name", "var") 145 | 146 | expect_identical(enhance_names(input), output_ref_1) 147 | expect_identical( 148 | enhance_names(input, .prefix = "._."), 149 | paste0("._.", output_ref_1) 150 | ) 151 | expect_identical( 152 | enhance_names(input, .suffix = "__"), 153 | paste0(output_ref_1, "__") 154 | ) 155 | expect_identical( 156 | enhance_names(input, .prefix = "._.", .suffix = "__"), 157 | paste0("._.", output_ref_1, "__") 158 | ) 159 | 160 | expect_identical( 161 | enhance_names(input, .root = "base"), 162 | c("base__1", "name", "base__3", "name", "var") 163 | ) 164 | expect_identical( 165 | enhance_names(input, .root = "base", .start_ind = 5), 166 | c("base__5", "name", "base__7", "name", "var") 167 | ) 168 | }) 169 | -------------------------------------------------------------------------------- /vignettes/design-and-format.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Design Process and Exposure Format" 3 | author: "Evgeni Chasnovski" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | 
%\VignetteIndexEntry{Design Process and Exposure Format} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | The main idea of the `ruler` package is to create a format of validation results (along with a functional API) that will work naturally with [tidyverse](https://www.tidyverse.org/) tools. This vignette will: 13 | 14 | - Guide you through the design process of __exposure__: `ruler`'s validation result format. This should help you understand the foundations of the `ruler` validation workflow. 15 | - Describe the exposure format. 16 | 17 | ## Design process 18 | 19 | The preferred local data structure in `tidyverse` is [tibble](https://tibble.tidyverse.org): "A modern re-imagining of the data frame", on which its implementation is based. That is why `ruler` uses data frames as the preferred format for data to be validated. However, the initial goal is to use tibbles in the creation of the validation result format as much as possible. 20 | 21 | Basically, a data frame is a list of variables with the same length. It is easier to think about it as a two-dimensional structure where columns can be of different types. 22 | 23 | In abstract form, validation of a data frame can be put as ___asking whether a certain subset of the data frame (data unit) obeys a certain rule___. The result of validation is a logical __value__ representing the answer. 24 | 25 | Under the influence of [dplyr](https://dplyr.tidyverse.org)'s grammar of data manipulation, a data frame can be represented in terms of the following data units: 26 | 27 | - Data frame as a whole. Validation can be done by `summarise()` _without_ grouping. 28 | - Collection of groups of rows. Validation can be done by `summarise()` _with_ grouping. 29 | - Collection of columns. Validation can be done by scoped variants of `summarise()` _without_ grouping: `summarise_all()`, `summarise_if()` and `summarise_at()`. 30 | - Collection of rows. Validation can be done by `transmute()`. 31 | - 2d-collection of cells. 
Validation can be done by scoped variants of `transmute()`: `transmute_all()`, `transmute_if()` and `transmute_at()`. 32 | 33 | In `ruler`, data, group, column, row and cell are the five basic data units. They can all be described by a combination of two variables: 34 | 35 | - __var__ which represents the variable name of the data unit: 36 | - Value '.all' is reserved for 'all columns as a whole'. 37 | - Value _equal_ to some column name indicates the column of the data unit. 38 | - Value _not equal_ to some column name indicates the name of a group: it is created by uniting (with a delimiter) group levels of grouping columns. 39 | - __id__ which represents the row index of the data unit: 40 | - Value 0 is reserved for 'all rows as a whole'. 41 | - Value not equal to 0 indicates the row index of the data unit. 42 | 43 | Validation of data units can be done with the `dplyr` functions described above. Their application to some data unit can give answers to multiple questions. That is why by design __rules__ (functions that answer one certain question about one type of data unit) are combined in __rule packs__ (functions that answer multiple questions about one type of data unit). 44 | 45 | Applying a rule pack to data involves several points: 46 | 47 | - Rule packs should have unique __names__ to be used as references. 48 | - For the same reason, rules should have names. However, uniqueness is necessary only within the corresponding rule pack, which makes the pair 'pack name'+'rule name' a key for identifying the actual rule. 49 | - Outputs of rule packs for different data units differ in their structure. Therefore, rule packs should have __types__ to apply different interpretations to their outputs. 50 | - During the actual validation, most of the results normally indicate obedience to rules. This can cause storing a lot of redundant information in validation results. `ruler` has the option of __removing obeyers__ from results during the validation. 
51 | 52 | In `ruler`, __exposing__ data to rules means applying rule packs to data, collecting results in a common format and attaching them to the data as an `exposure` attribute. In this way, the actual exposure can be done in multiple steps and also be a part of a general data preparation pipeline. 53 | 54 | ## Exposure 55 | 56 | __Exposure__ is a format designed to contain uniform information about validation of different data units. For reproducibility it also saves information about applied packs. Basically, an exposure is a list with two elements: 57 | 58 | 1. __Packs info__: a `tibble` with the following structure: 59 | - _name_ \<chr\> : Name of the pack. If not set manually it will be imputed during exposure. 60 | - _type_ \<chr\> : Name of pack type. Indicates which data unit the pack checks. 61 | - _fun_ \<list\> : List (preferably unnamed) of rule pack functions. 62 | - _remove_obeyers_ \<lgl\> : Whether rows about obeyers (data units that obey a certain rule) were removed from the report after applying the pack. 63 | 2. __Tidy data validation report__: a `tibble` with the following structure: 64 | - _pack_ \<chr\> : Name of rule pack (from column 'name' in packs info). 65 | - _rule_ \<chr\> : Name of the rule defined in rule pack. 66 | - _var_ \<chr\> : Name of the data unit variable. 67 | - _id_ \<int\> : Row index of data unit. 68 | - _value_ \<lgl\> : Whether the described data unit obeys the rule. 69 | --------------------------------------------------------------------------------