├── .Rbuildignore ├── .covrignore ├── .github ├── .gitignore ├── CONTRIBUTING.md ├── issue_template.md ├── pull_request_template.md └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ ├── style.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── adorn_ns.R ├── adorn_pct_formatting.R ├── adorn_percentages.R ├── adorn_rounding.R ├── adorn_title.R ├── adorn_totals.R ├── as_and_untabyl.R ├── clean_names.R ├── compare_df_cols.R ├── convert_to_date.R ├── excel_dates.R ├── excel_time_to_numeric.R ├── get_dupes.R ├── get_level_groups.R ├── get_one_to_one.R ├── janitor.R ├── janitor_deprecated.R ├── make_clean_names.R ├── paste_skip_na.R ├── print_tabyl.R ├── remove_empties.R ├── round_half_up.R ├── round_to_fraction.R ├── row_to_names.R ├── sas_dates.R ├── single_value.R ├── statistical_tests.R ├── tabyl.R ├── top_levels.R └── utils-pipe.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── cran-comments.md ├── dirty_data.xlsx ├── janitor.Rproj ├── man ├── add_totals_col.Rd ├── add_totals_row.Rd ├── adorn_crosstab.Rd ├── adorn_ns.Rd ├── adorn_pct_formatting.Rd ├── adorn_percentages.Rd ├── adorn_rounding.Rd ├── adorn_title.Rd ├── adorn_totals.Rd ├── as_tabyl.Rd ├── chisq.test.Rd ├── clean_names.Rd ├── compare_df_cols.Rd ├── compare_df_cols_same.Rd ├── convert_to_NA.Rd ├── convert_to_date.Rd ├── crosstab.Rd ├── describe_class.Rd ├── excel_numeric_to_date.Rd ├── excel_time_to_numeric.Rd ├── figures │ ├── dirty_data.PNG │ └── logo_small.png ├── find_header.Rd ├── fisher.test.Rd ├── get_dupes.Rd ├── get_one_to_one.Rd ├── janitor-package.Rd ├── janitor_deprecated.Rd ├── make_clean_names.Rd ├── mu_to_u.Rd ├── paste_skip_na.Rd ├── pipe.Rd ├── remove_constant.Rd ├── remove_empty.Rd ├── remove_empty_cols.Rd ├── remove_empty_rows.Rd ├── round_half_up.Rd ├── round_to_fraction.Rd ├── row_to_names.Rd ├── sas_numeric_to_date.Rd ├── signif_half_up.Rd ├── single_value.Rd ├── tabyl.Rd ├── top_levels.Rd ├── untabyl.Rd └── use_first_valid_of.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── revdep ├── .gitignore ├── README.md ├── checks.rds ├── email.yml └── problems.md ├── tests ├── testthat.R └── testthat │ ├── test-adorn-ns.R │ ├── test-adorn-pct-formatting.R │ ├── test-adorn-percentages.R │ ├── test-adorn-rounding.R │ ├── test-adorn-title.R │ ├── test-adorn-totals.R │ ├── test-clean-names.R │ ├── test-compare_df_cols.R │ ├── test-convert_to_date.R │ ├── test-date-conversion.R │ ├── test-excel_time_to_numeric.R │ ├── test-get-dupes.R │ ├── test-get-level-groups.R │ ├── test-get_one_to_one.R │ ├── test-paste_skip_na.R │ ├── test-remove-empties.R │ ├── test-round_to_fraction.R │ ├── test-row-to-names.R │ ├── test-sas_dates.R │ ├── test-signif_half_up.R │ ├── test-single_value.R │ ├── test-statistical-tests.R │ ├── test-tabyl-classifiers.R │ ├── test-tabyl.R │ ├── test-top-levels.R │ ├── test-utilities.R │ └── testdata │ └── issue-578-sf.rds └── vignettes ├── janitor.Rmd ├── janitor.md ├── tabyls.Rmd └── tabyls.md /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^README\.Rmd$ # An Rmarkdown file used to generate README.md 5 | cran-comments.md # Comments for CRAN submission 6 | 
^LICENSE\.md$ 7 | ^\.gitignore 8 | README.Rmd 9 | ^cran\-comments\.md$ 10 | dirty_data.xlsx 11 | ^.*\.docx 12 | codecov.yml 13 | planning.md 14 | planning.Rmd 15 | map of janitor 1.0 API.docx 16 | .github 17 | ^docs$ 18 | Index.md 19 | Index.Rmd 20 | ^revdep$ 21 | .covrignore 22 | ^CRAN-RELEASE$ 23 | ^doc$ 24 | ^Meta$ 25 | ^_pkgdown\.yml$ 26 | ^pkgdown$ 27 | ^\.github$ 28 | revdep/ 29 | ^CRAN-SUBMISSION$ 30 | -------------------------------------------------------------------------------- /.covrignore: -------------------------------------------------------------------------------- 1 | R/janitor_deprecated.R 2 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING # 2 | 3 | ### Please contribute! 4 | 5 | We love collaboration. 6 | 7 | ### Bugs? Feature requests? 8 | 9 | * Submit an issue on the [issues page](https://github.com/sfirke/janitor/issues) 10 | 11 | ### Code contributions 12 | 13 | I would prefer some discussion before an unsolicited code contribution, i.e., pull request. This ensures that your effort is not wasted and that we're aligned on how to improve the janitor package. 14 | 15 | This is especially true if your proposed contribution does not match a currently open issue. If that's the case, please open new issue(s) to have the discussion there, prior to submitting code. 16 | 17 | If your proposed contribution addresses multiple issues, it should ideally be broken into multiple pull requests. This will make it easier for me to review and approve. 18 | 19 | #### The mechanics of contributing: 20 | 21 | * Fork this repo to your GitHub account 22 | * Clone your fork down to your machine, e.g., `git clone https://github.com/<yourgithubusername>/janitor.git` 23 | * Make sure to track progress upstream (i.e., on our version of `janitor` at `sfirke/janitor`) by doing `git remote add upstream https://github.com/sfirke/janitor.git`. Before making changes, pull in changes from upstream with either `git fetch upstream` followed by a merge, or `git pull upstream` to fetch and merge in one step 24 | * Make your changes (bonus points for making changes on a new feature branch) 25 | * Push up to your account 26 | * Submit a pull request to the main branch at `sfirke/janitor` 27 | 28 | ### Prefer to discuss over email? 29 | Email Sam. His email address is in the `DESCRIPTION` file of this repo. 30 | 31 | ### Thanks for contributing! 32 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | ## Feature requests 2 | 3 | Briefly describe what the feature would do and why it is in scope for the janitor package. 4 | 5 | ## Bug reports 6 | 7 | Please briefly describe your problem and what output you expect. 8 | 9 | Please include a minimal reprex. The goal of a reprex is to make it as easy as possible for me to recreate your problem so that I can fix it. If you've never heard of a reprex before, start by reading about how to create one, and follow the advice there. 10 | 11 | Delete these instructions once you have read them.
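For example (an illustrative sketch, not a required format), a janitor reprex can be as small as a line of inline fake data plus the call that misbehaves, with a comment noting what you expected:

```r
library(janitor)
# expected the column to become "a_column"; describe what you saw instead
clean_names(data.frame(`A Column` = 1, check.names = FALSE))
```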
12 | 13 | --- 14 | 15 | Brief description of the problem 16 | 17 | ```r 18 | # insert reprex here 19 | ``` 20 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Related Issue 7 | 11 | 12 | ## Example 13 | 15 | 16 | 18 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: | 47 | any::sf 48 | any::rcmdcheck 49 | needs: check 50 | 51 | - uses: r-lib/actions/check-r-package@v2 52 | with: 53 | upload-snapshots: true 54 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 55 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | pkgdown: 18 | runs-on: ubuntu-latest 19 | # Only restrict concurrency for non-PR jobs 20 | concurrency: 21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | permissions: 25 | contents: write 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, local::. 
38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs 51 | -------------------------------------------------------------------------------- /.github/workflows/style.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"] 6 | 7 | name: Style 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | style: 13 | runs-on: ubuntu-latest 14 | permissions: 15 | contents: write 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - name: Checkout repo 20 | uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 23 | 24 | - name: Setup R 25 | uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - name: Install dependencies 30 | uses: r-lib/actions/setup-r-dependencies@v2 31 | with: 32 | extra-packages: any::styler, any::roxygen2 33 | needs: styler 34 | 35 | - name: Enable styler cache 36 | run: styler::cache_activate() 37 | shell: Rscript {0} 38 | 39 | - name: Determine cache location 40 | id: styler-location 41 | run: | 42 | cat( 43 | "location=", 44 | styler::cache_info(format = "tabular")$location, 45 | "\n", 46 | file = Sys.getenv("GITHUB_OUTPUT"), 47 | append = TRUE, 48 | sep = "" 49 | ) 50 | shell: Rscript {0} 51 | 52 | - name: Cache styler 53 | uses: actions/cache@v4 54 | with: 55 | path: ${{ steps.styler-location.outputs.location }} 56 | key: ${{ runner.os }}-styler-${{ github.sha }} 57 | restore-keys: | 58 | ${{ runner.os }}-styler- 59 | ${{ runner.os }}- 60 | 61 | - name: Style 62 | run: styler::style_pkg() 63 | shell: Rscript {0} 64 | 65 | - name: Commit and push changes 66 | run: | 67 | if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \ 68 | | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$')) 69 | then 70 | git config --local user.name "$GITHUB_ACTOR" 71 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 72 | git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)" 73 | git pull --ff-only 74 | git push origin 75 | else 76 | echo "No changes to commit." 77 | fi 78 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: | 29 | any::sf 30 | any::covr 31 | any::xml2 32 | needs: coverage 33 | 34 | - name: Test coverage 35 | run: | 36 | cov <- covr::package_coverage( 37 | quiet = FALSE, 38 | clean = FALSE, 39 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 40 | ) 41 | covr::to_cobertura(cov) 42 | shell: Rscript {0} 43 | 44 | - uses: codecov/codecov-action@v4 45 | with: 46 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} 47 | file: ./cobertura.xml 48 | plugin: noop 49 | disable_search: true 50 | token: ${{ secrets.CODECOV_TOKEN }} 51 | 52 | - name: Show testthat output 53 | if: always() 54 | run: | 55 | ## -------------------------------------------------------------------- 56 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 57 | shell: bash 58 | 59 | - name: Upload test results 60 | if: failure() 61 | uses: actions/upload-artifact@v4 62 | with: 63 | name: coverage-test-failures 64 | path: ${{ runner.temp }}/package 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | *.docx 5 | 6 | 7 | ~WRL0005\.tmp 8 | doc 9 | docs 10 | Meta 11 | docs/ 12 | janitor.Rproj 13 | 14 | 15 | revdep/* 16 | revdep -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: janitor 2 | Title: Simple Tools for Examining and Cleaning Dirty Data 3 | Version: 2.2.1.9000 4 | Authors@R: c( 5 | person("Sam", "Firke", , "samuel.firke@gmail.com", role = c("aut", "cre")), 6 | person("Bill", "Denney", , "wdenney@humanpredictions.com", role = "ctb"), 7 | person("Chris", "Haid", , "chrishaid@gmail.com", role = "ctb"), 8 | person("Ryan", "Knight", , "ryangknight@gmail.com", role = "ctb"), 9 | person("Malte", "Grosser", , "malte.grosser@gmail.com", role = "ctb"), 10 | person("Jonathan", "Zadra", , "jonathan.zadra@sorensonimpact.com", role = "ctb"), 11 | person("Olivier", "Roy", role = "ctb"), 12 | person("Josep", family = "Pueyo-Ros", email = "josep.pueyo@udg.edu", role = "ctb") 13 | ) 14 | Description: The main janitor functions can: perfectly format data.frame 15 | column names; provide quick counts of variable combinations (i.e., 16 | frequency tables and crosstabs); and explore duplicate records. Other 17 | janitor functions nicely format the tabulation results. These 18 | tabulate-and-report functions approximate popular features of SPSS and 19 | Microsoft Excel. This package follows the principles of the 20 | "tidyverse" and works well with the pipe function %>%. janitor was 21 | built with beginning-to-intermediate R users in mind and is optimized 22 | for user-friendliness. 
23 | License: MIT + file LICENSE 24 | URL: https://github.com/sfirke/janitor, https://sfirke.github.io/janitor/ 25 | BugReports: https://github.com/sfirke/janitor/issues 26 | Depends: 27 | R (>= 3.1.2) 28 | Imports: 29 | dplyr (>= 1.0.0), 30 | hms, 31 | lifecycle, 32 | lubridate, 33 | magrittr, 34 | purrr, 35 | rlang, 36 | snakecase (>= 0.9.2), 37 | stringi, 38 | stringr, 39 | tidyr (>= 1.0.0), 40 | tidyselect (>= 1.0.0) 41 | Suggests: 42 | dbplyr, 43 | knitr, 44 | rmarkdown, 45 | RSQLite, 46 | sf, 47 | testthat (>= 3.0.0), 48 | tibble, 49 | tidygraph 50 | VignetteBuilder: 51 | knitr 52 | Config/testthat/edition: 3 53 | Encoding: UTF-8 54 | LazyData: true 55 | Roxygen: list(markdown = TRUE) 56 | RoxygenNote: 7.3.2 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Sam Firke 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Sam Firke 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(chisq.test,default) 4 | S3method(chisq.test,tabyl) 5 | S3method(clean_names,default) 6 | S3method(clean_names,sf) 7 | S3method(clean_names,tbl_graph) 8 | S3method(clean_names,tbl_lazy) 9 | S3method(describe_class,default) 10 | S3method(describe_class,factor) 11 | S3method(excel_time_to_numeric,POSIXct) 12 | S3method(excel_time_to_numeric,POSIXlt) 13 | S3method(excel_time_to_numeric,character) 14 | S3method(excel_time_to_numeric,logical) 15 | S3method(excel_time_to_numeric,numeric) 16 | S3method(fisher.test,default) 17 | S3method(fisher.test,tabyl) 18 | S3method(print,tabyl) 19 | S3method(tabyl,data.frame) 20 | S3method(tabyl,default) 21 | export("%>%") 22 | export(add_totals_col) 23 | export(add_totals_row) 24 | export(adorn_crosstab) 25 | export(adorn_ns) 26 | export(adorn_pct_formatting) 27 | export(adorn_percentages) 28 | export(adorn_rounding) 29 | export(adorn_title) 30 | export(adorn_totals) 31 | export(as_tabyl) 32 | export(chisq.test) 33 | export(clean_names) 34 | export(compare_df_cols) 35 | export(compare_df_cols_same) 36 | export(convert_to_NA) 37 | export(convert_to_date) 38 | export(convert_to_datetime) 39 | export(crosstab) 40 | export(describe_class) 41 | export(excel_numeric_to_date) 42 | export(excel_time_to_numeric) 43 | export(find_header) 44 | export(fisher.test) 45 | export(get_dupes) 46 | export(get_one_to_one) 47 | export(make_clean_names) 48 | export(paste_skip_na) 49 | export(remove_constant) 50 | export(remove_empty) 51 | export(remove_empty_cols) 52 | export(remove_empty_rows) 53 | export(round_half_up) 54 | export(round_to_fraction) 55 | export(row_to_names) 56 | export(sas_numeric_to_date) 57 | export(signif_half_up) 58 | export(single_value) 59 | export(tabyl) 60 | export(top_levels) 61 | export(untabyl) 62 | export(use_first_valid_of) 63 | importFrom(lubridate,as_date) 64 | importFrom(lubridate,as_datetime) 65 | importFrom(lubridate,force_tz) 66 | importFrom(lubridate,hour) 67 | importFrom(lubridate,minute) 68 | importFrom(lubridate,second) 69 | importFrom(lubridate,ymd) 70 | importFrom(lubridate,ymd_hms) 71 | importFrom(magrittr,"%>%") 72 | importFrom(rlang,"%||%") 73 | importFrom(rlang,dots_n) 74 | importFrom(rlang,expr) 75 | importFrom(rlang,syms) 76 | importFrom(snakecase,to_any_case) 77 | importFrom(stats,na.omit) 78 | importFrom(stringi,stri_trans_general) 79 | importFrom(stringi,stri_trans_list) 80 | importFrom(stringr,str_replace) 81 | importFrom(stringr,str_replace_all) 82 | importFrom(tidyselect,eval_select) 83 | -------------------------------------------------------------------------------- /R/adorn_pct_formatting.R: -------------------------------------------------------------------------------- 1 | #' Format a `data.frame` of decimals as percentages. 2 | #' 3 | #' @description 4 | #' Numeric columns get multiplied by 100 and formatted as 5 | #' percentages according to user specifications. This function defaults to 6 | #' excluding the first column of the input data.frame, assuming that it contains 7 | #' a descriptive variable, but this can be overridden by specifying the columns 8 | #' to adorn in the `...` argument. Non-numeric columns are always excluded. 9 | #' 10 | #' The decimal separator character is the result of `getOption("OutDec")`, which 11 | #' is based on the user's locale. 
If the default behavior is undesirable, 12 | #' change this value ahead of calling the function, either by changing locale or 13 | #' with `options(OutDec = ",")`. This aligns the decimal separator character 14 | #' with that used in `base::print()`. 15 | #' 16 | #' @param dat a data.frame with decimal values, typically the result of a call 17 | #' to `adorn_percentages` on a `tabyl`. If given a list of data.frames, this 18 | #' function will apply itself to each data.frame in the list (designed for 19 | #' 3-way `tabyl` lists). 20 | #' @param digits how many digits should be displayed after the decimal point? 21 | #' @param rounding method to use for rounding - either "half to even", the base 22 | #' R default method, or "half up", where 14.5 rounds up to 15. 23 | #' @param affix_sign should the % sign be affixed to the end? 24 | #' @param ... columns to adorn. This takes a tidyselect specification. By 25 | #' default, all numeric columns (besides the initial column, if numeric) are 26 | #' adorned, but this allows you to manually specify which columns should be 27 | #' adorned, for use on a data.frame that does not result from a call to 28 | #' `tabyl`. 29 | #' @return a data.frame with formatted percentages 30 | #' @export 31 | #' @examples 32 | #' mtcars %>% 33 | #' tabyl(am, cyl) %>% 34 | #' adorn_percentages("col") %>% 35 | #' adorn_pct_formatting() 36 | #' 37 | #' # Control the columns to be adorned with the ... variable selection argument 38 | #' # If using only the ... argument, you can use empty commas as shorthand 39 | #' # to supply the default values to the preceding arguments: 40 | #' 41 | #' cases <- data.frame( 42 | #' region = c("East", "West"), 43 | #' year = 2015, 44 | #' recovered = c(125, 87), 45 | #' died = c(13, 12) 46 | #' ) 47 | #' 48 | #' cases %>% 49 | #' adorn_percentages("col", , recovered:died) %>% 50 | #' adorn_pct_formatting(, , , recovered:died) 51 | #' 52 | adorn_pct_formatting <- function(dat, digits = 1, rounding = "half to even", affix_sign = TRUE, ...) { 53 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 54 | if (is.list(dat) && !is.data.frame(dat)) { 55 | purrr::map(dat, adorn_pct_formatting, digits, rounding, affix_sign) 56 | } else { 57 | # catch bad inputs 58 | if (!is.data.frame(dat)) { 59 | stop("adorn_pct_formatting() must be called on a data.frame or list of data.frames") 60 | } 61 | rlang::arg_match0(rounding, c("half to even", "half up")) 62 | 63 | original <- dat # used below to record original instances of NA and NaN 64 | 65 | numeric_cols <- which(vapply(dat, is.numeric, logical(1))) 66 | non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) 67 | numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors 68 | 69 | if (rlang::dots_n(...) 
== 0) { 70 | cols_to_adorn <- numeric_cols 71 | } else { 72 | expr <- rlang::expr(c(...)) 73 | cols_to_adorn <- tidyselect::eval_select(expr, data = dat) 74 | if (any(cols_to_adorn %in% non_numeric_cols)) { 75 | # don't need to print a message, adorn_rounding will 76 | cols_to_adorn <- setdiff(cols_to_adorn, non_numeric_cols) 77 | } 78 | } 79 | 80 | 81 | if ("one_way" %in% attr(dat, "tabyl_type")) { 82 | cols_to_adorn <- setdiff(numeric_cols, 2) # so that it works on a one-way tabyl 83 | } 84 | 85 | if (length(cols_to_adorn) == 0) { 86 | stop("at least one targeted column must be of class numeric") 87 | } 88 | 89 | dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) x * 100) 90 | dat <- adorn_rounding(dat, digits = digits, rounding = rounding, ...) 91 | dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) { 92 | format(x, 93 | nsmall = digits, 94 | decimal.mark = getOption("OutDec"), 95 | trim = TRUE 96 | ) 97 | }) # so that 0% prints as 0.0% or 0.00% etc. 98 | if (affix_sign) { 99 | dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) paste0(x, "%")) 100 | } 101 | dat[cols_to_adorn][is.na(original[cols_to_adorn])] <- "-" # NA and NaN values in the original should be simply "-" for printing of results 102 | dat 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /R/adorn_percentages.R: -------------------------------------------------------------------------------- 1 | #' Convert a data.frame of counts to percentages. 2 | #' 3 | #' This function defaults to excluding the first column of the input data.frame, 4 | #' assuming that it contains a descriptive variable, but this can be overridden 5 | #' by specifying the columns to adorn in the `...` argument. 6 | #' 7 | #' @param dat A `tabyl` or other data.frame with a tabyl-like layout. 8 | #' If given a list of data.frames, this function will apply itself to each 9 | #' `data.frame` in the list (designed for 3-way `tabyl` lists). 10 | #' @param denominator The direction to use for calculating percentages. 11 | #' One of "row", "col", or "all". 12 | #' @param na.rm should missing values (including `NaN`) be omitted from the calculations? 13 | #' @param ... columns to adorn. This takes a <[`tidy-select`][dplyr::dplyr_tidy_select]> 14 | #' specification. By default, all numeric columns (besides the initial column, if numeric) 15 | #' are adorned, but this allows you to manually specify which columns should 16 | #' be adorned, for use on a `data.frame` that does not result from a call to [tabyl()]. 17 | #' 18 | #' @return A `data.frame` of percentages, expressed as numeric values between 0 and 1. 19 | #' @export 20 | #' @examples 21 | #' 22 | #' mtcars %>% 23 | #' tabyl(am, cyl) %>% 24 | #' adorn_percentages("col") 25 | #' 26 | #' # calculates correctly even with totals column and/or row: 27 | #' mtcars %>% 28 | #' tabyl(am, cyl) %>% 29 | #' adorn_totals("row") %>% 30 | #' adorn_percentages() 31 | #' 32 | #' # Control the columns to be adorned with the ... variable selection argument 33 | #' # If using only the ... argument, you can use empty commas as shorthand 34 | #' # to supply the default values to the preceding arguments: 35 | #' 36 | #' cases <- data.frame( 37 | #' region = c("East", "West"), 38 | #' year = 2015, 39 | #' recovered = c(125, 87), 40 | #' died = c(13, 12) 41 | #' ) 42 | #' 43 | #' cases %>% 44 | #' adorn_percentages(, , recovered:died) 45 | adorn_percentages <- function(dat, denominator = "row", na.rm = TRUE, ...) 
{ 46 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 47 | if (is.list(dat) && !is.data.frame(dat)) { 48 | purrr::map(dat, adorn_percentages, denominator, na.rm, ...) 49 | } else { 50 | # catch bad inputs 51 | if (!is.data.frame(dat)) { 52 | stop("adorn_percentages() must be called on a data.frame or list of data.frames") 53 | } 54 | rlang::arg_match0(denominator, c("row", "col", "all")) 55 | 56 | dat <- as_tabyl(dat) 57 | 58 | numeric_cols <- which(vapply(dat, is.numeric, logical(1))) 59 | non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) 60 | numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors 61 | explicitly_exempt_totals <- FALSE 62 | 63 | if (rlang::dots_n(...) == 0) { 64 | cols_to_tally <- numeric_cols 65 | } else { 66 | expr <- rlang::expr(c(...)) 67 | cols_to_tally <- tidyselect::eval_select(expr, data = dat) 68 | explicitly_exempt_totals <- !(ncol(dat) %in% cols_to_tally) # if not present, it's b/c user explicitly exempted it 69 | if (any(cols_to_tally %in% non_numeric_cols)) { 70 | message("At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations.") 71 | cols_to_tally <- setdiff(cols_to_tally, non_numeric_cols) 72 | } 73 | } 74 | 75 | if ("col" %in% attr(dat, "totals")) { 76 | # if there's a totals col, don't use it to calculate the %s 77 | cols_to_tally <- setdiff(cols_to_tally, ncol(dat)) 78 | } 79 | 80 | if (denominator == "row") { 81 | # if row-wise percentages and a totals column, need to exempt totals col and make it all 1s 82 | if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) { 83 | dat[[ncol(dat)]] <- rep(1, nrow(dat)) 84 | } 85 | row_sum <- rowSums(dat[cols_to_tally], na.rm = na.rm) 86 | dat[, cols_to_tally] <- dat[cols_to_tally] / row_sum 87 | } else if (denominator == "col") { 88 | # if col-wise percentages and a row column, need to exempt totals row and make it all 1s 89 | if ("row" %in% attr(dat, "totals")) { 90 | col_sum <- colSums(dat[-nrow(dat), ][cols_to_tally], na.rm = na.rm) 91 | } else { 92 | col_sum <- colSums(dat[cols_to_tally], na.rm = na.rm) 93 | } 94 | # add totals col back to be tallied, #357 95 | if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) { 96 | cols_to_tally <- c(cols_to_tally, ncol(dat)) 97 | if ("row" %in% attr(dat, "totals")) { 98 | col_sum <- c(col_sum, sum(dat[-nrow(dat), ncol(dat)])) 99 | } else { 100 | col_sum <- c(col_sum, sum(dat[, ncol(dat)])) 101 | } 102 | } 103 | dat[cols_to_tally] <- sweep(dat[cols_to_tally], 2, col_sum, `/`) # from http://stackoverflow.com/questions/9447801/dividing-columns-by-colsums-in-r 104 | } else if (denominator == "all") { 105 | # if all-wise percentages, need to exempt any totals col or row 106 | if ("row" %in% attr(dat, "totals")) { 107 | complete_n <- sum(dat[-nrow(dat), cols_to_tally], na.rm = TRUE) 108 | } else { 109 | complete_n <- sum(dat[, cols_to_tally], na.rm = TRUE) 110 | } 111 | # add totals col back to be tallied, #357 112 | if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) { 113 | cols_to_tally <- c(cols_to_tally, ncol(dat)) 114 | } 115 | dat[cols_to_tally] <- dat[cols_to_tally] / complete_n 116 | } 117 | dat 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /R/adorn_rounding.R: 
-------------------------------------------------------------------------------- 1 | #' Round the numeric columns in a data.frame. 2 | #' 3 | #' @description 4 | #' Can run on any `data.frame` with at least one numeric column. 5 | #' This function defaults to excluding the first column of the input data.frame, 6 | #' assuming that it contains a descriptive variable, but this can be overridden by 7 | #' specifying the columns to round in the `...` argument. 8 | #' 9 | #' If you're formatting percentages, e.g., the result of [adorn_percentages()], 10 | #' use [adorn_pct_formatting()] instead. This is a more flexible variant for ad-hoc usage. 11 | #' Compared to `adorn_pct_formatting()`, it does not multiply by 100 or pad the 12 | #' numbers with spaces for alignment in the results `data.frame`. 13 | #' This function retains the class of numeric input columns. 14 | #' 15 | #' @param dat A `tabyl` or other `data.frame` with similar layout. 16 | #' If given a list of data.frames, this function will apply itself to each 17 | #' `data.frame` in the list (designed for 3-way `tabyl` lists). 18 | #' @param digits How many digits should be displayed after the decimal point? 19 | #' @param rounding Method to use for rounding - either "half to even" 20 | #' (the base R default method), or "half up", where 14.5 rounds up to 15. 21 | #' @param ... Columns to adorn. This takes a tidyselect specification. 22 | #' By default, all numeric columns (besides the initial column, if numeric) 23 | #' are adorned, but this allows you to manually specify which columns should 24 | #' be adorned, for use on a data.frame that does not result from a call to `tabyl`. 25 | #' 26 | #' @return The `data.frame` with rounded numeric columns. 27 | #' @export 28 | #' @examples 29 | #' 30 | #' mtcars %>% 31 | #' tabyl(am, cyl) %>% 32 | #' adorn_percentages() %>% 33 | #' adorn_rounding(digits = 2, rounding = "half up") 34 | #' 35 | #' # tolerates non-numeric columns: 36 | #' library(dplyr) 37 | #' mtcars %>% 38 | #' tabyl(am, cyl) %>% 39 | #' adorn_percentages("all") %>% 40 | #' mutate(dummy = "a") %>% 41 | #' adorn_rounding() 42 | #' 43 | #' # Control the columns to be adorned with the ... variable selection argument 44 | #' # If using only the ... argument, you can use empty commas as shorthand 45 | #' # to supply the default values to the preceding arguments: 46 | #' cases <- data.frame( 47 | #' region = c("East", "West"), 48 | #' year = 2015, 49 | #' recovered = c(125, 87), 50 | #' died = c(13, 12) 51 | #' ) 52 | #' 53 | #' cases %>% 54 | #' adorn_percentages(, , ends_with("ed")) %>% 55 | #' adorn_rounding(, , all_of(c("recovered", "died"))) 56 | adorn_rounding <- function(dat, digits = 1, rounding = "half to even", ...) { 57 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 58 | if (is.list(dat) && !is.data.frame(dat)) { 59 | purrr::map(dat, adorn_rounding, digits, rounding, ...) 60 | } else { 61 | # catch bad inputs 62 | if (!is.data.frame(dat)) { 63 | stop("adorn_rounding() must be called on a data.frame or list of data.frames") 64 | } 65 | if (!rounding %in% c("half to even", "half up")) { 66 | stop("'rounding' must be one of 'half to even' or 'half up'") 67 | } 68 | numeric_cols <- which(vapply(dat, is.numeric, logical(1))) 69 | non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) 70 | # assume 1st column should not be included so remove it from numeric_cols. 
71 | # Moved up to this line so that if only 1st col is numeric, the function errors 72 | numeric_cols <- setdiff(numeric_cols, 1) 73 | 74 | if (rlang::dots_n(...) == 0) { 75 | cols_to_round <- numeric_cols 76 | } else { 77 | expr <- rlang::expr(c(...)) 78 | cols_to_round <- tidyselect::eval_select(expr, data = dat) 79 | if (any(cols_to_round %in% non_numeric_cols)) { 80 | message("At least one non-numeric column was specified and will not be modified.") 81 | cols_to_round <- setdiff(cols_to_round, non_numeric_cols) 82 | } 83 | } 84 | 85 | if (rounding == "half to even") { 86 | dat[cols_to_round] <- lapply(dat[cols_to_round], function(x) round(x, digits = digits)) 87 | } else { 88 | dat[cols_to_round] <- lapply(dat[cols_to_round], function(x) round_half_up(x, digits = digits)) 89 | } 90 | dat 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /R/adorn_title.R: -------------------------------------------------------------------------------- 1 | #' Add column name to the top of a two-way tabyl. 2 | #' 3 | #' This function adds the column variable name to the top of a `tabyl` for a 4 | #' complete display of information. This makes the tabyl prettier, but renders 5 | #' the `data.frame` less useful for further manipulation. 6 | #' 7 | #' The `placement` argument indicates whether the column name should be added to 8 | #' the `top` of the tabyl in an otherwise-empty row `"top"` or appended to the 9 | #' already-present row name variable (`"combined"`). The formatting in the `"top"` 10 | #' option has the look of base R's `table()`; it also wipes out the other column 11 | #' names, making it hard to further use the `data.frame` besides formatting it for reporting. 12 | #' The `"combined"` option is more conservative in this regard. 13 | #' 14 | #' @param dat A `data.frame` of class `tabyl` or other `data.frame` with a tabyl-like layout. 15 | #' If given a list of data.frames, this function will apply itself to each `data.frame` 16 | #' in the list (designed for 3-way `tabyl` lists). 17 | #' @param placement The title placement, one of `"top"`, or `"combined"`. 18 | #' See **Details** for more information. 19 | #' @param row_name (optional) default behavior is to pull the row name from the 20 | #' attributes of the input `tabyl` object. If you wish to override that text, 21 | #' or if your input is not a `tabyl`, supply a string here. 22 | #' @param col_name (optional) default behavior is to pull the column_name from 23 | #' the attributes of the input `tabyl` object. If you wish to override that text, 24 | #' or if your input is not a `tabyl`, supply a string here. 25 | #' @return The input `tabyl`, augmented with the column title. Non-tabyl inputs 26 | #' that are of class `tbl_df` are downgraded to basic data.frames so that the 27 | #' title row prints correctly. 
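#'
#' For instance, with `placement = "combined"` the row and column variable names
#' should be pasted together into the first column's header (e.g. `am/cyl`):
#'
#' ```r
#' mtcars %>%
#'   tabyl(am, cyl) %>%
#'   adorn_title(placement = "combined")
#' ```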
28 | #' 29 | #' @export 30 | #' @examples 31 | #' 32 | #' mtcars %>% 33 | #' tabyl(am, cyl) %>% 34 | #' adorn_title(placement = "top") 35 | #' 36 | #' # Adding a title to a non-tabyl 37 | #' library(tidyr) 38 | #' library(dplyr) 39 | #' mtcars %>% 40 | #' group_by(gear, am) %>% 41 | #' summarise(avg_mpg = mean(mpg), .groups = "drop") %>% 42 | #' pivot_wider(names_from = am, values_from = avg_mpg) %>% 43 | #' adorn_rounding() %>% 44 | #' adorn_title("top", row_name = "Gears", col_name = "Cylinders") 45 | adorn_title <- function(dat, placement = "top", row_name, col_name) { 46 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 47 | if (is.list(dat) && !is.data.frame(dat)) { 48 | purrr::map(dat, adorn_title, placement, row_name, col_name) 49 | } else { 50 | if (!is.data.frame(dat)) { 51 | stop("\"dat\" must be a data.frame") 52 | } 53 | 54 | rlang::arg_match0(placement, c("top", "combined")) 55 | 56 | if (inherits(dat, "tabyl")) { 57 | if (attr(dat, "tabyl_type") == "one_way") { 58 | warning( 59 | "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result" 60 | ) 61 | } 62 | } 63 | if (missing(col_name)) { 64 | if (!inherits(dat, "tabyl")) { 65 | stop("When input is not a data.frame of class tabyl, a value must be specified for the col_name argument.") 66 | } 67 | col_var <- attr(dat, "var_names")$col 68 | } else { 69 | if (!is.character(col_name)) { 70 | stop("col_name must be a string") 71 | } 72 | col_var <- col_name 73 | } 74 | 75 | if (!missing(row_name)) { 76 | if (!is.character(row_name)) { 77 | stop("row_name must be a string") 78 | } 79 | names(dat)[1] <- row_name 80 | row_var <- row_name 81 | } else { 82 | if (inherits(dat, "tabyl")) { 83 | row_var <- attr(dat, "var_names")$row 84 | } else { 85 | # for non-tabyl input, if no row_name supplied, use first existing name 86 | row_var <- names(dat)[1] 87 | } 88 | } 89 | 90 | 91 | if (placement == "top") { 92 | # to handle factors, problematic in first column and at bind_rows. 93 | dat[, ] <- lapply(dat[, ], as.character) 94 | # Can't use mutate_all b/c it strips attributes 95 | top <- dat[1, ] 96 | 97 | top[1, ] <- as.list(names(top)) 98 | 99 | out <- dplyr::bind_rows(top, dat) 100 | out <- stats::setNames(out, c("", col_var, rep("", ncol(out) - 2))) 101 | } 102 | if (placement == "combined") { 103 | out <- dat 104 | names(out)[1] <- paste(row_var, col_var, sep = "/") 105 | } 106 | # "top" text doesn't print if input (and thus the output) is a tibble 107 | if (inherits(out, "tbl_df")) { 108 | # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls 109 | out <- as.data.frame(out) 110 | } 111 | out 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /R/as_and_untabyl.R: -------------------------------------------------------------------------------- 1 | #' Add `tabyl` attributes to a data.frame 2 | #' 3 | #' @description 4 | #' A `tabyl` is a `data.frame` containing counts of a variable or 5 | #' co-occurrences of two variables (a.k.a., a contingency table or crosstab). 6 | #' This specialized kind of data.frame has attributes that enable `adorn_` 7 | #' functions to be called for precise formatting and presentation of results. 8 | #' E.g., display results as a mix of percentages, Ns, add totals rows or 9 | #' columns, rounding options, in the style of Microsoft Excel PivotTable. 
10 | #' 11 | #' A `tabyl` can be the result of a call to `janitor::tabyl()`, in which case 12 | #' these attributes are added automatically. This function adds `tabyl` class 13 | #' attributes to a data.frame that isn't the result of a call to `tabyl` but 14 | #' meets the requirements of a two-way tabyl: 1) the first column contains values of 15 | #' variable 1, 2) column names 2:n are the values of variable 2, and 3) numeric values 16 | #' in columns 2:n are counts of the co-occurrences of the two variables.* 17 | #' 18 | #' * = this is the ideal form of a `tabyl`, but janitor's `adorn_` functions tolerate 19 | #' and ignore non-numeric columns in positions 2:n. 20 | #' 21 | #' For instance, the result of [dplyr::count()] followed by [tidyr::pivot_wider()] 22 | #' can be treated as a `tabyl`. 23 | #' 24 | #' The result of calling [tabyl()] on a single variable is a special class of 25 | #' one-way tabyl; this function only pertains to the two-way tabyl. 26 | #' 27 | #' @param dat a data.frame with variable values in the first column and numeric 28 | #' values in all other columns. 29 | #' @param axes is this a two_way tabyl or a one_way tabyl? If this function is 30 | #' being called by a user, this should probably be "2". One-way tabyls are 31 | #' created by `tabyl` but are a special case. 32 | #' @param row_var_name (optional) the name of the variable in the row dimension; 33 | #' used by `adorn_title()`. 34 | #' @param col_var_name (optional) the name of the variable in the column 35 | #' dimension; used by `adorn_title()`. 36 | #' @return Returns the same data.frame, but with the additional class of "tabyl" 37 | #' and the attribute "core". 38 | #' @export 39 | #' @examples 40 | #' as_tabyl(mtcars) 41 | #' 42 | as_tabyl <- function(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) { 43 | if (!axes %in% 1:2) { 44 | stop("axes must be either 1 or 2") 45 | } 46 | 47 | # check whether input meets requirements 48 | if (!is.data.frame(dat)) { 49 | stop("input must be a data.frame") 50 | } 51 | if (sum(unlist(lapply(dat, is.numeric))[-1]) == 0) { 52 | stop("at least one of columns 2:n must be of class numeric") 53 | } 54 | 55 | # assign core attribute and classes 56 | if (inherits(dat, "tabyl")) { 57 | # if already a tabyl, may have totals row. 58 | # Safest play is to simply reorder the core rows to match the dat rows 59 | attr(dat, "core") <- attr(dat, "core")[order(match( 60 | attr(dat, "core")[, 1], 61 | dat[, 1] 62 | )), ] 63 | row.names(attr(dat, "core")) <- 1:nrow(attr(dat, "core")) # if they're sorted in the prior step above, this resets 64 | } else { 65 | attr(dat, "core") <- as.data.frame(dat) # core goes first so dat does not yet have attributes attached to it 66 | } 67 | 68 | attr(dat, "tabyl_type") <- ifelse( 69 | !is.null(attr(dat, "tabyl_type")), 70 | attr(dat, "tabyl_type"), # if a one_way tabyl has as_tabyl called on it, it should stay a one_way #523 71 | dplyr::case_when( 72 | axes == 1 ~ "one_way", 73 | axes == 2 ~ "two_way" 74 | ) 75 | ) 76 | class(dat) <- c("tabyl", setdiff(class(dat), "tabyl")) 77 | 78 | if (!missing(row_var_name) | !missing(col_var_name)) { 79 | if (axes != 2) { 80 | stop("variable names are only meaningful for two-way tabyls") 81 | } 82 | attr(dat, "var_names") <- list(row = row_var_name, col = col_var_name) 83 | } 84 | 85 | dat 86 | } 87 | 88 | #' Remove `tabyl` attributes from a data.frame. 89 | #' 90 | #' Strips away all `tabyl`-related attributes from a data.frame. 91 | #' 92 | #' @param dat a `data.frame` of class `tabyl`.
93 | #' @return the same `data.frame`, but without the `tabyl` class and attributes. 94 | #' @export 95 | #' @examples 96 | #' 97 | #' mtcars %>% 98 | #' tabyl(am) %>% 99 | #' untabyl() %>% 100 | #' attributes() # tabyl-specific attributes are gone 101 | untabyl <- function(dat) { 102 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 103 | if (is.list(dat) && !is.data.frame(dat)) { 104 | purrr::map(dat, untabyl) 105 | } else { 106 | if (!inherits(dat, "tabyl")) { 107 | warning("untabyl() called on a non-tabyl") 108 | } 109 | class(dat) <- class(dat)[!class(dat) %in% "tabyl"] 110 | attr(dat, "core") <- NULL 111 | # These attributes may not exist, but simpler to declare them NULL regardless than to check to see if they exist: 112 | attr(dat, "totals") <- NULL 113 | attr(dat, "tabyl_type") <- NULL # may not exist, but simpler to declare it NULL regardless than to check to see if it exists 114 | attr(dat, "var_names") <- NULL # may not exist, but simpler to declare it NULL regardless than to check to see if it exists 115 | dat 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /R/excel_dates.R: -------------------------------------------------------------------------------- 1 | #' Convert dates encoded as serial numbers to Date class. 2 | #' 3 | #' @description 4 | #' Converts numbers like `42370` into date values like `2016-01-01`. 5 | #' 6 | #' Defaults to the modern Excel date encoding system. However, Excel for Mac 7 | #' 2008 and earlier Mac versions of Excel used a different date system. To 8 | #' determine what platform to specify: if the date 2016-01-01 is represented by 9 | #' the number 42370 in your spreadsheet, it's the modern system. If it's 40908, 10 | #' it's the old Mac system. More on date encoding systems at 11 | #' http://support.office.com/en-us/article/Date-calculations-in-Excel-e7fe7167-48a9-4b96-bb53-5612a800b487. 12 | #' 13 | #' A list of all timezones is available from `base::OlsonNames()`, and the 14 | #' current timezone is available from `base::Sys.timezone()`. 15 | #' 16 | #' If your input data has a mix of Excel numeric dates and actual dates, see the 17 | #' more powerful functions [convert_to_date()] and `convert_to_datetime()`. 18 | #' 19 | #' @param date_num numeric vector of serial numbers to convert. 20 | #' @param date_system the date system, either `"modern"` or `"mac pre-2011"`. 21 | #' @param include_time Include the time (hours, minutes, seconds) in the output? 22 | #' (See details) 23 | #' @param round_seconds Round the seconds to an integer (only has an effect when 24 | #' `include_time` is `TRUE`)? 25 | #' @param tz Time zone, used when `include_time = TRUE` (see details for 26 | #' more information on timezones). 27 | #' @return Returns a vector of class Date if `include_time` is 28 | #' `FALSE`. Returns a vector of class POSIXlt if `include_time` is 29 | #' `TRUE`. 30 | #' @details When using `include_time=TRUE`, days with leap seconds will not 31 | #' be accurately handled as they do not appear to be accurately handled by 32 | #' Windows (as described in 33 | #' https://support.microsoft.com/en-us/help/2722715/support-for-the-leap-second). 
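#'
#' As an illustrative check of the two encoding systems described above, both of
#' these calls should return `2016-01-01`:
#'
#' ```r
#' excel_numeric_to_date(42370) # modern date system
#' excel_numeric_to_date(40908, date_system = "mac pre-2011") # old Mac date system
#' ```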
34 | #' 35 | #' @export 36 | #' @seealso [excel_time_to_numeric()] 37 | #' @examples 38 | #' excel_numeric_to_date(40000) 39 | #' excel_numeric_to_date(40000.5) # No time is included 40 | #' excel_numeric_to_date(40000.5, include_time = TRUE) # Time is included 41 | #' excel_numeric_to_date(40000.521, include_time = TRUE) # Time is included 42 | #' excel_numeric_to_date(40000.521, 43 | #' include_time = TRUE, 44 | #' round_seconds = FALSE 45 | #' ) # Time with fractional seconds is included 46 | #' @family date-time cleaning 47 | #' @importFrom lubridate as_date as_datetime force_tz hour minute second 48 | excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = Sys.timezone()) { 49 | if (all(is.na(date_num))) { 50 | # For NA input, return the expected type of NA output. 51 | if (include_time) { 52 | return(lubridate::as_datetime(date_num, tz = tz)) 53 | } else { 54 | return(lubridate::as_date(date_num)) 55 | } 56 | } else if (!is.numeric(date_num)) { 57 | stop("argument `date_num` must be of class numeric") 58 | } 59 | 60 | # Manage floating point imprecision; coerce to double to avoid integer 61 | # overflow. 62 | date_num_days <- (as.double(date_num) * 86400L + 0.001) %/% 86400L 63 | date_num_days_no_floating_correction <- date_num %/% 1 64 | # If the day rolls over due to machine precision, then the seconds should be zero 65 | mask_day_rollover <- !is.na(date_num) & date_num_days > date_num_days_no_floating_correction 66 | date_num_seconds <- (date_num - date_num_days) * 86400 67 | date_num_seconds[mask_day_rollover] <- 0 68 | if (round_seconds) { 69 | date_num_seconds <- round(date_num_seconds) 70 | } 71 | if (any(mask_day_rollover)) { 72 | warning(sum(mask_day_rollover), " date_num values are within 0.001 sec of a later date and were rounded up to the next day.") 73 | } 74 | mask_excel_leap_day_bug <- !is.na(date_num_days) & floor(date_num_days) == 60 75 | mask_before_excel_leap_day_bug <- !is.na(date_num_days) & floor(date_num_days) < 60 76 | date_num_days[mask_excel_leap_day_bug] <- NA_real_ 77 | if (any(!is.na(date_num_days) & (date_num_days < 1))) { 78 | warning("Only `date_num` >= 1 are valid in Excel, creating an earlier date than Excel supports.") 79 | } 80 | date_num_days[mask_before_excel_leap_day_bug] <- date_num_days[mask_before_excel_leap_day_bug] + 1 81 | ret <- 82 | if (date_system == "mac pre-2011") { 83 | lubridate::as_date(floor(date_num_days), origin = "1904-01-01") 84 | } else if (date_system == "modern") { 85 | lubridate::as_date(floor(date_num_days), origin = "1899-12-30") 86 | } else { 87 | stop("argument 'date_system' must be one of 'mac pre-2011' or 'modern'") 88 | } 89 | if (include_time) { 90 | ret <- lubridate::as_datetime(ret) 91 | lubridate::second(ret) <- date_num_seconds %% 60 92 | lubridate::minute(ret) <- floor(date_num_seconds / 60) %% 60 93 | lubridate::hour(ret) <- floor(date_num_seconds / 3600) 94 | ret <- lubridate::force_tz(ret, tzone = tz) 95 | } 96 | if (any(mask_excel_leap_day_bug)) { 97 | warning("NAs introduced by coercion, Excel leap day bug detected in `date_num`. 29 February 1900 does not exist.") 98 | } 99 | if (any(is.na(ret) & !is.na(date_num) & !mask_excel_leap_day_bug)) { 100 | warning("NAs introduced by coercion, possible daylight savings time issue with input. 
Consider `tz='UTC'`.") 101 | } 102 | ret 103 | } 104 | -------------------------------------------------------------------------------- /R/get_dupes.R: -------------------------------------------------------------------------------- 1 | #' Get rows of a `data.frame` with identical values for the specified variables. 2 | #' 3 | #' For hunting duplicate records during data cleaning. Specify the data.frame 4 | #' and the variable combination to search for duplicates and get back the 5 | #' duplicated rows. 6 | #' 7 | #' @param dat The input `data.frame`. 8 | #' @param ... Unquoted variable names to search for duplicates. This takes a 9 | #' tidyselect specification. 10 | #' @return A data.frame with the full records where the specified 11 | #' variables have duplicated values, as well as a variable `dupe_count` 12 | #' showing the number of rows sharing that combination of duplicated values. 13 | #' If the input data.frame was of class `tbl_df`, the output is as well. 14 | #' @export 15 | #' @examples 16 | #' get_dupes(mtcars, mpg, hp) 17 | #' 18 | #' # or called with the magrittr pipe %>% : 19 | #' mtcars %>% get_dupes(wt) 20 | #' 21 | #' # You can use tidyselect helpers to specify variables: 22 | #' mtcars %>% get_dupes(-c(wt, qsec)) 23 | #' mtcars %>% get_dupes(starts_with("cy")) 24 | #' @importFrom tidyselect eval_select 25 | #' @importFrom rlang expr dots_n syms %||% 26 | get_dupes <- function(dat, ...) { 27 | expr <- rlang::expr(c(...)) 28 | pos <- tidyselect::eval_select(expr, data = dat) 29 | 30 | # Check if dat is grouped and if so, save structure and ungroup temporarily 31 | is_grouped <- dplyr::is_grouped_df(dat) 32 | 33 | if (is_grouped) { 34 | dat_groups <- dplyr::group_vars(dat) 35 | dat <- dat %>% dplyr::ungroup() 36 | if (getOption("get_dupes.grouped_warning", TRUE) & interactive()) { 37 | message(paste0("Data is grouped by [", paste(dat_groups, collapse = "|"), "]. Note that get_dupes() is not group aware and does not limit duplicate detection to within-groups, but rather checks over the entire data frame. However grouping structure is preserved.\nThis message is shown once per session and may be disabled by setting options(\"get_dupes.grouped_warning\" = FALSE).")) # nocov 38 | options("get_dupes.grouped_warning" = FALSE) # nocov 39 | } 40 | } 41 | 42 | if (rlang::dots_n(...) == 0) { # if no tidyselect variables are specified, check the whole data.frame 43 | var_names <- names(dat) 44 | nms <- rlang::syms(var_names) 45 | message("No variable names specified - using all columns.\n") 46 | } else { 47 | var_names <- names(pos) 48 | nms <- rlang::syms(var_names) 49 | } 50 | 51 | dupe_count <- NULL # to appease NOTE for CRAN; does nothing. 52 | 53 | 54 | dupes <- dat %>% 55 | dplyr::add_count(!!!nms, name = "dupe_count") %>% 56 | dplyr::filter(dupe_count > 1) %>% 57 | dplyr::select(!!!nms, dupe_count, dplyr::everything()) %>% 58 | dplyr::arrange(dplyr::desc(dupe_count), !!!nms) 59 | 60 | # shorten error message for large data.frames 61 | if (length(var_names) > 10) { 62 | var_names <- c(var_names[1:9], paste("... 
and", length(var_names) - 9, "other variables")) 63 | } 64 | if (nrow(dupes) == 0) { 65 | message(paste0("No duplicate combinations found of: ", paste(var_names, collapse = ", "))) 66 | } 67 | 68 | # Reapply groups if dat was grouped 69 | if (is_grouped) dupes <- dupes %>% dplyr::group_by(!!!rlang::syms(dat_groups)) 70 | 71 | return(dupes) 72 | } 73 | -------------------------------------------------------------------------------- /R/get_level_groups.R: -------------------------------------------------------------------------------- 1 | # Return groupings for a factor variable in the top_levels() function 2 | 3 | get_level_groups <- function(vec, n, num_levels_in_var) { 4 | top_n_lvls <- paste(levels(vec)[1:n], collapse = ", ") 5 | bot_n_lvls <- paste(levels(vec)[(num_levels_in_var - n + 1):num_levels_in_var], collapse = ", ") 6 | 7 | # Identify middle combinations, if needed 8 | if (num_levels_in_var > 2 * n) { 9 | mid_lvls <- paste(levels(vec)[(n + 1):(num_levels_in_var - n)], collapse = ", ") 10 | } else { 11 | mid_lvls <- NA 12 | } 13 | 14 | # Truncate strings if needed 15 | ## Middle groups are variable size, so displaying the N there is useful; 16 | ## Top/Bottom are user-specified size, so just truncate the labels 17 | if (!is.na(mid_lvls) & nchar(mid_lvls) > 30) { 18 | mid_lvls <- paste0("<<< Middle Group (", num_levels_in_var - 2 * n, " categories) >>>") 19 | } 20 | if (nchar(top_n_lvls) > 30) { 21 | top_n_lvls <- paste0(substr(top_n_lvls, 1, 27), "...") 22 | } 23 | if (nchar(bot_n_lvls) > 30) { 24 | bot_n_lvls <- paste0(substr(bot_n_lvls, 1, 27), "...") 25 | } 26 | 27 | list(top = top_n_lvls, mid = mid_lvls, bot = bot_n_lvls) 28 | } 29 | -------------------------------------------------------------------------------- /R/get_one_to_one.R: -------------------------------------------------------------------------------- 1 | #' Find the list of columns that have a 1:1 mapping to each other 2 | #' 3 | #' @param dat A `data.frame` or similar object 4 | #' @return A list with one element for each group of columns that map 5 | #' identically to each other. 
6 | #' @export 7 | #' @examples 8 | #' foo <- data.frame( 9 | #' Lab_Test_Long = c("Cholesterol, LDL", "Cholesterol, LDL", "Glucose"), 10 | #' Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 11 | #' LOINC = c(12345, 12345, 54321), 12 | #' Person = c("Sam", "Bill", "Sam"), 13 | #' stringsAsFactors = FALSE 14 | #' ) 15 | #' get_one_to_one(foo) 16 | get_one_to_one <- function(dat) { 17 | stopifnot(ncol(dat) > 0) 18 | stopifnot(!any(duplicated(names(dat)))) 19 | dat_alt <- dat 20 | for (idx in seq_along(dat_alt)) { 21 | dat_alt[[idx]] <- get_one_to_one_value_order(dat_alt[[idx]]) 22 | } 23 | remaining_cols <- names(dat_alt) 24 | ret <- list() 25 | while (length(remaining_cols) > 0) { 26 | nm1 <- remaining_cols[1] 27 | remaining_cols <- remaining_cols[-1] 28 | current_ret <- nm1 29 | for (nm2 in remaining_cols) { 30 | if (identical(dat_alt[[nm1]], dat_alt[[nm2]])) { 31 | current_ret <- c(current_ret, nm2) 32 | remaining_cols <- setdiff(remaining_cols, nm2) 33 | } 34 | } 35 | if (length(current_ret) > 1) { 36 | ret[[length(ret) + 1]] <- current_ret 37 | } 38 | } 39 | if (length(ret) == 0) { 40 | message("No columns in `", deparse(substitute(dat)), "` map to each other") 41 | } 42 | ret 43 | } 44 | 45 | get_one_to_one_value_order <- function(x) { 46 | # Convert the value to a factor so that any subtly different values become integers 47 | uvalues <- match(x, unique(x)) 48 | new_value <- as.integer(factor(uvalues, levels = unique(uvalues))) 49 | new_value 50 | } 51 | -------------------------------------------------------------------------------- /R/janitor.R: -------------------------------------------------------------------------------- 1 | #' @section Package context: 2 | #' Advanced users can do most things covered here, but they can do it 3 | #' faster with janitor and save their thinking for more fun tasks. 4 | #' @keywords internal 5 | "_PACKAGE" 6 | ## quiets concerns of R CMD check re: the .'s that appear in pipelines 7 | ## and the "n" that is produced by dplyr::count() in a pipeline 8 | if (getRversion() >= "2.15.1") utils::globalVariables(c(".", "n")) 9 | -------------------------------------------------------------------------------- /R/paste_skip_na.R: -------------------------------------------------------------------------------- 1 | #' Like `paste()`, but missing values are omitted 2 | #' 3 | #' @details If all values are missing, the value from the first argument is 4 | #' preserved. 5 | #' 6 | #' @param ...,sep,collapse See [base::paste()] 7 | #' @return A character vector of pasted values. 8 | #' @examples 9 | #' paste_skip_na(NA) # NA_character_ 10 | #' paste_skip_na("A", NA) # "A" 11 | #' paste_skip_na("A", NA, c(NA, "B"), sep = ",") # c("A", "A,B") 12 | #' @export 13 | paste_skip_na <- function(..., sep = " ", collapse = NULL) { 14 | args <- list(...) 
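# A quick sketch of the get_one_to_one_value_order() helper defined above (input
# vectors are hypothetical). Columns with the same value pattern reduce to identical
# integer vectors, which is what identical() in get_one_to_one() compares:
#   get_one_to_one_value_order(c("b", "a", "b", "c"))   # 1 2 1 3
#   get_one_to_one_value_order(c(10, 20, 10, 30))       # 1 2 1 3 -- same pattern,
#   # so these two columns would be reported together as a 1:1 group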
15 | if (length(args) <= 1) { 16 | if (length(args) == 0) { 17 | # match the behavior of paste 18 | paste(sep = sep, collapse = collapse) 19 | } else if (!is.null(collapse)) { 20 | if (all(is.na(args[[1]]))) { 21 | # Collapsing with all NA values results in NA 22 | NA_character_ 23 | } else { 24 | # Collapsing without all NA values collapses the non-NA values 25 | paste(na.omit(args[[1]]), sep = sep, collapse = collapse) 26 | } 27 | } else { 28 | # as.character() to ensure that logical NA values are converted to 29 | # NA_character_ 30 | as.character(args[[1]]) 31 | } 32 | } else { 33 | # There are at least 2 arguments; paste the first two and recurse 34 | a1 <- args[[1]] 35 | a2 <- args[[2]] 36 | if (length(a1) != length(a2)) { 37 | if (length(a1) == 1) { 38 | a1 <- rep(a1, length(a2)) 39 | } else if (length(a2) == 1) { 40 | a2 <- rep(a2, length(a1)) 41 | } else { 42 | stop("Arguments must be the same length or one argument must be a scalar.") 43 | } 44 | } 45 | # Which arguments are NA, if any? 46 | mask1 <- !is.na(a1) 47 | mask2 <- !is.na(a2) 48 | mask_both <- mask1 & mask2 49 | mask_only2 <- (!mask1) & mask2 50 | firsttwo <- a1 51 | if (any(mask_only2)) { 52 | firsttwo[mask_only2] <- a2[mask_only2] 53 | } 54 | if (any(mask_both)) { 55 | # Collapse only occurs on the final pasting 56 | firsttwo[mask_both] <- paste(a1[mask_both], a2[mask_both], sep = sep, collapse = NULL) 57 | } 58 | # prepare to recurse, and recurse 59 | new_args <- append(list(firsttwo), args[-(1:2)]) 60 | new_args$sep <- sep 61 | new_args$collapse <- collapse 62 | do.call(paste_skip_na, new_args) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /R/print_tabyl.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.tabyl <- function(x, ...) { 3 | print.data.frame(x, row.names = FALSE) 4 | } 5 | -------------------------------------------------------------------------------- /R/remove_empties.R: -------------------------------------------------------------------------------- 1 | #' Remove empty rows and/or columns from a data.frame or matrix. 2 | #' 3 | #' Removes all rows and/or columns from a data.frame or matrix that 4 | #' are composed entirely of `NA` values. 5 | #' 6 | #' @param dat the input data.frame or matrix. 7 | #' @param which one of "rows", "cols", or `c("rows", "cols")`. Where no 8 | #' value of which is provided, defaults to removing both empty rows and empty 9 | #' columns, declaring the behavior with a printed message. 10 | #' @param cutoff a row/col should have more than this fraction of non-NA values to be 11 | #' retained. E.g., `cutoff = 0.8` means that rows/cols that are 20% or more missing will be dropped. 12 | #' @param quiet Should messages be suppressed (`TRUE`) or printed 13 | #' (`FALSE`) indicating the summary of empty columns or rows removed? 14 | #' @return Returns the object without its missing rows or columns. 15 | #' @family remove functions 16 | #' @seealso [remove_constant()] for removing 17 | #' constant columns. 
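For reference, two behaviors of `paste_skip_na()` above that the roxygen examples only touch on are scalar recycling and elementwise NA-skipping; a brief sketch (inputs are hypothetical, expected results shown as comments):

paste_skip_na(c("A", NA), c(NA, "B"), sep = "-")
# "A" "B"            -- each element keeps whatever is non-missing
paste_skip_na(c("A", NA, "C"), "x", sep = "-")
# "A-x" "x" "C-x"    -- the scalar is recycled; NA elements are skipped rather than pasted as "NA"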
18 | #' @examples 19 | #' # not run: 20 | #' # dat %>% remove_empty("rows") 21 | #' # addressing a common untidy-data scenario where we have a mixture of 22 | #' # blank values in some (character) columns and NAs in others: 23 | #' library(dplyr) 24 | #' dd <- tibble( 25 | #' x = c(LETTERS[1:5], NA, rep("", 2)), 26 | #' y = c(1:5, rep(NA, 3)) 27 | #' ) 28 | #' # remove_empty() drops row 5 (all NA) but not 6 and 7 (blanks + NAs) 29 | #' dd %>% remove_empty("rows") 30 | #' # solution: preprocess to convert whitespace/empty strings to NA, 31 | #' # _then_ remove empty (all-NA) rows 32 | #' dd %>% 33 | #' mutate(across(where(is.character), ~ na_if(trimws(.), ""))) %>% 34 | #' remove_empty("rows") 35 | #' @export 36 | remove_empty <- function(dat, which = c("rows", "cols"), cutoff = 1, quiet = TRUE) { 37 | if (missing(which) && !missing(dat)) { 38 | message("value for \"which\" not specified, defaulting to c(\"rows\", \"cols\")") 39 | which <- c("rows", "cols") 40 | } 41 | if ((sum(which %in% c("rows", "cols")) != length(which)) && !missing(dat)) { 42 | stop("\"which\" must be one of \"rows\", \"cols\", or c(\"rows\", \"cols\")") 43 | } 44 | if (length(cutoff) != 1) { 45 | stop("cutoff must be a single value") 46 | } else if (!is.numeric(cutoff)) { 47 | stop("cutoff must be numeric") 48 | } else if (cutoff <= 0 | cutoff > 1) { 49 | stop("cutoff must be >0 and <= 1") 50 | } else if (length(which) > 1 & cutoff != 1) { 51 | stop("cutoff must be used with only one of which = 'rows' or 'cols', not both") 52 | } 53 | if ("rows" %in% which) { 54 | # Using different code with cutoff = 1 vs cutoff != 1 to avoid possible 55 | # floating point errors. 56 | mask_keep <- 57 | if (cutoff == 1) { 58 | rowSums(is.na(dat)) != ncol(dat) 59 | } else { 60 | (rowSums(!is.na(dat)) / ncol(dat)) > cutoff 61 | } 62 | if (!quiet) { 63 | remove_message(dat = dat, mask_keep = mask_keep, which = "rows", reason = "empty") 64 | } 65 | dat <- dat[mask_keep, , drop = FALSE] 66 | } 67 | if ("cols" %in% which) { 68 | # Using different code with cutoff = 1 vs cutoff != 1 to avoid possible 69 | # floating point errors. 70 | mask_keep <- 71 | if (cutoff == 1) { 72 | colSums(is.na(dat)) != nrow(dat) 73 | } else { 74 | (colSums(!is.na(dat)) / nrow(dat)) > cutoff 75 | } 76 | if (!quiet) { 77 | remove_message(dat = dat, mask_keep = mask_keep, which = "columns", reason = "empty") 78 | } 79 | dat <- dat[, mask_keep, drop = FALSE] 80 | } 81 | dat 82 | } 83 | 84 | ## Remove constant columns 85 | 86 | #' @title Remove constant columns from a data.frame or matrix. 87 | #' @param dat the input data.frame or matrix. 88 | #' @param na.rm should `NA` values be removed when considering whether a 89 | #' column is constant? The default value of `FALSE` will result in a 90 | #' column not being removed if it's a mix of a single value and `NA`. 91 | #' @param quiet Should messages be suppressed (`TRUE`) or printed 92 | #' (`FALSE`) indicating the summary of empty columns or rows removed? 93 | #' 94 | #' @examples 95 | #' remove_constant(data.frame(A = 1, B = 1:3)) 96 | #' 97 | #' # To find the columns that are constant 98 | #' data.frame(A = 1, B = 1:3) %>% 99 | #' dplyr::select(!dplyr::all_of(names(remove_constant(.)))) %>% 100 | #' unique() 101 | #' @importFrom stats na.omit 102 | #' @family remove functions 103 | #' @seealso [remove_empty()] for removing empty 104 | #' columns or rows. 
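The `cutoff` argument of `remove_empty()` above is easy to misread; as a short sketch with hypothetical data, `cutoff = 0.5` drops rows that are 50% or more missing (and `cutoff` may only be combined with a single value of `which`):

dd <- data.frame(a = c(1, NA, NA), b = c(2, 3, NA), c = c(4, 5, NA))
remove_empty(dd, which = "rows", cutoff = 0.5)
# keeps rows 1 and 2 (100% and ~67% non-NA); drops row 3 (all NA)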
105 | #' @export 106 | remove_constant <- function(dat, na.rm = FALSE, quiet = TRUE) { 107 | mask <- 108 | sapply( 109 | X = seq_len(ncol(dat)), 110 | FUN = function(idx) { 111 | column_to_test <- 112 | if (is.matrix(dat)) { 113 | dat[, idx] 114 | } else { 115 | dat[[idx]] 116 | } 117 | length(unique( 118 | if (na.rm) { 119 | stats::na.omit(column_to_test) 120 | } else { 121 | column_to_test 122 | } 123 | )) <= 1 # the < is in case all values are NA with na.rm=TRUE 124 | } 125 | ) 126 | if (!quiet) { 127 | remove_message(dat = dat, mask_keep = !mask, which = "columns", reason = "constant") 128 | } 129 | dat[, !mask, drop = FALSE] 130 | } 131 | 132 | 133 | #' Generate the message describing columns or rows that are being removed. 134 | #' 135 | #' @inheritParams remove_empty 136 | #' @param mask_keep A logical vector of rows or columns to keep (`TRUE`) or 137 | #' remove (`FALSE`). 138 | #' @param reason The reason that rows are being removed (to be used in the 139 | #' message. 140 | #' @noRd 141 | remove_message <- function(dat, mask_keep, which = c("columns", "rows"), reason = c("empty", "constant")) { 142 | if (all(mask_keep)) { 143 | message("No ", reason, " ", which, " to remove.") 144 | } else { 145 | details <- 146 | if (which == "columns") { 147 | if (is.null(colnames(dat)) || any(colnames(dat) %in% "")) { 148 | sprintf("%0.3g%%", 100 * sum(!mask_keep) / length(mask_keep)) 149 | } else { 150 | sprintf("Removed: %s", paste(names(dat)[!mask_keep], collapse = ", ")) 151 | } 152 | } else { 153 | sprintf("%0.3g%%", 100 * sum(!mask_keep) / length(mask_keep)) 154 | } 155 | message( 156 | sprintf( 157 | "Removing %g %s %s of %g %s total (%s).", 158 | sum(!mask_keep), reason, which, length(mask_keep), which, details 159 | ) 160 | ) 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /R/round_half_up.R: -------------------------------------------------------------------------------- 1 | #' Round a numeric vector; halves will be rounded up, ala Microsoft Excel. 2 | #' 3 | #' @description 4 | #' In base R `round()`, halves are rounded to even, e.g., 12.5 and 5 | #' 11.5 are both rounded to 12. This function rounds 12.5 to 13 (assuming 6 | #' `digits = 0`). Negative halves are rounded away from zero, e.g., -0.5 is 7 | #' rounded to -1. 8 | #' 9 | #' This may skew subsequent statistical analysis of the data, but may be 10 | #' desirable in certain contexts. This function is implemented exactly from 11 | #' ; see that question and comments for 12 | #' discussion of this issue. 13 | #' 14 | #' @param x a numeric vector to round. 15 | #' @param digits how many digits should be displayed after the decimal point? 16 | #' @returns A vector with the same length as `x` 17 | #' @export 18 | #' @examples 19 | #' round_half_up(12.5) 20 | #' round_half_up(1.125, 2) 21 | #' round_half_up(1.125, 1) 22 | #' round_half_up(-0.5, 0) # negatives get rounded away from zero 23 | #' 24 | round_half_up <- function(x, digits = 0) { 25 | posneg <- sign(x) 26 | z <- abs(x) * 10^digits 27 | z <- z + 0.5 + sqrt(.Machine$double.eps) 28 | z <- trunc(z) 29 | z <- z / 10^digits 30 | z * posneg 31 | } 32 | 33 | #' Round a numeric vector to the specified number of significant digits; halves will be rounded up. 34 | #' 35 | #' @description 36 | #' In base R `signif()`, halves are rounded to even, e.g., 37 | #' `signif(11.5, 2)` and `signif(12.5, 2)` are both rounded to 12. 38 | #' This function rounds 12.5 to 13 (assuming `digits = 2`). 
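To make the contrast with base R concrete, a quick comparison of `round_half_up()` above against `round()` (the commented outputs follow from the definitions given):

round(c(0.5, 1.5, 2.5))          # 0 2 2  -- base R rounds halves to even
round_half_up(c(0.5, 1.5, 2.5))  # 1 2 3  -- halves always round up
round_half_up(c(-0.5, -1.5))     # -1 -2  -- negative halves round away from zero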
Negative halves 39 | #' are rounded away from zero, e.g., `signif(-2.5, 1)` is rounded to -3. 40 | #' 41 | #' This may skew subsequent statistical analysis of the data, but may be 42 | #' desirable in certain contexts. This function is implemented from 43 | #' ; see that question and 44 | #' comments for discussion of this issue. 45 | #' 46 | #' @param x a numeric vector to round. 47 | #' @param digits integer indicating the number of significant digits to be used. 48 | #' @export 49 | #' @examples 50 | #' signif_half_up(12.5, 2) 51 | #' signif_half_up(1.125, 3) 52 | #' signif_half_up(-2.5, 1) # negatives get rounded away from zero 53 | #' 54 | signif_half_up <- function(x, digits = 6) { 55 | xs <- which(x != 0 & !is.na(x) & !is.infinite(x)) 56 | 57 | y <- rep(0, length(x)) 58 | z <- x 59 | 60 | y[xs] <- 10^(digits - ceiling(log10(abs(x[xs])))) 61 | 62 | z[xs] <- round_half_up(x[xs] * y[xs]) / y[xs] 63 | 64 | return(z) 65 | } 66 | -------------------------------------------------------------------------------- /R/round_to_fraction.R: -------------------------------------------------------------------------------- 1 | #' Round to the nearest fraction of a specified denominator. 2 | #' 3 | #' @description 4 | #' Round a decimal to the precise decimal value of a specified 5 | #' fractional denominator. Common use cases include addressing floating point 6 | #' imprecision and enforcing that data values fall into a certain set. 7 | #' 8 | #' E.g., if a decimal represents hours and values should be logged to the nearest 9 | #' minute, `round_to_fraction(x, 60)` would enforce that distribution and 0.57 10 | #' would be rounded to 0.566667, the equivalent of 34/60. 0.56 would also be rounded 11 | #' to 34/60. 12 | #' 13 | #' Set `denominator = 1` to round to whole numbers. 14 | #' 15 | #' The `digits` argument allows for rounding of the subsequent result. 16 | #' 17 | #' @details 18 | #' If `digits` is `Inf`, `x` is rounded to the fraction 19 | #' and then kept at full precision. If `digits` is `"auto"`, the 20 | #' number of digits is automatically selected as 21 | #' `ceiling(log10(denominator)) + 1`. 22 | #' 23 | #' @param x A numeric vector 24 | #' @param denominator The denominator of the fraction for rounding (a scalar or 25 | #' vector positive integer). 26 | #' @param digits Integer indicating the number of decimal places to be used 27 | #' after rounding to the fraction. This is passed to `base::round()`). 28 | #' Negative values are allowed (see Details). (`Inf` indicates no 29 | #' subsequent rounding) 30 | #' @return the input x rounded to a decimal value that has an integer numerator relative 31 | #' to `denominator` (possibly subsequently rounded to a number of decimal 32 | #' digits). 
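Similarly for `signif_half_up()` above, the difference from base `signif()` only appears on exact halves at the last retained significant digit; a brief check with hypothetical values:

signif(c(11.5, 12.5), digits = 2)          # 12 12
signif_half_up(c(11.5, 12.5), digits = 2)  # 12 13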
33 | #' @examples 34 | #' round_to_fraction(1.6, denominator = 2) 35 | #' round_to_fraction(pi, denominator = 7) # 22/7 36 | #' round_to_fraction(c(8.1, 9.2), denominator = c(7, 8)) 37 | #' round_to_fraction(c(8.1, 9.2), denominator = c(7, 8), digits = 3) 38 | #' round_to_fraction(c(8.1, 9.2, 10.3), denominator = c(7, 8, 1001), digits = "auto") 39 | #' @export 40 | round_to_fraction <- function(x, denominator, digits = Inf) { 41 | stopifnot(is.numeric(x)) 42 | stopifnot(length(denominator) %in% c(1, length(x))) 43 | stopifnot(is.numeric(denominator)) 44 | stopifnot(denominator >= 1) 45 | if (!(identical(digits, "auto") || 46 | (is.numeric(digits) & (length(digits) %in% c(1, length(x)))))) { 47 | stop('`digits` must be either "auto" or a number that is either a scalar (length = 1) or the same length as `x`.') 48 | } 49 | ret <- round(x * denominator, digits = 0) / denominator 50 | if (identical(digits, "auto")) { 51 | digits <- ceiling(log10(denominator)) + 1 52 | } 53 | mask_inf_digits <- is.infinite(digits) 54 | if (!all(mask_inf_digits)) { 55 | ret[!mask_inf_digits] <- 56 | round(ret[!mask_inf_digits], digits = digits[!mask_inf_digits]) 57 | } 58 | ret 59 | } 60 | -------------------------------------------------------------------------------- /R/row_to_names.R: -------------------------------------------------------------------------------- 1 | #' Elevate a row to be the column names of a data.frame. 2 | #' 3 | #' @param dat The input data.frame 4 | #' @param row_number The row(s) of `dat` containing the variable names or the 5 | #' string `"find_header"` to use `find_header(dat=dat, ...)` to find 6 | #' the row_number. Allows for multiple rows input as a numeric vector. NA's are 7 | #' ignored, and if a column contains only `NA` value it will be named `"NA"`. 8 | #' @param ... Sent to `find_header()`, if 9 | #' `row_number = "find_header"`. Otherwise, ignored. 10 | #' @param remove_row Should the row `row_number` be removed from the 11 | #' resulting data.frame? 12 | #' @param remove_rows_above If `row_number != 1`, should the rows above 13 | #' `row_number` - that is, between `1:(row_number-1)` - be removed 14 | #' from the resulting data.frame? 15 | #' @param sep A character string to separate the values in the case of multiple 16 | #' rows input to `row_number`. 17 | #' @return A data.frame with new names (and some rows removed, if specified) 18 | #' @family Set names 19 | #' @examples 20 | #' x <- data.frame( 21 | #' X_1 = c(NA, "Title", 1:3), 22 | #' X_2 = c(NA, "Title2", 4:6) 23 | #' ) 24 | #' x %>% 25 | #' row_to_names(row_number = 2) 26 | #' 27 | #' x %>% 28 | #' row_to_names(row_number = "find_header") 29 | #' @export 30 | row_to_names <- function(dat, row_number, ..., remove_row = TRUE, remove_rows_above = TRUE, sep = "_") { 31 | # Check inputs 32 | if (!(is.logical(remove_row) & length(remove_row) == 1)) { 33 | stop("remove_row must be either TRUE or FALSE, not ", as.character(remove_row)) 34 | } else if (!(is.logical(remove_rows_above) & length(remove_rows_above) == 1)) { 35 | stop("remove_rows_above must be either TRUE or FALSE, not ", as.character(remove_rows_above)) 36 | } 37 | if (identical(row_number, "find_header")) { 38 | # no need to check if it is a character string, %in% will do that for us 39 | # (and will handle the odd-ball cases like someone sending in 40 | # factor("find_header")). 41 | row_number <- find_header(dat = dat, ...) 42 | } else if (is.numeric(row_number)) { 43 | extra_args <- list(...) 
44 | if (length(extra_args) != 0) { 45 | stop("Extra arguments (...) may only be given if row_number = 'find_header'.") 46 | } 47 | } else { 48 | stop("row_number must be a numeric value or 'find_header'") 49 | } 50 | if (!is.character(sep)) { 51 | stop("`sep` must be of type `character`.") 52 | } 53 | if (length(sep) != 1) { 54 | stop("`sep` must be of length 1.") 55 | } 56 | if (is.na(sep)) { 57 | stop("`sep` can't be of type `NA_character_`.") 58 | } 59 | new_names <- sapply(dat[row_number, , drop = FALSE], paste_skip_na, collapse = sep) %>% 60 | stringr::str_replace_na() 61 | 62 | if (any(duplicated(new_names))) { 63 | rlang::warn( 64 | message = paste("Row", row_number, "does not provide unique names. Consider running clean_names() after row_to_names()."), 65 | class = "janitor_warn_row_to_names_not_unique" 66 | ) 67 | } 68 | colnames(dat) <- new_names 69 | rows_to_remove <- c( 70 | if (remove_row) { 71 | row_number 72 | } else { 73 | c() 74 | }, 75 | if (remove_rows_above) { 76 | seq_len(max(row_number) - 1) 77 | } else { 78 | c() 79 | } 80 | ) 81 | if (length(rows_to_remove)) { 82 | dat[-(rows_to_remove), , drop = FALSE] 83 | } else { 84 | dat 85 | } 86 | } 87 | 88 | #' Find the header row in a data.frame 89 | #' 90 | #' @details 91 | #' If `...` is missing, then the first row with no missing values is used. 92 | #' 93 | #' When searching for a specified value or value within a column, the first row 94 | #' with a match will be returned, regardless of the completeness of the rest of 95 | #' that row. If `...` has a single character argument, then the first 96 | #' column is searched for that value. If `...` has a named numeric 97 | #' argument, then the column whose position number matches the value of that 98 | #' argument is searched for the name (see the last example below). If more than one 99 | #' row is found matching a value that is searched for, the number of the first 100 | #' matching row will be returned (with a warning). 101 | #' 102 | #' @inheritParams row_to_names 103 | #' @param ... See details 104 | #' @return The row number for the header row 105 | #' @family Set names 106 | #' @examples 107 | #' # the first row 108 | #' find_header(data.frame(A = "B")) 109 | #' # the second row 110 | #' find_header(data.frame(A = c(NA, "B"))) 111 | #' # the second row since the first has an empty value 112 | #' find_header(data.frame(A = c(NA, "B"), B = c("C", "D"))) 113 | #' # The third row because the second column was searched for the text "E" 114 | #' find_header(data.frame(A = c(NA, "B", "C", "D"), B = c("C", "D", "E", "F")), "E" = 2) 115 | #' @export 116 | find_header <- function(dat, ...) { 117 | extra_args <- list(...) 
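# A sketch of the multi-row header feature of row_to_names() above (data and column
# names are hypothetical):
#   x <- data.frame(a = c("Group", "A", "1"), b = c("Group", "B", "2"))
#   row_to_names(x, row_number = 1:2, sep = "_")
#   # -> a single data row ("1", "2") with names "Group_A" and "Group_B";
#   #    rows 1:2 are dropped because remove_row covers every row in row_number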
118 | if (length(extra_args) == 0) { 119 | # Find the first complete row 120 | ret <- which(rowSums(is.na(dat)) == 0) 121 | if (length(ret) == 0) { 122 | stop("No complete rows (rows with zero NA values) were found.") 123 | } 124 | ret <- ret[1] 125 | } else if (length(extra_args) == 1) { 126 | if (is.null(names(extra_args))) { 127 | # Search for the argument in the first column 128 | column_to_search <- 1 129 | string_to_search <- extra_args[[1]] 130 | } else { 131 | # Search for the name of the argument in the indicated column 132 | column_to_search <- extra_args[[1]] 133 | string_to_search <- names(extra_args) 134 | } 135 | ret <- which(dat[[column_to_search]] %in% string_to_search) 136 | if (length(ret) == 0) { 137 | stop(sprintf( 138 | "The string '%s' was not found in column %g", string_to_search, column_to_search 139 | )) 140 | } else if (length(ret) > 1) { 141 | rlang::warn( 142 | message = 143 | sprintf( 144 | "The string '%s' was found %g times in column %g, using the first row where it was found", 145 | string_to_search, length(ret), column_to_search 146 | ), 147 | class = "janitor_warn_find_header_not_unique" 148 | ) 149 | ret <- ret[1] 150 | } 151 | } else { 152 | stop("Either zero or one arguments other than 'dat' may be provided.") 153 | } 154 | ret 155 | } 156 | -------------------------------------------------------------------------------- /R/sas_dates.R: -------------------------------------------------------------------------------- 1 | #' Convert a SAS date, time or date/time to an R object 2 | #' 3 | #' @inheritParams excel_numeric_to_date 4 | #' @param datetime_num numeric vector of date/time numbers (seconds since 5 | #' midnight 1960-01-01) to convert 6 | #' @param time_num numeric vector of time numbers (seconds since midnight on the 7 | #' current day) to convert 8 | #' @return If a date and time or datetime are provided, a POSIXct object. If a 9 | #' date is provided, a Date object. If a time is provided, an hms::hms object 10 | #' @references SAS Date, Time, and Datetime Values reference (retrieved on 11 | #' 2022-03-08): https://v8doc.sas.com/sashtml/lrcon/zenid-63.htm 12 | #' @examples 13 | #' sas_numeric_to_date(date_num = 15639) # 2002-10-26 14 | #' sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC") # 1998-07-26T14:45:32Z 15 | #' sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC") # 2002-10-26T01:00:00Z 16 | #' sas_numeric_to_date(time_num = 3600) # 01:00:00 17 | #' @family date-time cleaning 18 | #' @export 19 | sas_numeric_to_date <- function(date_num, datetime_num, time_num, tz = "UTC") { 20 | # Confirm that a usable set of input arguments is given 21 | has_date <- !missing(date_num) 22 | has_datetime <- !missing(datetime_num) 23 | has_time <- !missing(time_num) 24 | stopifnot(is.character(tz)) 25 | stopifnot(length(tz) == 1) 26 | if (tz != "UTC") { 27 | warning("SAS may not properly store timezones other than UTC. 
Consider confirming the accuracy of the resulting data.") 28 | } 29 | if (has_date & has_datetime) { 30 | stop("Must not give both `date_num` and `datetime_num`") 31 | } else if (has_time & has_datetime) { 32 | stop("Must not give both `time_num` and `datetime_num`") 33 | } 34 | if (has_time) { 35 | stopifnot("`time_num` must be non-negative" = all(is.na(time_num) | time_num >= 0)) 36 | # Note the value of 86400 is allowed by the SAS standard listed in the 37 | # references section 38 | stopifnot("`time_num` must be within the number of seconds in a day (<= 86400)" = all(is.na(time_num) | time_num <= 86400)) 39 | } 40 | if (has_date & has_time) { 41 | mask_na_match <- is.na(date_num) == is.na(time_num) 42 | if (!all(mask_na_match)) { 43 | stop("The same values are not NA for both `date_num` and `time_num`") 44 | } 45 | datetime_num <- 86400 * date_num + time_num 46 | has_datetime <- TRUE 47 | } 48 | if (has_datetime) { 49 | ret <- as.POSIXct(datetime_num, origin = "1960-01-01", tz = tz) 50 | } else if (has_date) { 51 | ret <- as.Date(date_num, origin = "1960-01-01") 52 | } else if (has_time) { 53 | ret <- hms::hms(seconds = time_num) 54 | } else { 55 | stop("Must give one of `date_num`, `datetime_num`, `time_num`, or `date_num` and `time_num`") 56 | } 57 | ret 58 | } 59 | -------------------------------------------------------------------------------- /R/single_value.R: -------------------------------------------------------------------------------- 1 | #' Ensure that a vector has only a single value throughout. 2 | #' 3 | #' Missing values are replaced with the single value, and if all values are 4 | #' missing, the first value in `missing` is used throughout. 5 | #' 6 | #' @param x The vector which should have a single value 7 | #' @param missing The vector of values to consider missing in `x` 8 | #' @param warn_if_all_missing Generate a warning if all values are missing? 9 | #' @param info If more than one value is found, append this to the warning or 10 | #' error to assist with determining the location of the issue. 11 | #' @return `x` as the scalar single value found throughout (or an error if 12 | #' more than one value is found). 
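For the common case of a SAS export carrying separate date and time columns, `sas_numeric_to_date()` above vectorizes cleanly and tolerates rows where both values are missing; a sketch with hypothetical column names:

sas_raw <- data.frame(visit_date = c(15639, NA), visit_time = c(3600, NA))
dplyr::mutate(
  sas_raw,
  visit_dt = sas_numeric_to_date(date_num = visit_date, time_num = visit_time, tz = "UTC")
)
# visit_dt should be "2002-10-26 01:00:00 UTC" for the first row and NA for the second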
13 | #' @examples 14 | #' # A simple use case with vectors of input 15 | #' 16 | #' single_value(c(NA, 1)) 17 | #' # Multiple, different values of missing can be given 18 | #' single_value(c(NA, "a"), missing = c(NA, "a")) 19 | #' 20 | #' # A typical use case with a grouped data.frame used for input and the output 21 | #' # (`B` is guaranteed to have a single value and only one row, in this case) 22 | #' data.frame( 23 | #' A = rep(1:3, each = 2), 24 | #' B = c(rep(4:6, each = 2)) 25 | #' ) %>% 26 | #' dplyr::group_by(A) %>% 27 | #' dplyr::summarize( 28 | #' B = single_value(B) 29 | #' ) 30 | #' 31 | #' try( 32 | #' # info is useful to give when multiple values may be found to see what 33 | #' # grouping variable or what calculation is causing the error 34 | #' data.frame( 35 | #' A = rep(1:3, each = 2), 36 | #' B = c(rep(1:2, each = 2), 1:2) 37 | #' ) %>% 38 | #' dplyr::group_by(A) %>% 39 | #' dplyr::mutate( 40 | #' C = single_value(B, info = paste("Calculating C for group A=", A)) 41 | #' ) 42 | #' ) 43 | #' @export 44 | single_value <- function(x, missing = NA, warn_if_all_missing = FALSE, info = NULL) { 45 | mask_found <- !(x %in% missing) 46 | if (warn_if_all_missing && !any(mask_found)) { 47 | warning("All values are missing") 48 | } 49 | found_values <- unique(x[mask_found]) 50 | if (length(found_values) == 0) { 51 | missing[1] 52 | } else if (length(found_values) == 1) { 53 | found_values 54 | } else { 55 | if (!is.null(info)) { 56 | info <- paste(":", info) 57 | } 58 | stop("More than one (", length(found_values), ") value found (", paste(found_values, collapse = ", "), ")", info) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /R/top_levels.R: -------------------------------------------------------------------------------- 1 | #' Generate a frequency table of a factor grouped into top-n, bottom-n, and all 2 | #' other levels. 3 | #' 4 | #' Get a frequency table of a factor variable, grouped into categories by level. 5 | #' 6 | #' @param input_vec The factor variable to tabulate. 7 | #' @param n Number of levels to include in top and bottom groups 8 | #' @param show_na Should cases where the variable is `NA` be shown? 9 | #' @return A `data.frame` (actually a `tbl_df`) with the frequencies of the 10 | #' grouped, tabulated variable. Includes counts and percentages, and valid 11 | #' percentages (calculated omitting `NA` values, if present in the vector and 12 | #' `show_na = TRUE`.) 13 | #' @export 14 | #' @examples 15 | #' top_levels(as.factor(mtcars$hp), 2) 16 | top_levels <- function(input_vec, n = 2, show_na = FALSE) { 17 | # Initial type error catching 18 | if (!is.factor(input_vec)) { 19 | stop("factor_vec is not of type 'factor'") 20 | } 21 | 22 | num_levels_in_var <- nlevels(input_vec) 23 | 24 | # handle bad inputs 25 | if (!num_levels_in_var > 2) { 26 | stop("input factor variable must have at least 3 levels") 27 | } 28 | if (num_levels_in_var < 2 * n) { 29 | stop(paste0( 30 | "there are ", num_levels_in_var, " levels in the variable and ", 31 | n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, 32 | " is greater than ", num_levels_in_var, ", 33 | there would be overlap in the top and bottom groups and some records will be double-counted." 
34 | )) 35 | } 36 | if (n < 1 || n %% 1 != 0) { 37 | stop("n must be a whole number at least 1") 38 | } 39 | 40 | var_name <- deparse(substitute(input_vec)) 41 | 42 | # Identify top/mid/bottom group labels for printing 43 | groups <- get_level_groups(input_vec, n, num_levels_in_var) 44 | 45 | # convert input vector into grouped variable 46 | new_vec <- ifelse(as.numeric(input_vec) <= n, 47 | groups$top, 48 | ifelse(as.numeric(input_vec) > (num_levels_in_var - n), 49 | groups$bot, 50 | groups$mid 51 | ) 52 | ) 53 | 54 | # recode variable as hi-med-lo factor so table prints w/ correct sorting 55 | if (!is.na(groups$mid)) { 56 | new_vec <- factor(new_vec, levels = c(groups$top, groups$mid, groups$bot)) 57 | } else { 58 | new_vec <- factor(new_vec, levels = c(groups$top, groups$bot)) 59 | } 60 | 61 | # tabulate grouped variable, then reset name to match input variable name 62 | result <- tabyl(new_vec, show_na = show_na) 63 | names(result)[1] <- var_name 64 | result 65 | } 66 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | #' @param lhs A value or the magrittr placeholder. 12 | #' @param rhs A function call using the magrittr semantics. 13 | #' @return The result of calling `rhs(lhs)`. 14 | #' @examples 15 | #' mtcars %>% 16 | #' tabyl(carb, cyl) %>% 17 | #' adorn_totals() 18 | NULL 19 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://sfirke.github.io/janitor/ 2 | template: 3 | bootstrap: 5 4 | 5 | 6 | reference: 7 | - title: Cleaning data 8 | 9 | - subtitle: Cleaning variable names 10 | contents: 11 | - contains("clean_names") 12 | 13 | - title: Exploring data 14 | desc: > 15 | tabyls are an enhanced version of tables. See `vignette("tabyls")` 16 | for more details. 17 | contents: 18 | - tabyl 19 | - starts_with("adorn") 20 | - contains("tabyl") 21 | - -contains('.test') 22 | 23 | - subtitle: Change order 24 | contents: 25 | - row_to_names 26 | - find_header 27 | 28 | - title: Comparison 29 | desc: > 30 | Compare data frames columns 31 | contents: 32 | - starts_with("compare_df_cols") 33 | 34 | - title: Removing unnecessary columns / rows 35 | contents: 36 | - starts_with("remove_") 37 | - get_dupes 38 | - get_one_to_one 39 | - top_levels 40 | - single_value 41 | 42 | - title: Rounding / dates helpers 43 | desc: > 44 | Help to mimic some behaviour from Excel or SAS. 45 | These should be used on vector. 46 | contents: 47 | - round_half_up 48 | - signif_half_up 49 | - round_to_fraction 50 | - excel_numeric_to_date 51 | - sas_numeric_to_date 52 | - excel_time_to_numeric 53 | - starts_with("convert_to_date") 54 | 55 | - title: Misc / helpers 56 | desc: > 57 | These functions can help perform less frequent operations. 
58 | contents: 59 | - describe_class 60 | - paste_skip_na 61 | - chisq.test 62 | - fisher.test 63 | - mu_to_u 64 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "reach, diff, flags, files" 3 | behavior: default 4 | require_changes: false # if true: only post the comment if coverage changes 5 | require_base: no # [yes :: must have a base report to post] 6 | require_head: yes # [yes :: must have a head report to post] 7 | branches: null 8 | ignore: 9 | - "print_*" 10 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # Submission 2 | 2024-12-22 3 | 4 | ## Submission summary 5 | 6 | ### janitor version 2.2.1 7 | Contains only trivial changes needed to address failing test on CRAN, specific to how timezones are handled in Debian. 8 | 9 | ### Test environments 10 | 11 | #### Windows 12 | * Windows Server 2022 x64 (build 20348) with R Under development (unstable) (2024-12-20 r87452 ucrt) via win-builder, checked 2024-12-21 13 | 14 | #### Linux 15 | * Ubuntu 24.04 R-version 4.3.3 (2024-02-29) (local) 16 | 17 | ### R CMD check results 18 | 0 errors | 0 warnings | 0 notes 19 | 20 | ### Downstream dependencies 21 | This does not negatively affect downstream dependencies. 22 | 23 | revdepcheck passed for 124 of 125 packages. I investigated package BFS and the erroring function and found no use of janitor's functions so believe this to be a false positive. -------------------------------------------------------------------------------- /dirty_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/dirty_data.xlsx -------------------------------------------------------------------------------- /janitor.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | 20 | UseNativePipeOperator: No 21 | 22 | SpellingDictionary: en_US 23 | -------------------------------------------------------------------------------- /man/add_totals_col.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{add_totals_col} 4 | \alias{add_totals_col} 5 | \title{Append a totals column to a data.frame.} 6 | \usage{ 7 | add_totals_col(dat, na.rm = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{an input data.frame with at least one numeric column.} 11 | 12 | \item{na.rm}{should missing values (including NaN) be omitted from the calculations?} 13 | } 14 | \value{ 15 | Returns a data.frame with a totals column containing row-wise sums. 16 | } 17 | \description{ 18 | This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals(where = "col")}} instead. 
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/add_totals_row.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{add_totals_row} 4 | \alias{add_totals_row} 5 | \title{Append a totals row to a data.frame.} 6 | \usage{ 7 | add_totals_row(dat, fill = "-", na.rm = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{an input data.frame with at least one numeric column.} 11 | 12 | \item{fill}{if there are more than one non-numeric columns, what string should fill the bottom row of those columns?} 13 | 14 | \item{na.rm}{should missing values (including NaN) be omitted from the calculations?} 15 | } 16 | \value{ 17 | Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. 18 | } 19 | \description{ 20 | This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals()}} instead. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/adorn_crosstab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{adorn_crosstab} 4 | \alias{adorn_crosstab} 5 | \title{Add presentation formatting to a crosstabulation table.} 6 | \usage{ 7 | adorn_crosstab( 8 | dat, 9 | denom = "row", 10 | show_n = TRUE, 11 | digits = 1, 12 | show_totals = FALSE, 13 | rounding = "half to even" 14 | ) 15 | } 16 | \arguments{ 17 | \item{dat}{a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}.} 18 | 19 | \item{denom}{the denominator to use for calculating percentages. One of "row", "col", or "all".} 20 | 21 | \item{show_n}{should counts be displayed alongside the percentages?} 22 | 23 | \item{digits}{how many digits should be displayed after the decimal point?} 24 | 25 | \item{show_totals}{display a totals summary? Will be a row, column, or both depending on the value of \code{denom}.} 26 | 27 | \item{rounding}{method to use for truncating percentages - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15.} 28 | } 29 | \value{ 30 | Returns a data.frame. 31 | } 32 | \description{ 33 | This function is deprecated, use \code{\link[=tabyl]{tabyl()}} with the \code{adorn_} family of functions instead. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/adorn_ns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_ns.R 3 | \name{adorn_ns} 4 | \alias{adorn_ns} 5 | \title{Add underlying Ns to a tabyl displaying percentages.} 6 | \usage{ 7 | adorn_ns( 8 | dat, 9 | position = "rear", 10 | ns = attr(dat, "core"), 11 | format_func = function(x) { 12 | format(x, big.mark = ",") 13 | }, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{dat}{A data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or 19 | \code{adorn_pct_formatting} called on it. 
If given a list of data.frames, 20 | this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} 21 | 22 | \item{position}{Should the N go in the front, or in the rear, of the percentage?} 23 | 24 | \item{ns}{The Ns to append. The default is the "core" attribute of the input tabyl 25 | \code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if your Ns 26 | are stored somewhere else, or you need to customize them beyond what can be done 27 | with \code{format_func}, you can supply them here.} 28 | 29 | \item{format_func}{A formatting function to run on the Ns. Consider defining 30 | with \code{\link[base:format]{base::format()}}.} 31 | 32 | \item{...}{Columns to adorn. This takes a tidyselect specification. By default, 33 | all columns are adorned except for the first column and columns not of class 34 | \code{numeric}, but this allows you to manually specify which columns should be adorned, 35 | for use on a data.frame that does not result from a call to \code{tabyl}.} 36 | } 37 | \value{ 38 | A \code{data.frame} with Ns appended 39 | } 40 | \description{ 41 | This function adds back the underlying Ns to a \code{tabyl} whose percentages were 42 | calculated using \code{\link[=adorn_percentages]{adorn_percentages()}}, to display the Ns and percentages together. 43 | You can also call it on a non-tabyl data.frame to which you wish to append Ns. 44 | } 45 | \examples{ 46 | mtcars \%>\% 47 | tabyl(am, cyl) \%>\% 48 | adorn_percentages("col") \%>\% 49 | adorn_pct_formatting() \%>\% 50 | adorn_ns(position = "front") 51 | 52 | # Format the Ns with a custom format_func: 53 | set.seed(1) 54 | bigger_dat <- data.frame( 55 | sex = rep(c("m", "f"), 3000), 56 | age = round(runif(3000, 1, 102), 0) 57 | ) 58 | bigger_dat$age_group <- cut(bigger_dat$age, quantile(bigger_dat$age, c(0, 1 / 3, 2 / 3, 1))) 59 | 60 | bigger_dat \%>\% 61 | tabyl(age_group, sex, show_missing_levels = FALSE) \%>\% 62 | adorn_totals(c("row", "col")) \%>\% 63 | adorn_percentages("col") \%>\% 64 | adorn_pct_formatting(digits = 1) \%>\% 65 | adorn_ns(format_func = function(x) format(x, big.mark = ".", decimal.mark = ",")) 66 | # Control the columns to be adorned with the ... variable selection argument 67 | # If using only the ... argument, you can use empty commas as shorthand 68 | # to supply the default values to the preceding arguments: 69 | 70 | cases <- data.frame( 71 | region = c("East", "West"), 72 | year = 2015, 73 | recovered = c(125, 87), 74 | died = c(13, 12) 75 | ) 76 | 77 | cases \%>\% 78 | adorn_percentages("col",,recovered:died) \%>\% 79 | adorn_pct_formatting(,,,,,recovered:died) \%>\% 80 | adorn_ns(,,,recovered:died) 81 | 82 | } 83 | -------------------------------------------------------------------------------- /man/adorn_pct_formatting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_pct_formatting.R 3 | \name{adorn_pct_formatting} 4 | \alias{adorn_pct_formatting} 5 | \title{Format a \code{data.frame} of decimals as percentages.} 6 | \usage{ 7 | adorn_pct_formatting( 8 | dat, 9 | digits = 1, 10 | rounding = "half to even", 11 | affix_sign = TRUE, 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{dat}{a data.frame with decimal values, typically the result of a call 17 | to \code{adorn_percentages} on a \code{tabyl}. 
If given a list of data.frames, this 18 | function will apply itself to each data.frame in the list (designed for 19 | 3-way \code{tabyl} lists).} 20 | 21 | \item{digits}{how many digits should be displayed after the decimal point?} 22 | 23 | \item{rounding}{method to use for rounding - either "half to even", the base 24 | R default method, or "half up", where 14.5 rounds up to 15.} 25 | 26 | \item{affix_sign}{should the \% sign be affixed to the end?} 27 | 28 | \item{...}{columns to adorn. This takes a tidyselect specification. By 29 | default, all numeric columns (besides the initial column, if numeric) are 30 | adorned, but this allows you to manually specify which columns should be 31 | adorned, for use on a data.frame that does not result from a call to 32 | \code{tabyl}.} 33 | } 34 | \value{ 35 | a data.frame with formatted percentages 36 | } 37 | \description{ 38 | Numeric columns get multiplied by 100 and formatted as 39 | percentages according to user specifications. This function defaults to 40 | excluding the first column of the input data.frame, assuming that it contains 41 | a descriptive variable, but this can be overridden by specifying the columns 42 | to adorn in the \code{...} argument. Non-numeric columns are always excluded. 43 | 44 | The decimal separator character is the result of \code{getOption("OutDec")}, which 45 | is based on the user's locale. If the default behavior is undesirable, 46 | change this value ahead of calling the function, either by changing locale or 47 | with \code{options(OutDec = ",")}. This aligns the decimal separator character 48 | with that used in \code{base::print()}. 49 | } 50 | \examples{ 51 | mtcars \%>\% 52 | tabyl(am, cyl) \%>\% 53 | adorn_percentages("col") \%>\% 54 | adorn_pct_formatting() 55 | 56 | # Control the columns to be adorned with the ... variable selection argument 57 | # If using only the ... argument, you can use empty commas as shorthand 58 | # to supply the default values to the preceding arguments: 59 | 60 | cases <- data.frame( 61 | region = c("East", "West"), 62 | year = 2015, 63 | recovered = c(125, 87), 64 | died = c(13, 12) 65 | ) 66 | 67 | cases \%>\% 68 | adorn_percentages("col", , recovered:died) \%>\% 69 | adorn_pct_formatting(, , , recovered:died) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /man/adorn_percentages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_percentages.R 3 | \name{adorn_percentages} 4 | \alias{adorn_percentages} 5 | \title{Convert a data.frame of counts to percentages.} 6 | \usage{ 7 | adorn_percentages(dat, denominator = "row", na.rm = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{tabyl} or other data.frame with a tabyl-like layout. 11 | If given a list of data.frames, this function will apply itself to each 12 | \code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{denominator}{The direction to use for calculating percentages. 15 | One of "row", "col", or "all".} 16 | 17 | \item{na.rm}{should missing values (including \code{NaN}) be omitted from the calculations?} 18 | 19 | \item{...}{columns to adorn. This takes a <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> 20 | specification. 
By default, all numeric columns (besides the initial column, if numeric) 21 | are adorned, but this allows you to manually specify which columns should 22 | be adorned, for use on a \code{data.frame} that does not result from a call to \code{\link[=tabyl]{tabyl()}}.} 23 | } 24 | \value{ 25 | A \code{data.frame} of percentages, expressed as numeric values between 0 and 1. 26 | } 27 | \description{ 28 | This function defaults to excluding the first column of the input data.frame, 29 | assuming that it contains a descriptive variable, but this can be overridden 30 | by specifying the columns to adorn in the \code{...} argument. 31 | } 32 | \examples{ 33 | 34 | mtcars \%>\% 35 | tabyl(am, cyl) \%>\% 36 | adorn_percentages("col") 37 | 38 | # calculates correctly even with totals column and/or row: 39 | mtcars \%>\% 40 | tabyl(am, cyl) \%>\% 41 | adorn_totals("row") \%>\% 42 | adorn_percentages() 43 | 44 | # Control the columns to be adorned with the ... variable selection argument 45 | # If using only the ... argument, you can use empty commas as shorthand 46 | # to supply the default values to the preceding arguments: 47 | 48 | cases <- data.frame( 49 | region = c("East", "West"), 50 | year = 2015, 51 | recovered = c(125, 87), 52 | died = c(13, 12) 53 | ) 54 | 55 | cases \%>\% 56 | adorn_percentages(, , recovered:died) 57 | } 58 | -------------------------------------------------------------------------------- /man/adorn_rounding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_rounding.R 3 | \name{adorn_rounding} 4 | \alias{adorn_rounding} 5 | \title{Round the numeric columns in a data.frame.} 6 | \usage{ 7 | adorn_rounding(dat, digits = 1, rounding = "half to even", ...) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{tabyl} or other \code{data.frame} with similar layout. 11 | If given a list of data.frames, this function will apply itself to each 12 | \code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{digits}{How many digits should be displayed after the decimal point?} 15 | 16 | \item{rounding}{Method to use for rounding - either "half to even" 17 | (the base R default method), or "half up", where 14.5 rounds up to 15.} 18 | 19 | \item{...}{Columns to adorn. This takes a tidyselect specification. 20 | By default, all numeric columns (besides the initial column, if numeric) 21 | are adorned, but this allows you to manually specify which columns should 22 | be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} 23 | } 24 | \value{ 25 | The \code{data.frame} with rounded numeric columns. 26 | } 27 | \description{ 28 | Can run on any \code{data.frame} with at least one numeric column. 29 | This function defaults to excluding the first column of the input data.frame, 30 | assuming that it contains a descriptive variable, but this can be overridden by 31 | specifying the columns to round in the \code{...} argument. 32 | 33 | If you're formatting percentages, e.g., the result of \code{\link[=adorn_percentages]{adorn_percentages()}}, 34 | use \code{\link[=adorn_pct_formatting]{adorn_pct_formatting()}} instead. This is a more flexible variant for ad-hoc usage. 35 | Compared to \code{adorn_pct_formatting()}, it does not multiply by 100 or pad the 36 | numbers with spaces for alignment in the results \code{data.frame}. 37 | This function retains the class of numeric input columns. 
38 | } 39 | \examples{ 40 | 41 | mtcars \%>\% 42 | tabyl(am, cyl) \%>\% 43 | adorn_percentages() \%>\% 44 | adorn_rounding(digits = 2, rounding = "half up") 45 | 46 | # tolerates non-numeric columns: 47 | library(dplyr) 48 | mtcars \%>\% 49 | tabyl(am, cyl) \%>\% 50 | adorn_percentages("all") \%>\% 51 | mutate(dummy = "a") \%>\% 52 | adorn_rounding() 53 | 54 | # Control the columns to be adorned with the ... variable selection argument 55 | # If using only the ... argument, you can use empty commas as shorthand 56 | # to supply the default values to the preceding arguments: 57 | cases <- data.frame( 58 | region = c("East", "West"), 59 | year = 2015, 60 | recovered = c(125, 87), 61 | died = c(13, 12) 62 | ) 63 | 64 | cases \%>\% 65 | adorn_percentages(, , ends_with("ed")) \%>\% 66 | adorn_rounding(, , all_of(c("recovered", "died"))) 67 | } 68 | -------------------------------------------------------------------------------- /man/adorn_title.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_title.R 3 | \name{adorn_title} 4 | \alias{adorn_title} 5 | \title{Add column name to the top of a two-way tabyl.} 6 | \usage{ 7 | adorn_title(dat, placement = "top", row_name, col_name) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{data.frame} of class \code{tabyl} or other \code{data.frame} with a tabyl-like layout. 11 | If given a list of data.frames, this function will apply itself to each \code{data.frame} 12 | in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{placement}{The title placement, one of \code{"top"}, or \code{"combined"}. 15 | See \strong{Details} for more information.} 16 | 17 | \item{row_name}{(optional) default behavior is to pull the row name from the 18 | attributes of the input \code{tabyl} object. If you wish to override that text, 19 | or if your input is not a \code{tabyl}, supply a string here.} 20 | 21 | \item{col_name}{(optional) default behavior is to pull the column_name from 22 | the attributes of the input \code{tabyl} object. If you wish to override that text, 23 | or if your input is not a \code{tabyl}, supply a string here.} 24 | } 25 | \value{ 26 | The input \code{tabyl}, augmented with the column title. Non-tabyl inputs 27 | that are of class \code{tbl_df} are downgraded to basic data.frames so that the 28 | title row prints correctly. 29 | } 30 | \description{ 31 | This function adds the column variable name to the top of a \code{tabyl} for a 32 | complete display of information. This makes the tabyl prettier, but renders 33 | the \code{data.frame} less useful for further manipulation. 34 | } 35 | \details{ 36 | The \code{placement} argument indicates whether the column name should be added to 37 | the \code{top} of the tabyl in an otherwise-empty row \code{"top"} or appended to the 38 | already-present row name variable (\code{"combined"}). The formatting in the \code{"top"} 39 | option has the look of base R's \code{table()}; it also wipes out the other column 40 | names, making it hard to further use the \code{data.frame} besides formatting it for reporting. 41 | The \code{"combined"} option is more conservative in this regard. 
42 | } 43 | \examples{ 44 | 45 | mtcars \%>\% 46 | tabyl(am, cyl) \%>\% 47 | adorn_title(placement = "top") 48 | 49 | # Adding a title to a non-tabyl 50 | library(tidyr) 51 | library(dplyr) 52 | mtcars \%>\% 53 | group_by(gear, am) \%>\% 54 | summarise(avg_mpg = mean(mpg), .groups = "drop") \%>\% 55 | pivot_wider(names_from = am, values_from = avg_mpg) \%>\% 56 | adorn_rounding() \%>\% 57 | adorn_title("top", row_name = "Gears", col_name = "Cylinders") 58 | } 59 | -------------------------------------------------------------------------------- /man/adorn_totals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_totals.R 3 | \name{adorn_totals} 4 | \alias{adorn_totals} 5 | \title{Append a totals row and/or column to a data.frame} 6 | \usage{ 7 | adorn_totals(dat, where = "row", fill = "-", na.rm = TRUE, name = "Total", ...) 8 | } 9 | \arguments{ 10 | \item{dat}{An input \code{data.frame} with at least one numeric column. If given a 11 | list of data.frames, this function will apply itself to each \code{data.frame} 12 | in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{where}{One of "row", "col", or \code{c("row", "col")}} 15 | 16 | \item{fill}{If there are non-numeric columns, what should fill the bottom row 17 | of those columns? If a string, relevant columns will be coerced to character. 18 | If \code{NA} then column types are preserved.} 19 | 20 | \item{na.rm}{Should missing values (including \code{NaN}) be omitted from the calculations?} 21 | 22 | \item{name}{Name of the totals row and/or column. If both are created, and 23 | \code{name} is a single string, that name is applied to both. If both are created 24 | and \code{name} is a vector of length 2, the first element of the vector will be 25 | used as the row name (in column 1), and the second element will be used as the 26 | totals column name. Defaults to "Total".} 27 | 28 | \item{...}{Columns to total. This takes a tidyselect specification. By default, 29 | all numeric columns (besides the initial column, if numeric) are included in 30 | the totals, but this allows you to manually specify which columns should be 31 | included, for use on a data.frame that does not result from a call to \code{tabyl}.} 32 | } 33 | \value{ 34 | A \code{data.frame} augmented with a totals row, column, or both. 35 | The \code{data.frame} is now also of class \code{tabyl} and stores information about 36 | the attached totals and underlying data in the tabyl attributes. 37 | } 38 | \description{ 39 | This function defaults to excluding the first column of the input data.frame, 40 | assuming that it contains a descriptive variable, but this can be overridden 41 | by specifying the columns to be totaled in the \code{...} argument. Non-numeric 42 | columns are converted to character class and have a user-specified fill character 43 | inserted in the totals row. 
44 | } 45 | \examples{ 46 | mtcars \%>\% 47 | tabyl(am, cyl) \%>\% 48 | adorn_totals() 49 | } 50 | -------------------------------------------------------------------------------- /man/as_tabyl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_and_untabyl.R 3 | \name{as_tabyl} 4 | \alias{as_tabyl} 5 | \title{Add \code{tabyl} attributes to a data.frame} 6 | \usage{ 7 | as_tabyl(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{a data.frame with variable values in the first column and numeric 11 | values in all other columns.} 12 | 13 | \item{axes}{is this a two_way tabyl or a one_way tabyl? If this function is 14 | being called by a user, this should probably be "2". One-way tabyls are 15 | created by \code{tabyl} but are a special case.} 16 | 17 | \item{row_var_name}{(optional) the name of the variable in the row dimension; 18 | used by \code{adorn_title()}.} 19 | 20 | \item{col_var_name}{(optional) the name of the variable in the column 21 | dimension; used by \code{adorn_title()}.} 22 | } 23 | \value{ 24 | Returns the same data.frame, but with the additional class of "tabyl" 25 | and the attribute "core". 26 | } 27 | \description{ 28 | A \code{tabyl} is a \code{data.frame} containing counts of a variable or 29 | co-occurrences of two variables (a.k.a., a contingency table or crosstab). 30 | This specialized kind of data.frame has attributes that enable \code{adorn_} 31 | functions to be called for precise formatting and presentation of results. 32 | E.g., display results as a mix of percentages, Ns, add totals rows or 33 | columns, rounding options, in the style of Microsoft Excel PivotTable. 34 | 35 | A \code{tabyl} can be the result of a call to \code{janitor::tabyl()}, in which case 36 | these attributes are added automatically. This function adds \code{tabyl} class 37 | attributes to a data.frame that isn't the result of a call to \code{tabyl} but 38 | meets the requirements of a two-way tabyl: 1) First column contains values of 39 | variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values 40 | in columns 2:n are counts of the co-occurrences of the two variables.* 41 | \itemize{ 42 | \item = this is the ideal form of a \code{tabyl}, but janitor's \code{adorn_} functions tolerate 43 | and ignore non-numeric columns in positions 2:n. 44 | } 45 | 46 | For instance, the result of \code{\link[dplyr:count]{dplyr::count()}} followed by \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}} 47 | can be treated as a \code{tabyl}. 48 | 49 | The result of calling \code{\link[=tabyl]{tabyl()}} on a single variable is a special class of 50 | one-way tabyl; this function only pertains to the two-way tabyl. 51 | } 52 | \examples{ 53 | as_tabyl(mtcars) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/chisq.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistical_tests.R 3 | \name{chisq.test} 4 | \alias{chisq.test} 5 | \alias{chisq.test.default} 6 | \alias{chisq.test.tabyl} 7 | \title{Apply \code{stats::chisq.test()} to a two-way tabyl} 8 | \usage{ 9 | chisq.test(x, ...) 10 | 11 | \method{chisq.test}{default}(x, y = NULL, ...) 12 | 13 | \method{chisq.test}{tabyl}(x, tabyl_results = TRUE, ...) 
14 | } 15 | \arguments{ 16 | \item{x}{a two-way tabyl, a numeric vector or a factor} 17 | 18 | \item{...}{other parameters passed to \code{\link[stats:chisq.test]{stats::chisq.test()}}} 19 | 20 | \item{y}{if x is a vector, must be another vector or factor of the same length} 21 | 22 | \item{tabyl_results}{If \code{TRUE} and \code{x} is a tabyl object, 23 | also return \code{observed}, \code{expected}, \code{residuals} and \code{stdres} as tabyl.} 24 | } 25 | \value{ 26 | The result is the same as the one of \code{stats::chisq.test()}. 27 | If \code{tabyl_results} is \code{TRUE}, the returned tables \code{observed}, \code{expected}, 28 | \code{residuals} and \code{stdres} are converted to tabyls. 29 | } 30 | \description{ 31 | This generic function overrides \code{stats::chisq.test}. If the passed table 32 | is a two-way tabyl, it runs it through janitor::chisq.test.tabyl, otherwise 33 | it just calls \code{stats::chisq.test()}. 34 | } 35 | \examples{ 36 | tab <- tabyl(mtcars, gear, cyl) 37 | chisq.test(tab) 38 | chisq.test(tab)$residuals 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/compare_df_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_df_cols.R 3 | \name{compare_df_cols} 4 | \alias{compare_df_cols} 5 | \title{Compare data frames columns before merging} 6 | \usage{ 7 | compare_df_cols( 8 | ..., 9 | return = c("all", "match", "mismatch"), 10 | bind_method = c("bind_rows", "rbind"), 11 | strict_description = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{...}{A combination of data.frames, tibbles, and lists of 16 | data.frames/tibbles. The values may optionally be named arguments; if 17 | named, the output column will be the name; if not named, the output column 18 | will be the data.frame name (see examples section).} 19 | 20 | \item{return}{Should a summary of "all" columns be returned, only return 21 | "match"ing columns, or only "mismatch"ing columns?} 22 | 23 | \item{bind_method}{What method of binding should be used to determine 24 | matches? With "bind_rows", columns missing from a data.frame would be 25 | considered a match (as in \code{dplyr::bind_rows()}; with "rbind", columns 26 | missing from a data.frame would be considered a mismatch (as in 27 | \code{base::rbind()}.} 28 | 29 | \item{strict_description}{Passed to \code{describe_class}. Also, see the 30 | Details section.} 31 | } 32 | \value{ 33 | A data.frame with a column named "column_name" with a value named 34 | after the input data.frames' column names, and then one column per 35 | data.frame (named after the input data.frame). If more than one input has 36 | the same column name, the column naming will have suffixes defined by 37 | sequential use of \code{base::merge()} and may differ from expected naming. 38 | The rows within the data.frame-named columns are descriptions of the 39 | classes of the data within the columns (generated by 40 | \code{describe_class}). 41 | } 42 | \description{ 43 | Generate a comparison of data.frames (or similar objects) that indicates if 44 | they will successfully bind together by rows. 45 | } 46 | \details{ 47 | Due to the returned "column_name" column, no input data.frame may be 48 | named "column_name". 49 | 50 | The \code{strict_description} argument is most typically used to understand 51 | if factor levels match or are bindable. 
Factors are typically bindable, 52 | but the behavior of what happens when they bind differs based on the 53 | binding method ("bind_rows" or "rbind"). Even when 54 | \code{strict_description} is \code{FALSE}, data.frames may still bind 55 | because some classes (like factors and characters) can bind even if they 56 | appear to differ. 57 | } 58 | \examples{ 59 | compare_df_cols(data.frame(A = 1), data.frame(B = 2)) 60 | # user-defined names 61 | compare_df_cols(dfA = data.frame(A = 1), dfB = data.frame(B = 2)) 62 | # a combination of list and data.frame input 63 | compare_df_cols(listA = list(dfA = data.frame(A = 1), dfB = data.frame(B = 2)), data.frame(A = 3)) 64 | } 65 | \seealso{ 66 | Other data frame type comparison: 67 | \code{\link{compare_df_cols_same}()}, 68 | \code{\link{describe_class}()} 69 | } 70 | \concept{data frame type comparison} 71 | -------------------------------------------------------------------------------- /man/compare_df_cols_same.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_df_cols.R 3 | \name{compare_df_cols_same} 4 | \alias{compare_df_cols_same} 5 | \title{Do the data.frames have the same columns & types?} 6 | \usage{ 7 | compare_df_cols_same( 8 | ..., 9 | bind_method = c("bind_rows", "rbind"), 10 | verbose = TRUE 11 | ) 12 | } 13 | \arguments{ 14 | \item{...}{A combination of data.frames, tibbles, and lists of 15 | data.frames/tibbles. The values may optionally be named arguments; if 16 | named, the output column will be the name; if not named, the output column 17 | will be the data.frame name (see examples section).} 18 | 19 | \item{bind_method}{What method of binding should be used to determine 20 | matches? With "bind_rows", columns missing from a data.frame would be 21 | considered a match (as in \code{dplyr::bind_rows()}); with "rbind", columns 22 | missing from a data.frame would be considered a mismatch (as in 23 | \code{base::rbind()}).} 24 | 25 | \item{verbose}{Print the mismatching columns if binding will fail.} 26 | } 27 | \value{ 28 | \code{TRUE} if row binding will succeed or \code{FALSE} if it will fail. 29 | } 30 | \description{ 31 | Check whether a set of data.frames are row-bindable. Calls \code{compare_df_cols()} 32 | and returns \code{TRUE} if there are no mis-matching rows. 33 | } 34 | \examples{ 35 | compare_df_cols_same(data.frame(A = 1), data.frame(A = 2)) 36 | compare_df_cols_same(data.frame(A = 1), data.frame(B = 2)) 37 | compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), verbose = FALSE) 38 | compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), bind_method = "rbind") 39 | } 40 | \seealso{ 41 | Other data frame type comparison: 42 | \code{\link{compare_df_cols}()}, 43 | \code{\link{describe_class}()} 44 | } 45 | \concept{data frame type comparison} 46 | -------------------------------------------------------------------------------- /man/convert_to_NA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{convert_to_NA} 4 | \alias{convert_to_NA} 5 | \title{Convert string values to true \code{NA} values.} 6 | \usage{ 7 | convert_to_NA(dat, strings) 8 | } 9 | \arguments{ 10 | \item{dat}{vector or data.frame to operate on.} 11 | 12 | \item{strings}{character vector of strings to convert.} 13 | } 14 | \value{ 15 | Returns a cleaned object.
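A small sketch, not from the package's examples, of the strict_description behavior described for compare_df_cols() above: factors whose level sets differ are only reported as different classes when strict_description = TRUE.
df1 <- data.frame(x = factor("a"))
df2 <- data.frame(x = factor("b"))
compare_df_cols(df1, df2, strict_description = TRUE)  # factor levels are part of the description, so x mismatches
compare_df_cols(df1, df2)                             # default FALSE: both columns are described simply as "factor"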
Can be a vector, data.frame, or \code{tibble::tbl_df} depending on the provided input. 16 | } 17 | \description{ 18 | Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:na_if]{dplyr::na_if()}} instead. 19 | 20 | Converts instances of user-specified strings into \code{NA}. Can operate on either a single vector or an entire data.frame. 21 | } 22 | \seealso{ 23 | janitor_deprecated 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/convert_to_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_to_date.R 3 | \name{convert_to_date} 4 | \alias{convert_to_date} 5 | \alias{convert_to_datetime} 6 | \title{Parse dates from many formats} 7 | \usage{ 8 | convert_to_date( 9 | x, 10 | ..., 11 | character_fun = lubridate::ymd, 12 | string_conversion_failure = c("error", "warning") 13 | ) 14 | 15 | convert_to_datetime( 16 | x, 17 | ..., 18 | tz = "UTC", 19 | character_fun = lubridate::ymd_hms, 20 | string_conversion_failure = c("error", "warning") 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{The object to convert} 25 | 26 | \item{...}{Passed to further methods. Eventually may be passed to 27 | \code{excel_numeric_to_date()}, \code{base::as.POSIXct()}, or \code{base::as.Date()}.} 28 | 29 | \item{character_fun}{A function to convert non-numeric-looking, non-\code{NA} values 30 | in \code{x} to POSIXct objects.} 31 | 32 | \item{string_conversion_failure}{If a character value fails to parse into the 33 | desired class and instead returns \code{NA}, should the function return the 34 | result with a warning or throw an error?} 35 | 36 | \item{tz}{The timezone for POSIXct output, unless an object is POSIXt 37 | already. Ignored for Date output.} 38 | } 39 | \value{ 40 | POSIXct objects for \code{convert_to_datetime()} or Date objects for 41 | \code{convert_to_date()}. 42 | } 43 | \description{ 44 | Convert many date and date-time (POSIXct) formats as may be received 45 | from Microsoft Excel. 46 | } 47 | \details{ 48 | Character conversion checks if it matches something that looks like a 49 | Microsoft Excel numeric date, converts those to numeric, and then runs 50 | convert_to_datetime_helper() on those numbers. Then, character to Date or 51 | POSIXct conversion occurs via \code{character_fun(x, ...)} or 52 | \code{character_fun(x, tz=tz, ...)}, respectively. 53 | } 54 | \examples{ 55 | convert_to_date("2009-07-06") 56 | convert_to_date(40000) 57 | convert_to_date("40000.1") 58 | # Mixed date source data can be provided. 59 | convert_to_date(c("2020-02-29", "40000.1")) 60 | convert_to_datetime( 61 | c("2009-07-06", "40000.1", "40000", NA), 62 | character_fun = lubridate::ymd_h, truncated = 1, tz = "UTC" 63 | ) 64 | } 65 | \seealso{ 66 | Other date-time cleaning: 67 | \code{\link{excel_numeric_to_date}()}, 68 | \code{\link{excel_time_to_numeric}()}, 69 | \code{\link{sas_numeric_to_date}()} 70 | } 71 | \concept{date-time cleaning} 72 | -------------------------------------------------------------------------------- /man/crosstab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{crosstab} 4 | \alias{crosstab} 5 | \title{Generate a crosstabulation of two vectors.} 6 | \usage{ 7 | crosstab(...) 
8 | } 9 | \arguments{ 10 | \item{...}{arguments} 11 | } 12 | \description{ 13 | This function is deprecated, use \code{\link[=tabyl]{tabyl(dat, var1, var2)}} instead. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/describe_class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_df_cols.R 3 | \name{describe_class} 4 | \alias{describe_class} 5 | \alias{describe_class.factor} 6 | \alias{describe_class.default} 7 | \title{Describe the class(es) of an object} 8 | \usage{ 9 | describe_class(x, strict_description = TRUE) 10 | 11 | \method{describe_class}{factor}(x, strict_description = TRUE) 12 | 13 | \method{describe_class}{default}(x, strict_description = TRUE) 14 | } 15 | \arguments{ 16 | \item{x}{The object to describe} 17 | 18 | \item{strict_description}{Should differing factor levels be treated 19 | as differences for the purposes of identifying mismatches? 20 | \code{strict_description = TRUE} is stricter and factors with different 21 | levels will be treated as different classes. \code{FALSE} is more 22 | lenient: for class comparison purposes, the variable is just a "factor".} 23 | } 24 | \value{ 25 | A character scalar describing the class(es) of an object where if the 26 | scalar will match, columns in a data.frame (or similar object) should bind 27 | together without issue. 28 | } 29 | \description{ 30 | Describe the class(es) of an object 31 | } 32 | \details{ 33 | For package developers, an S3 generic method can be written for 34 | \code{describe_class()} for custom classes that may need more definition 35 | than the default method. This function is called by \code{\link[=compare_df_cols]{compare_df_cols()}}. 36 | } 37 | \section{Methods (by class)}{ 38 | \itemize{ 39 | \item \code{describe_class(factor)}: Describe factors with their levels 40 | and if they are ordered. 41 | 42 | \item \code{describe_class(default)}: List all classes of an object. 43 | 44 | }} 45 | \examples{ 46 | describe_class(1) 47 | describe_class(factor("A")) 48 | describe_class(ordered(c("A", "B"))) 49 | describe_class(ordered(c("A", "B")), strict_description = FALSE) 50 | } 51 | \seealso{ 52 | Other data frame type comparison: 53 | \code{\link{compare_df_cols}()}, 54 | \code{\link{compare_df_cols_same}()} 55 | } 56 | \concept{data frame type comparison} 57 | -------------------------------------------------------------------------------- /man/excel_numeric_to_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/excel_dates.R 3 | \name{excel_numeric_to_date} 4 | \alias{excel_numeric_to_date} 5 | \title{Convert dates encoded as serial numbers to Date class.} 6 | \usage{ 7 | excel_numeric_to_date( 8 | date_num, 9 | date_system = "modern", 10 | include_time = FALSE, 11 | round_seconds = TRUE, 12 | tz = Sys.timezone() 13 | ) 14 | } 15 | \arguments{ 16 | \item{date_num}{numeric vector of serial numbers to convert.} 17 | 18 | \item{date_system}{the date system, either \code{"modern"} or \code{"mac pre-2011"}.} 19 | 20 | \item{include_time}{Include the time (hours, minutes, seconds) in the output? 
21 | (See details)} 22 | 23 | \item{round_seconds}{Round the seconds to an integer (only has an effect when 24 | \code{include_time} is \code{TRUE})?} 25 | 26 | \item{tz}{Time zone, used when \code{include_time = TRUE} (see details for 27 | more information on timezones).} 28 | } 29 | \value{ 30 | Returns a vector of class Date if \code{include_time} is 31 | \code{FALSE}. Returns a vector of class POSIXlt if \code{include_time} is 32 | \code{TRUE}. 33 | } 34 | \description{ 35 | Converts numbers like \code{42370} into date values like \code{2016-01-01}. 36 | 37 | Defaults to the modern Excel date encoding system. However, Excel for Mac 38 | 2008 and earlier Mac versions of Excel used a different date system. To 39 | determine what platform to specify: if the date 2016-01-01 is represented by 40 | the number 42370 in your spreadsheet, it's the modern system. If it's 40908, 41 | it's the old Mac system. More on date encoding systems at 42 | http://support.office.com/en-us/article/Date-calculations-in-Excel-e7fe7167-48a9-4b96-bb53-5612a800b487. 43 | 44 | A list of all timezones is available from \code{base::OlsonNames()}, and the 45 | current timezone is available from \code{base::Sys.timezone()}. 46 | 47 | If your input data has a mix of Excel numeric dates and actual dates, see the 48 | more powerful functions \code{\link[=convert_to_date]{convert_to_date()}} and \code{convert_to_datetime()}. 49 | } 50 | \details{ 51 | When using \code{include_time=TRUE}, days with leap seconds will not 52 | be accurately handled as they do not appear to be accurately handled by 53 | Windows (as described in 54 | https://support.microsoft.com/en-us/help/2722715/support-for-the-leap-second). 55 | } 56 | \examples{ 57 | excel_numeric_to_date(40000) 58 | excel_numeric_to_date(40000.5) # No time is included 59 | excel_numeric_to_date(40000.5, include_time = TRUE) # Time is included 60 | excel_numeric_to_date(40000.521, include_time = TRUE) # Time is included 61 | excel_numeric_to_date(40000.521, 62 | include_time = TRUE, 63 | round_seconds = FALSE 64 | ) # Time with fractional seconds is included 65 | } 66 | \seealso{ 67 | \code{\link[=excel_time_to_numeric]{excel_time_to_numeric()}} 68 | 69 | Other date-time cleaning: 70 | \code{\link{convert_to_date}()}, 71 | \code{\link{excel_time_to_numeric}()}, 72 | \code{\link{sas_numeric_to_date}()} 73 | } 74 | \concept{date-time cleaning} 75 | -------------------------------------------------------------------------------- /man/excel_time_to_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/excel_time_to_numeric.R 3 | \name{excel_time_to_numeric} 4 | \alias{excel_time_to_numeric} 5 | \title{Convert a time that may be inconsistently or inconveniently formatted from 6 | Microsoft Excel to a numeric number of seconds between 0 and 86400.} 7 | \usage{ 8 | excel_time_to_numeric(time_value, round_seconds = TRUE) 9 | } 10 | \arguments{ 11 | \item{time_value}{A vector of values to convert (see Details)} 12 | 13 | \item{round_seconds}{Should the output number of seconds be rounded to an 14 | integer?} 15 | } 16 | \value{ 17 | A vector of numbers >= 0 and <86400 18 | } 19 | \description{ 20 | Convert a time that may be inconsistently or inconveniently formatted from 21 | Microsoft Excel to a numeric number of seconds between 0 and 86400. 
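excel_time_to_numeric() ships without an examples section; this is a minimal sketch of the input forms documented in its Details, with the seconds-after-midnight results that the documented behavior implies (not verified outputs).
excel_time_to_numeric(0.5)                    # fraction of a day: 43200 (noon)
excel_time_to_numeric("23:30")                # 24-hour clock string: 84600
excel_time_to_numeric("1899-12-31 06:00:00")  # time on the anchor date: 21600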
22 | } 23 | \details{ 24 | \code{time_value} may be one of the following formats: 25 | \itemize{ 26 | \item{numeric}{The input must be a value from 0 to 1 (exclusive of 1); this value is returned as-is.} 27 | \item{POSIXlt or POSIXct}{The input must be on the day 1899-12-31 (any other day will cause an error). The time of day is extracted and converted to a fraction of a day.} 28 | \item{character}{Any of the following (or a mixture of the choices):} 29 | \itemize{ 30 | \item{A character string that is a number between 0 and 1 (exclusive of 1). This value will be converted like a numeric value.} 31 | \item{A character string that looks like a date on 1899-12-31 (specifically, it must start with \code{"1899-12-31 "}), converted like a POSIXct object as described above.} 32 | \item{A character string that looks like a time. Choices are 12-hour time as hour, minute, and optionally second followed by "am" or "pm" (case insensitive) or 24-hour time when hour, minute, optionally second, and no "am" or "pm" is included.} 33 | } 34 | } 35 | } 36 | \seealso{ 37 | \code{\link[=excel_numeric_to_date]{excel_numeric_to_date()}} 38 | 39 | Other date-time cleaning: 40 | \code{\link{convert_to_date}()}, 41 | \code{\link{excel_numeric_to_date}()}, 42 | \code{\link{sas_numeric_to_date}()} 43 | } 44 | \concept{date-time cleaning} 45 | -------------------------------------------------------------------------------- /man/figures/dirty_data.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/man/figures/dirty_data.PNG -------------------------------------------------------------------------------- /man/figures/logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/man/figures/logo_small.png -------------------------------------------------------------------------------- /man/find_header.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/row_to_names.R 3 | \name{find_header} 4 | \alias{find_header} 5 | \title{Find the header row in a data.frame} 6 | \usage{ 7 | find_header(dat, ...) 8 | } 9 | \arguments{ 10 | \item{dat}{The input data.frame} 11 | 12 | \item{...}{See details} 13 | } 14 | \value{ 15 | The row number for the header row 16 | } 17 | \description{ 18 | Find the header row in a data.frame 19 | } 20 | \details{ 21 | If \code{...} is missing, then the first row with no missing values is used. 22 | 23 | When searching for a specified value or value within a column, the first row 24 | with a match will be returned, regardless of the completeness of the rest of 25 | that row. If \code{...} has a single character argument, then the first 26 | column is searched for that value. If \code{...} has a named numeric 27 | argument, then the column whose position number matches the value of that 28 | argument is searched for the name (see the last example below). If more than one 29 | row is found matching a value that is searched for, the number of the first 30 | matching row will be returned (with a warning). 
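A small sketch complementing find_header()'s examples: the single-character-argument form described above searches the first column for that value and returns the first matching row number.
find_header(data.frame(A = c("x", "Title", "B"), B = c("y", "z", "C")), "Title")  # returns 2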
31 | } 32 | \examples{ 33 | # the first row 34 | find_header(data.frame(A = "B")) 35 | # the second row 36 | find_header(data.frame(A = c(NA, "B"))) 37 | # the second row since the first has an empty value 38 | find_header(data.frame(A = c(NA, "B"), B = c("C", "D"))) 39 | # The third row because the second column was searched for the text "E" 40 | find_header(data.frame(A = c(NA, "B", "C", "D"), B = c("C", "D", "E", "F")), "E" = 2) 41 | } 42 | \seealso{ 43 | Other Set names: 44 | \code{\link{clean_names}()}, 45 | \code{\link{mu_to_u}}, 46 | \code{\link{row_to_names}()} 47 | } 48 | \concept{Set names} 49 | -------------------------------------------------------------------------------- /man/fisher.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistical_tests.R 3 | \name{fisher.test} 4 | \alias{fisher.test} 5 | \alias{fisher.test.default} 6 | \alias{fisher.test.tabyl} 7 | \title{Apply \code{stats::fisher.test()} to a two-way tabyl} 8 | \usage{ 9 | fisher.test(x, ...) 10 | 11 | \method{fisher.test}{default}(x, y = NULL, ...) 12 | 13 | \method{fisher.test}{tabyl}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A two-way tabyl, a numeric vector or a factor} 17 | 18 | \item{...}{Parameters passed to \code{\link[stats:fisher.test]{stats::fisher.test()}}} 19 | 20 | \item{y}{if x is a vector, must be another vector or factor of the same length} 21 | } 22 | \value{ 23 | The same as the one of \code{stats::fisher.test()}. 24 | } 25 | \description{ 26 | This generic function overrides \code{\link[stats:fisher.test]{stats::fisher.test()}}. If the passed table 27 | is a two-way tabyl, it runs it through \code{janitor::fisher.test.tabyl}, otherwise 28 | it just calls \code{stats::fisher.test()}. 29 | } 30 | \examples{ 31 | tab <- tabyl(mtcars, gear, cyl) 32 | fisher.test(tab) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/get_dupes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_dupes.R 3 | \name{get_dupes} 4 | \alias{get_dupes} 5 | \title{Get rows of a \code{data.frame} with identical values for the specified variables.} 6 | \usage{ 7 | get_dupes(dat, ...) 8 | } 9 | \arguments{ 10 | \item{dat}{The input \code{data.frame}.} 11 | 12 | \item{...}{Unquoted variable names to search for duplicates. This takes a 13 | tidyselect specification.} 14 | } 15 | \value{ 16 | A data.frame with the full records where the specified 17 | variables have duplicated values, as well as a variable \code{dupe_count} 18 | showing the number of rows sharing that combination of duplicated values. 19 | If the input data.frame was of class \code{tbl_df}, the output is as well. 20 | } 21 | \description{ 22 | For hunting duplicate records during data cleaning. Specify the data.frame 23 | and the variable combination to search for duplicates and get back the 24 | duplicated rows. 
25 | } 26 | \examples{ 27 | get_dupes(mtcars, mpg, hp) 28 | 29 | # or called with the magrittr pipe \%>\% : 30 | mtcars \%>\% get_dupes(wt) 31 | 32 | # You can use tidyselect helpers to specify variables: 33 | mtcars \%>\% get_dupes(-c(wt, qsec)) 34 | mtcars \%>\% get_dupes(starts_with("cy")) 35 | } 36 | -------------------------------------------------------------------------------- /man/get_one_to_one.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_one_to_one.R 3 | \name{get_one_to_one} 4 | \alias{get_one_to_one} 5 | \title{Find the list of columns that have a 1:1 mapping to each other} 6 | \usage{ 7 | get_one_to_one(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{data.frame} or similar object} 11 | } 12 | \value{ 13 | A list with one element for each group of columns that map 14 | identically to each other. 15 | } 16 | \description{ 17 | Find the list of columns that have a 1:1 mapping to each other 18 | } 19 | \examples{ 20 | foo <- data.frame( 21 | Lab_Test_Long = c("Cholesterol, LDL", "Cholesterol, LDL", "Glucose"), 22 | Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 23 | LOINC = c(12345, 12345, 54321), 24 | Person = c("Sam", "Bill", "Sam"), 25 | stringsAsFactors = FALSE 26 | ) 27 | get_one_to_one(foo) 28 | } 29 | -------------------------------------------------------------------------------- /man/janitor-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor.R 3 | \docType{package} 4 | \name{janitor-package} 5 | \alias{janitor} 6 | \alias{janitor-package} 7 | \title{janitor: Simple Tools for Examining and Cleaning Dirty Data} 8 | \description{ 9 | The main janitor functions can: perfectly format data.frame column names; provide quick counts of variable combinations (i.e., frequency tables and crosstabs); and explore duplicate records. Other janitor functions nicely format the tabulation results. These tabulate-and-report functions approximate popular features of SPSS and Microsoft Excel. This package follows the principles of the "tidyverse" and works well with the pipe function %>%. janitor was built with beginning-to-intermediate R users in mind and is optimized for user-friendliness. 10 | } 11 | \section{Package context}{ 12 | 13 | Advanced users can do most things covered here, but they can do it 14 | faster with janitor and save their thinking for more fun tasks. 
15 | } 16 | 17 | \seealso{ 18 | Useful links: 19 | \itemize{ 20 | \item \url{https://github.com/sfirke/janitor} 21 | \item \url{https://sfirke.github.io/janitor/} 22 | \item Report bugs at \url{https://github.com/sfirke/janitor/issues} 23 | } 24 | 25 | } 26 | \author{ 27 | \strong{Maintainer}: Sam Firke \email{samuel.firke@gmail.com} 28 | 29 | Other contributors: 30 | \itemize{ 31 | \item Bill Denney \email{wdenney@humanpredictions.com} [contributor] 32 | \item Chris Haid \email{chrishaid@gmail.com} [contributor] 33 | \item Ryan Knight \email{ryangknight@gmail.com} [contributor] 34 | \item Malte Grosser \email{malte.grosser@gmail.com} [contributor] 35 | \item Jonathan Zadra \email{jonathan.zadra@sorensonimpact.com} [contributor] 36 | \item Olivier Roy [contributor] 37 | \item Josep Pueyo-Ros \email{josep.pueyo@udg.edu} [contributor] 38 | } 39 | 40 | } 41 | \keyword{internal} 42 | -------------------------------------------------------------------------------- /man/janitor_deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{janitor_deprecated} 4 | \alias{janitor_deprecated} 5 | \title{Deprecated Functions in Package janitor} 6 | \description{ 7 | These functions have already become defunct or may be defunct as soon as the next release. 8 | } 9 | \details{ 10 | \itemize{ 11 | \item \code{\link[=adorn_crosstab]{adorn_crosstab()}} -> \code{adorn_} 12 | \item \code{\link[=crosstab]{crosstab()}} -> \code{\link[=tabyl]{tabyl()}} 13 | \item \code{\link[=use_first_valid_of]{use_first_valid_of()}} -> \code{\link[dplyr:coalesce]{dplyr::coalesce()}} 14 | \item \code{\link[=convert_to_NA]{convert_to_NA()}} -> \code{\link[dplyr:na_if]{dplyr::na_if()}} 15 | \item \code{\link[=add_totals_col]{add_totals_col()}} -> \code{\link[=adorn_totals]{adorn_totals(where = "col")}} 16 | \item \code{\link[=add_totals_row]{add_totals_row()}} -> \code{\link[=adorn_totals]{adorn_totals()}} 17 | \item \code{\link[=remove_empty_rows]{remove_empty_rows()}} -> \code{\link[=remove_empty]{remove_empty("rows")}} 18 | \item \code{\link[=remove_empty_cols]{remove_empty_cols()}} -> \code{\link[=remove_empty]{remove_empty("cols")}} 19 | } 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/mu_to_u.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean_names.R 3 | \docType{data} 4 | \name{mu_to_u} 5 | \alias{mu_to_u} 6 | \title{Constant to help map from mu to u} 7 | \format{ 8 | An object of class \code{character} of length 10. 9 | } 10 | \usage{ 11 | mu_to_u 12 | } 13 | \description{ 14 | This is a character vector with names of all known Unicode code points that 15 | look like the Greek mu or the micro symbol and values of "u". This is 16 | intended to simplify mapping from mu or micro in Unicode to the character "u" 17 | with \code{clean_names()} and \code{make_clean_names()}. 18 | } 19 | \details{ 20 | See the help in \code{clean_names()} for how to use this. 
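A hedged sketch of the intended use of mu_to_u described above, assuming the replace argument of make_clean_names()/clean_names() (the interface the clean_names() help refers to) accepts this named vector:
make_clean_names("\u00b5g", replace = mu_to_u)  # the micro sign should become "u", giving "ug"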
21 | } 22 | \seealso{ 23 | Other Set names: 24 | \code{\link{clean_names}()}, 25 | \code{\link{find_header}()}, 26 | \code{\link{row_to_names}()} 27 | } 28 | \concept{Set names} 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/paste_skip_na.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/paste_skip_na.R 3 | \name{paste_skip_na} 4 | \alias{paste_skip_na} 5 | \title{Like \code{paste()}, but missing values are omitted} 6 | \usage{ 7 | paste_skip_na(..., sep = " ", collapse = NULL) 8 | } 9 | \arguments{ 10 | \item{..., sep, collapse}{See \code{\link[base:paste]{base::paste()}}} 11 | } 12 | \value{ 13 | A character vector of pasted values. 14 | } 15 | \description{ 16 | Like \code{paste()}, but missing values are omitted 17 | } 18 | \details{ 19 | If all values are missing, the value from the first argument is 20 | preserved. 21 | } 22 | \examples{ 23 | paste_skip_na(NA) # NA_character_ 24 | paste_skip_na("A", NA) # "A" 25 | paste_skip_na("A", NA, c(NA, "B"), sep = ",") # c("A", "A,B") 26 | } 27 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \arguments{ 10 | \item{lhs}{A value or the magrittr placeholder.} 11 | 12 | \item{rhs}{A function call using the magrittr semantics.} 13 | } 14 | \value{ 15 | The result of calling \code{rhs(lhs)}. 16 | } 17 | \description{ 18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 19 | } 20 | \examples{ 21 | mtcars \%>\% 22 | tabyl(carb, cyl) \%>\% 23 | adorn_totals() 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/remove_constant.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove_empties.R 3 | \name{remove_constant} 4 | \alias{remove_constant} 5 | \title{Remove constant columns from a data.frame or matrix.} 6 | \usage{ 7 | remove_constant(dat, na.rm = FALSE, quiet = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame or matrix.} 11 | 12 | \item{na.rm}{should \code{NA} values be removed when considering whether a 13 | column is constant? The default value of \code{FALSE} will result in a 14 | column not being removed if it's a mix of a single value and \code{NA}.} 15 | 16 | \item{quiet}{Should messages be suppressed (\code{TRUE}) or printed 17 | (\code{FALSE}) indicating the summary of empty columns or rows removed?} 18 | } 19 | \description{ 20 | Remove constant columns from a data.frame or matrix. 21 | } 22 | \examples{ 23 | remove_constant(data.frame(A = 1, B = 1:3)) 24 | 25 | # To find the columns that are constant 26 | data.frame(A = 1, B = 1:3) \%>\% 27 | dplyr::select(!dplyr::all_of(names(remove_constant(.)))) \%>\% 28 | unique() 29 | } 30 | \seealso{ 31 | \code{\link[=remove_empty]{remove_empty()}} for removing empty 32 | columns or rows. 
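A small sketch of the na.rm behavior documented for remove_constant() above: a column mixing a single value with NA is kept by default and only treated as constant (and removed) when na.rm = TRUE.
df <- data.frame(A = c(1, NA, 1), B = 1:3)
remove_constant(df)                # keeps A and B
remove_constant(df, na.rm = TRUE)  # drops A, keeps only B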
33 | 34 | Other remove functions: 35 | \code{\link{remove_empty}()} 36 | } 37 | \concept{remove functions} 38 | -------------------------------------------------------------------------------- /man/remove_empty.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove_empties.R 3 | \name{remove_empty} 4 | \alias{remove_empty} 5 | \title{Remove empty rows and/or columns from a data.frame or matrix.} 6 | \usage{ 7 | remove_empty(dat, which = c("rows", "cols"), cutoff = 1, quiet = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame or matrix.} 11 | 12 | \item{which}{one of "rows", "cols", or \code{c("rows", "cols")}. Where no 13 | value of which is provided, defaults to removing both empty rows and empty 14 | columns, declaring the behavior with a printed message.} 15 | 16 | \item{cutoff}{What fraction (>0 to <=1) of a row or column must be empty for it to 17 | be removed? The default of 1 removes only fully-empty rows/columns; lower values also remove rows/columns that are only partially empty.} 18 | 19 | \item{quiet}{Should messages be suppressed (\code{TRUE}) or printed 20 | (\code{FALSE}) indicating the summary of empty columns or rows removed?} 21 | } 22 | \value{ 23 | Returns the object without its missing rows or columns. 24 | } 25 | \description{ 26 | Removes all rows and/or columns from a data.frame or matrix that 27 | are composed entirely of \code{NA} values. 28 | } 29 | \examples{ 30 | # not run: 31 | # dat \%>\% remove_empty("rows") 32 | # addressing a common untidy-data scenario where we have a mixture of 33 | # blank values in some (character) columns and NAs in others: 34 | library(dplyr) 35 | dd <- tibble( 36 | x = c(LETTERS[1:5], NA, rep("", 2)), 37 | y = c(1:5, rep(NA, 3)) 38 | ) 39 | # remove_empty() drops row 6 (all NA) but not rows 7 and 8 (blanks + NAs) 40 | dd \%>\% remove_empty("rows") 41 | # solution: preprocess to convert whitespace/empty strings to NA, 42 | # _then_ remove empty (all-NA) rows 43 | dd \%>\% 44 | mutate(across(where(is.character), ~ na_if(trimws(.), ""))) \%>\% 45 | remove_empty("rows") 46 | } 47 | \seealso{ 48 | \code{\link[=remove_constant]{remove_constant()}} for removing 49 | constant columns. 50 | 51 | Other remove functions: 52 | \code{\link{remove_constant}()} 53 | } 54 | \concept{remove functions} 55 | -------------------------------------------------------------------------------- /man/remove_empty_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{remove_empty_cols} 4 | \alias{remove_empty_cols} 5 | \title{Removes empty columns from a data.frame.} 6 | \usage{ 7 | remove_empty_cols(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame.} 11 | } 12 | \value{ 13 | Returns the data.frame with no empty columns. 14 | } 15 | \description{ 16 | This function is deprecated, use \code{\link[=remove_empty]{remove_empty("cols")}} instead.
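A minimal sketch of the remove_empty("cols") call that replaces the deprecated function above, with quiet = FALSE so the summary message described in the quiet argument is printed.
data.frame(A = NA, B = 1:3) %>%
  remove_empty("cols", quiet = FALSE)  # drops the all-NA column A and reports it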
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/remove_empty_rows.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{remove_empty_rows} 4 | \alias{remove_empty_rows} 5 | \title{Removes empty rows from a data.frame.} 6 | \usage{ 7 | remove_empty_rows(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame.} 11 | } 12 | \value{ 13 | Returns the data.frame with no empty rows. 14 | } 15 | \description{ 16 | This function is deprecated, use \code{\link[=remove_empty]{remove_empty("rows")}} instead. 17 | } 18 | \examples{ 19 | # not run: 20 | # dat \%>\% remove_empty_rows 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/round_half_up.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/round_half_up.R 3 | \name{round_half_up} 4 | \alias{round_half_up} 5 | \title{Round a numeric vector; halves will be rounded up, ala Microsoft Excel.} 6 | \usage{ 7 | round_half_up(x, digits = 0) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric vector to round.} 11 | 12 | \item{digits}{how many digits should be displayed after the decimal point?} 13 | } 14 | \value{ 15 | A vector with the same length as \code{x} 16 | } 17 | \description{ 18 | In base R \code{round()}, halves are rounded to even, e.g., 12.5 and 19 | 11.5 are both rounded to 12. This function rounds 12.5 to 13 (assuming 20 | \code{digits = 0}). Negative halves are rounded away from zero, e.g., -0.5 is 21 | rounded to -1. 22 | 23 | This may skew subsequent statistical analysis of the data, but may be 24 | desirable in certain contexts. This function is implemented exactly from 25 | \url{https://stackoverflow.com/a/12688836}; see that question and comments for 26 | discussion of this issue. 27 | } 28 | \examples{ 29 | round_half_up(12.5) 30 | round_half_up(1.125, 2) 31 | round_half_up(1.125, 1) 32 | round_half_up(-0.5, 0) # negatives get rounded away from zero 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/round_to_fraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/round_to_fraction.R 3 | \name{round_to_fraction} 4 | \alias{round_to_fraction} 5 | \title{Round to the nearest fraction of a specified denominator.} 6 | \usage{ 7 | round_to_fraction(x, denominator, digits = Inf) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric vector} 11 | 12 | \item{denominator}{The denominator of the fraction for rounding (a scalar or 13 | vector positive integer).} 14 | 15 | \item{digits}{Integer indicating the number of decimal places to be used 16 | after rounding to the fraction. This is passed to \code{base::round()}). 17 | Negative values are allowed (see Details). (\code{Inf} indicates no 18 | subsequent rounding)} 19 | } 20 | \value{ 21 | the input x rounded to a decimal value that has an integer numerator relative 22 | to \code{denominator} (possibly subsequently rounded to a number of decimal 23 | digits). 24 | } 25 | \description{ 26 | Round a decimal to the precise decimal value of a specified 27 | fractional denominator. 
Common use cases include addressing floating point 28 | imprecision and enforcing that data values fall into a certain set. 29 | 30 | E.g., if a decimal represents hours and values should be logged to the nearest 31 | minute, \code{round_to_fraction(x, 60)} would enforce that distribution and 0.57 32 | would be rounded to 0.566667, the equivalent of 34/60. 0.56 would also be rounded 33 | to 34/60. 34 | 35 | Set \code{denominator = 1} to round to whole numbers. 36 | 37 | The \code{digits} argument allows for rounding of the subsequent result. 38 | } 39 | \details{ 40 | If \code{digits} is \code{Inf}, \code{x} is rounded to the fraction 41 | and then kept at full precision. If \code{digits} is \code{"auto"}, the 42 | number of digits is automatically selected as 43 | \code{ceiling(log10(denominator)) + 1}. 44 | } 45 | \examples{ 46 | round_to_fraction(1.6, denominator = 2) 47 | round_to_fraction(pi, denominator = 7) # 22/7 48 | round_to_fraction(c(8.1, 9.2), denominator = c(7, 8)) 49 | round_to_fraction(c(8.1, 9.2), denominator = c(7, 8), digits = 3) 50 | round_to_fraction(c(8.1, 9.2, 10.3), denominator = c(7, 8, 1001), digits = "auto") 51 | } 52 | -------------------------------------------------------------------------------- /man/row_to_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/row_to_names.R 3 | \name{row_to_names} 4 | \alias{row_to_names} 5 | \title{Elevate a row to be the column names of a data.frame.} 6 | \usage{ 7 | row_to_names( 8 | dat, 9 | row_number, 10 | ..., 11 | remove_row = TRUE, 12 | remove_rows_above = TRUE, 13 | sep = "_" 14 | ) 15 | } 16 | \arguments{ 17 | \item{dat}{The input data.frame} 18 | 19 | \item{row_number}{The row(s) of \code{dat} containing the variable names or the 20 | string \code{"find_header"} to use \code{find_header(dat=dat, ...)} to find 21 | the row_number. Allows for multiple rows input as a numeric vector. NA's are 22 | ignored, and if a column contains only \code{NA} value it will be named \code{"NA"}.} 23 | 24 | \item{...}{Sent to \code{find_header()}, if 25 | \code{row_number = "find_header"}. Otherwise, ignored.} 26 | 27 | \item{remove_row}{Should the row \code{row_number} be removed from the 28 | resulting data.frame?} 29 | 30 | \item{remove_rows_above}{If \code{row_number != 1}, should the rows above 31 | \code{row_number} - that is, between \code{1:(row_number-1)} - be removed 32 | from the resulting data.frame?} 33 | 34 | \item{sep}{A character string to separate the values in the case of multiple 35 | rows input to \code{row_number}.} 36 | } 37 | \value{ 38 | A data.frame with new names (and some rows removed, if specified) 39 | } 40 | \description{ 41 | Elevate a row to be the column names of a data.frame. 
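A small sketch, not from the package's examples, of the multi-row form documented for row_to_names() above: two header rows are pasted together with sep to build the new names, and with the default remove_row = TRUE both header rows are dropped.
y <- data.frame(
  X_1 = c("Region", "North", "1", "2"),
  X_2 = c("Region", "South", "3", "4")
)
row_to_names(y, row_number = 1:2, sep = "_")  # names become "Region_North" and "Region_South"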
42 | } 43 | \examples{ 44 | x <- data.frame( 45 | X_1 = c(NA, "Title", 1:3), 46 | X_2 = c(NA, "Title2", 4:6) 47 | ) 48 | x \%>\% 49 | row_to_names(row_number = 2) 50 | 51 | x \%>\% 52 | row_to_names(row_number = "find_header") 53 | } 54 | \seealso{ 55 | Other Set names: 56 | \code{\link{clean_names}()}, 57 | \code{\link{find_header}()}, 58 | \code{\link{mu_to_u}} 59 | } 60 | \concept{Set names} 61 | -------------------------------------------------------------------------------- /man/sas_numeric_to_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sas_dates.R 3 | \name{sas_numeric_to_date} 4 | \alias{sas_numeric_to_date} 5 | \title{Convert a SAS date, time or date/time to an R object} 6 | \usage{ 7 | sas_numeric_to_date(date_num, datetime_num, time_num, tz = "UTC") 8 | } 9 | \arguments{ 10 | \item{date_num}{numeric vector of serial numbers to convert.} 11 | 12 | \item{datetime_num}{numeric vector of date/time numbers (seconds since 13 | midnight 1960-01-01) to convert} 14 | 15 | \item{time_num}{numeric vector of time numbers (seconds since midnight on the 16 | current day) to convert} 17 | 18 | \item{tz}{Time zone, used when \code{include_time = TRUE} (see details for 19 | more information on timezones).} 20 | } 21 | \value{ 22 | If a date and time or datetime are provided, a POSIXct object. If a 23 | date is provided, a Date object. If a time is provided, an hms::hms object 24 | } 25 | \description{ 26 | Convert a SAS date, time or date/time to an R object 27 | } 28 | \examples{ 29 | sas_numeric_to_date(date_num = 15639) # 2002-10-26 30 | sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC") # 1998-07-26T14:45:32Z 31 | sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC") # 2002-10-26T01:00:00Z 32 | sas_numeric_to_date(time_num = 3600) # 01:00:00 33 | } 34 | \references{ 35 | SAS Date, Time, and Datetime Values reference (retrieved on 36 | 2022-03-08): https://v8doc.sas.com/sashtml/lrcon/zenid-63.htm 37 | } 38 | \seealso{ 39 | Other date-time cleaning: 40 | \code{\link{convert_to_date}()}, 41 | \code{\link{excel_numeric_to_date}()}, 42 | \code{\link{excel_time_to_numeric}()} 43 | } 44 | \concept{date-time cleaning} 45 | -------------------------------------------------------------------------------- /man/signif_half_up.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/round_half_up.R 3 | \name{signif_half_up} 4 | \alias{signif_half_up} 5 | \title{Round a numeric vector to the specified number of significant digits; halves will be rounded up.} 6 | \usage{ 7 | signif_half_up(x, digits = 6) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric vector to round.} 11 | 12 | \item{digits}{integer indicating the number of significant digits to be used.} 13 | } 14 | \description{ 15 | In base R \code{signif()}, halves are rounded to even, e.g., 16 | \code{signif(11.5, 2)} and \code{signif(12.5, 2)} are both rounded to 12. 17 | This function rounds 12.5 to 13 (assuming \code{digits = 2}). Negative halves 18 | are rounded away from zero, e.g., \code{signif(-2.5, 1)} is rounded to -3. 19 | 20 | This may skew subsequent statistical analysis of the data, but may be 21 | desirable in certain contexts. This function is implemented from 22 | \url{https://stackoverflow.com/a/1581007/}; see that question and 23 | comments for discussion of this issue. 
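A two-line contrast of the behavior described above, using the value from signif_half_up()'s description:
signif(12.5, 2)          # 12: base R rounds the half to even
signif_half_up(12.5, 2)  # 13: halves are rounded up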
24 | } 25 | \examples{ 26 | signif_half_up(12.5, 2) 27 | signif_half_up(1.125, 3) 28 | signif_half_up(-2.5, 1) # negatives get rounded away from zero 29 | 30 | } 31 | -------------------------------------------------------------------------------- /man/single_value.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/single_value.R 3 | \name{single_value} 4 | \alias{single_value} 5 | \title{Ensure that a vector has only a single value throughout.} 6 | \usage{ 7 | single_value(x, missing = NA, warn_if_all_missing = FALSE, info = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{The vector which should have a single value} 11 | 12 | \item{missing}{The vector of values to consider missing in \code{x}} 13 | 14 | \item{warn_if_all_missing}{Generate a warning if all values are missing?} 15 | 16 | \item{info}{If more than one value is found, append this to the warning or 17 | error to assist with determining the location of the issue.} 18 | } 19 | \value{ 20 | \code{x} as the scalar single value found throughout (or an error if 21 | more than one value is found). 22 | } 23 | \description{ 24 | Missing values are replaced with the single value, and if all values are 25 | missing, the first value in \code{missing} is used throughout. 26 | } 27 | \examples{ 28 | # A simple use case with vectors of input 29 | 30 | single_value(c(NA, 1)) 31 | # Multiple, different values of missing can be given 32 | single_value(c(NA, "a"), missing = c(NA, "a")) 33 | 34 | # A typical use case with a grouped data.frame used for input and the output 35 | # (`B` is guaranteed to have a single value and only one row, in this case) 36 | data.frame( 37 | A = rep(1:3, each = 2), 38 | B = c(rep(4:6, each = 2)) 39 | ) \%>\% 40 | dplyr::group_by(A) \%>\% 41 | dplyr::summarize( 42 | B = single_value(B) 43 | ) 44 | 45 | try( 46 | # info is useful to give when multiple values may be found to see what 47 | # grouping variable or what calculation is causing the error 48 | data.frame( 49 | A = rep(1:3, each = 2), 50 | B = c(rep(1:2, each = 2), 1:2) 51 | ) \%>\% 52 | dplyr::group_by(A) \%>\% 53 | dplyr::mutate( 54 | C = single_value(B, info = paste("Calculating C for group A=", A)) 55 | ) 56 | ) 57 | } 58 | -------------------------------------------------------------------------------- /man/tabyl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tabyl.R 3 | \name{tabyl} 4 | \alias{tabyl} 5 | \alias{tabyl.default} 6 | \alias{tabyl.data.frame} 7 | \title{Generate a frequency table (1-, 2-, or 3-way).} 8 | \usage{ 9 | tabyl(dat, ...) 10 | 11 | \method{tabyl}{default}(dat, show_na = TRUE, show_missing_levels = TRUE, ...) 12 | 13 | \method{tabyl}{data.frame}(dat, var1, var2, var3, show_na = TRUE, show_missing_levels = TRUE, ...) 14 | } 15 | \arguments{ 16 | \item{dat}{A \code{data.frame} containing the variables you wish to count. 17 | Or, a vector you want to tabulate.} 18 | 19 | \item{...}{Additional arguments passed to methods.} 20 | 21 | \item{show_na}{Should counts of \code{NA} values be displayed? In a one-way tabyl, 22 | the presence of \code{NA} values triggers an additional column showing valid percentages 23 | (calculated excluding \code{NA} values).} 24 | 25 | \item{show_missing_levels}{Should counts of missing levels of factors be displayed? 26 | These will be rows and/or columns of zeroes. 
Useful for keeping consistent 27 | output dimensions even when certain factor levels may not be present in the data.} 28 | 29 | \item{var1}{The column name of the first variable.} 30 | 31 | \item{var2}{(optional) the column name of the second variable 32 | (its values become the column names in a 2-way tabulation).} 33 | 34 | \item{var3}{(optional) the column name of the third variable 35 | (a 3-way tabulation is split into a list on its values).} 36 | } 37 | \value{ 38 | A \code{data.frame} with frequencies and percentages of the tabulated variable(s). 39 | A 3-way tabulation returns a list of data frames. 40 | } 41 | \description{ 42 | A fully-featured alternative to \code{table()}. Results are data.frames and can be 43 | formatted and enhanced with janitor's family of \code{adorn_} functions. 44 | 45 | Specify a \code{data.frame} and the one, two, or three unquoted column names you 46 | want to tabulate. Three variables generates a list of 2-way tabyls, 47 | split by the third variable. 48 | 49 | Alternatively, you can tabulate a single variable that isn't in a \code{data.frame} 50 | by calling \code{tabyl()} on a vector, e.g., \code{tabyl(mtcars$gear)}. 51 | } 52 | \examples{ 53 | 54 | tabyl(mtcars, cyl) 55 | tabyl(mtcars, cyl, gear) 56 | tabyl(mtcars, cyl, gear, am) 57 | 58 | # or using the \%>\% pipe 59 | mtcars \%>\% 60 | tabyl(cyl, gear) 61 | 62 | # illustrating show_na functionality: 63 | my_cars <- rbind(mtcars, rep(NA, 11)) 64 | my_cars \%>\% tabyl(cyl) 65 | my_cars \%>\% tabyl(cyl, show_na = FALSE) 66 | 67 | # Calling on a single vector not in a data.frame: 68 | val <- c("hi", "med", "med", "lo") 69 | tabyl(val) 70 | } 71 | -------------------------------------------------------------------------------- /man/top_levels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/top_levels.R 3 | \name{top_levels} 4 | \alias{top_levels} 5 | \title{Generate a frequency table of a factor grouped into top-n, bottom-n, and all 6 | other levels.} 7 | \usage{ 8 | top_levels(input_vec, n = 2, show_na = FALSE) 9 | } 10 | \arguments{ 11 | \item{input_vec}{The factor variable to tabulate.} 12 | 13 | \item{n}{Number of levels to include in top and bottom groups} 14 | 15 | \item{show_na}{Should cases where the variable is \code{NA} be shown?} 16 | } 17 | \value{ 18 | A \code{data.frame} (actually a \code{tbl_df}) with the frequencies of the 19 | grouped, tabulated variable. Includes counts and percentages, and valid 20 | percentages (calculated omitting \code{NA} values, if present in the vector and 21 | \code{show_na = TRUE}.) 22 | } 23 | \description{ 24 | Get a frequency table of a factor variable, grouped into categories by level. 25 | } 26 | \examples{ 27 | top_levels(as.factor(mtcars$hp), 2) 28 | } 29 | -------------------------------------------------------------------------------- /man/untabyl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_and_untabyl.R 3 | \name{untabyl} 4 | \alias{untabyl} 5 | \title{Remove \code{tabyl} attributes from a data.frame.} 6 | \usage{ 7 | untabyl(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{a \code{data.frame} of class \code{tabyl}.} 11 | } 12 | \value{ 13 | the same \code{data.frame}, but without the \code{tabyl} class and attributes. 
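A small sketch of tabyl()'s show_missing_levels argument documented above: the unused factor level "z" appears as a row of zeroes by default and is dropped when the argument is FALSE.
fct_df <- data.frame(
  grp  = factor(c("x", "y"), levels = c("x", "y", "z")),
  flag = c(0, 1)
)
tabyl(fct_df, grp, flag)                               # includes a zero-count row for "z"
tabyl(fct_df, grp, flag, show_missing_levels = FALSE)  # drops the "z" row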
14 | } 15 | \description{ 16 | Strips away all \code{tabyl}-related attributes from a data.frame. 17 | } 18 | \examples{ 19 | 20 | mtcars \%>\% 21 | tabyl(am) \%>\% 22 | untabyl() \%>\% 23 | attributes() # tabyl-specific attributes are gone 24 | } 25 | -------------------------------------------------------------------------------- /man/use_first_valid_of.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{use_first_valid_of} 4 | \alias{use_first_valid_of} 5 | \title{Returns first non-\code{NA} value from a set of vectors.} 6 | \usage{ 7 | use_first_valid_of(..., if_all_NA = NA) 8 | } 9 | \arguments{ 10 | \item{...}{the input vectors. Order matters: these are searched and prioritized in the order they are supplied.} 11 | 12 | \item{if_all_NA}{what value should be used when all of the vectors return \code{NA} for a certain index? Default is \code{NA}.} 13 | } 14 | \value{ 15 | Returns a single vector with the selected values. 16 | } 17 | \description{ 18 | Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:coalesce]{dplyr::coalesce()}} instead. 19 | 20 | At each position of the input vectors, iterates through in order and returns the first non-NA value. 21 | This is a robust replacement of the common \code{ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))}. 22 | It's more readable and handles problems like \code{\link[=ifelse]{ifelse()}}'s inability to work with dates in this way. 23 | } 24 | \seealso{ 25 | janitor_deprecated 26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | data.sqlite 6 | *.html 7 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Platform 2 | 3 | |field |value | 4 | |:--------|:----------------------------| 5 | |version |R version 3.4.4 (2018-03-15) | 6 | |os |Ubuntu 18.04.2 LTS | 7 | |system |x86_64, linux-gnu | 8 | |ui |RStudio | 9 | |language |(EN) | 10 | |collate |en_US.UTF-8 | 11 | |tz |America/New_York | 12 | |date |2019-03-13 | 13 | 14 | # Dependencies 15 | 16 | |package |old |new |Δ | 17 | |:----------|:--------|:----------|:--| 18 | |janitor |1.1.1 |1.1.1.9000 |* | 19 | |assertthat |0.2.0 |0.2.0 | | 20 | |BH |1.69.0-1 |1.69.0-1 | | 21 | |cli |1.0.1 |1.0.1 | | 22 | |crayon |1.3.4 |1.3.4 | | 23 | |dplyr |0.8.0.1 |0.8.0.1 | | 24 | |fansi |0.4.0 |0.4.0 | | 25 | |glue |1.3.1 |1.3.1 | | 26 | |magrittr |1.5 |1.5 | | 27 | |pillar |1.3.1 |1.3.1 | | 28 | |pkgconfig |2.0.2 |2.0.2 | | 29 | |plogr |0.2.0 |0.2.0 | | 30 | |purrr |0.3.1 |0.3.1 | | 31 | |R6 |2.4.0 |2.4.0 | | 32 | |Rcpp |1.0.0 |1.0.0 | | 33 | |rlang |0.3.1 |0.3.1 | | 34 | |snakecase |0.9.2 |0.9.2 | | 35 | |stringi |1.4.3 |1.4.3 | | 36 | |stringr |1.4.0 |1.4.0 | | 37 | |tibble |2.0.1 |2.0.1 | | 38 | |tidyr |0.8.3 |0.8.3 | | 39 | |tidyselect |0.2.5 |0.2.5 | | 40 | |utf8 |1.1.4 |1.1.4 | | 41 | 42 | # Revdeps 43 | 44 | ## Couldn't check (2) 45 | 46 | |package |version |error |warning |note | 47 | |:---------------|:-------|:-----|:-------|:----| 48 | |bomrang |? | | | | 49 | |fivethirtyeight |? | | | | 50 | 51 | ## All (8) 52 | 53 | |package |version |error |warning |note | 54 | |:------------------------------------|:-------|:-----|:-------|:----| 55 | |[ballr](problems.md#ballr) |0.2.3 |1 |1 |1 | 56 | |bomrang |? | | | | 57 | |[congressbr](problems.md#congressbr) |0.2.0 |1 |1 |2 | 58 | |[driftR](problems.md#driftr) |1.1.0 |1 |1 | | 59 | |fivethirtyeight |? 
| | | | 60 | |[moderndive](problems.md#moderndive) |0.2.0 |1 |1 | | 61 | |[postal](problems.md#postal) |0.1.1 |1 |1 | | 62 | |[questionr](problems.md#questionr) |0.7.0 |1 |1 |2 | 63 | 64 | -------------------------------------------------------------------------------- /revdep/checks.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/revdep/checks.rds -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: ??? 2 | rel_release_date: ??? 3 | my_news_url: ??? 4 | release_version: ??? 5 | -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | # ballr 2 | 3 | Version: 0.2.3 4 | 5 | ## In both 6 | 7 | * checking PDF version of manual without hyperrefs or index ... ERROR 8 | ``` 9 | Re-running with no redirection of stdout/stderr. 10 | Hmm ... looks like a package 11 | You may want to clean up by 'rm -rf /tmp/RtmpUWMLZE/Rd2pdf4edb13f7208d' 12 | ``` 13 | 14 | * checking PDF version of manual ... WARNING 15 | ``` 16 | LaTeX errors when creating PDF version. 17 | This typically indicates Rd problems. 18 | ``` 19 | 20 | * checking package dependencies ... NOTE 21 | ``` 22 | Package suggested but not available for checking: ‘devtools’ 23 | ``` 24 | 25 | # congressbr 26 | 27 | Version: 0.2.0 28 | 29 | ## In both 30 | 31 | * checking PDF version of manual without hyperrefs or index ... ERROR 32 | ``` 33 | Re-running with no redirection of stdout/stderr. 34 | Hmm ... looks like a package 35 | You may want to clean up by 'rm -rf /tmp/RtmpUMM3at/Rd2pdf9f95c96e9c0' 36 | ``` 37 | 38 | * checking PDF version of manual ... WARNING 39 | ``` 40 | LaTeX errors when creating PDF version. 41 | This typically indicates Rd problems. 42 | ``` 43 | 44 | * checking package dependencies ... NOTE 45 | ``` 46 | Package suggested but not available for checking: ‘devtools’ 47 | ``` 48 | 49 | * checking data for non-ASCII characters ... NOTE 50 | ``` 51 | Note: found 1 marked UTF-8 string 52 | ``` 53 | 54 | # driftR 55 | 56 | Version: 1.1.0 57 | 58 | ## In both 59 | 60 | * checking PDF version of manual without hyperrefs or index ... ERROR 61 | ``` 62 | Re-running with no redirection of stdout/stderr. 63 | Hmm ... looks like a package 64 | You may want to clean up by 'rm -rf /tmp/Rtmph3nrYw/Rd2pdf175d6a37b8fb' 65 | ``` 66 | 67 | * checking PDF version of manual ... WARNING 68 | ``` 69 | LaTeX errors when creating PDF version. 70 | This typically indicates Rd problems. 71 | ``` 72 | 73 | # moderndive 74 | 75 | Version: 0.2.0 76 | 77 | ## In both 78 | 79 | * checking PDF version of manual without hyperrefs or index ... ERROR 80 | ``` 81 | Re-running with no redirection of stdout/stderr. 82 | Hmm ... looks like a package 83 | You may want to clean up by 'rm -rf /tmp/RtmpG2Xo0M/Rd2pdf51a252e4d198' 84 | ``` 85 | 86 | * checking PDF version of manual ... WARNING 87 | ``` 88 | LaTeX errors when creating PDF version. 89 | This typically indicates Rd problems. 90 | ``` 91 | 92 | # postal 93 | 94 | Version: 0.1.1 95 | 96 | ## In both 97 | 98 | * checking PDF version of manual without hyperrefs or index ... ERROR 99 | ``` 100 | Re-running with no redirection of stdout/stderr. 101 | Hmm ... 
looks like a package 102 | You may want to clean up by 'rm -rf /tmp/RtmpgzvDpk/Rd2pdf5fc260d75179' 103 | ``` 104 | 105 | * checking PDF version of manual ... WARNING 106 | ``` 107 | LaTeX errors when creating PDF version. 108 | This typically indicates Rd problems. 109 | ``` 110 | 111 | # questionr 112 | 113 | Version: 0.7.0 114 | 115 | ## In both 116 | 117 | * checking PDF version of manual without hyperrefs or index ... ERROR 118 | ``` 119 | Re-running with no redirection of stdout/stderr. 120 | Hmm ... looks like a package 121 | You may want to clean up by 'rm -rf /tmp/RtmpHnFYMV/Rd2pdf289a7f920965' 122 | ``` 123 | 124 | * checking PDF version of manual ... WARNING 125 | ``` 126 | LaTeX errors when creating PDF version. 127 | This typically indicates Rd problems. 128 | ``` 129 | 130 | * checking Rd cross-references ... NOTE 131 | ``` 132 | Package unavailable to check Rd xrefs: ‘Hmisc’ 133 | ``` 134 | 135 | * checking data for non-ASCII characters ... NOTE 136 | ``` 137 | Note: found 4145 marked UTF-8 strings 138 | ``` 139 | 140 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(janitor) 11 | 12 | test_check("janitor") 13 | -------------------------------------------------------------------------------- /tests/testthat/test-adorn-pct-formatting.R: -------------------------------------------------------------------------------- 1 | source1 <- mtcars %>% 2 | tabyl(cyl, am) %>% 3 | adorn_percentages() 4 | 5 | test_that("calculations are accurate", { 6 | expect_equal( 7 | untabyl(adorn_pct_formatting(source1)), # default parameter is denom = "row" 8 | data.frame( 9 | cyl = c(4, 6, 8), 10 | `0` = c("27.3%", "57.1%", "85.7%"), 11 | `1` = c("72.7%", "42.9%", "14.3%"), 12 | check.names = FALSE, 13 | stringsAsFactors = FALSE 14 | ) 15 | ) 16 | }) 17 | 18 | test_that("data.frames with no numeric columns beyond the first cause failure", { 19 | expect_error( 20 | adorn_pct_formatting(data.frame(a = 1:2, b = c("hi", "lo"))), 21 | "at least one targeted column must be of class numeric", 22 | fixed = TRUE 23 | ) 24 | }) 25 | 26 | dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE) 27 | 28 | test_that("works with a single numeric column per #89", { 29 | expect_equal( 30 | dat %>% adorn_percentages("col") %>% untabyl(), 31 | data.frame( 32 | Operation = c("Login", "Posted", "Deleted"), 33 | `Total Count` = c(5 / 70, 25 / 70, 40 / 70), 34 | check.names = FALSE 35 | ) 36 | ) 37 | }) 38 | 39 | test_that("works with totals row", { 40 | expect_equal( 41 | dat %>% adorn_totals("row") %>% adorn_percentages("col") %>% untabyl(), 42 | data.frame( 43 | Operation = c("Login", "Posted", "Deleted", "Total"), 44 | `Total Count` = c(5 / 70, 25 / 70, 40 / 70, 1), 45 | check.names = FALSE, stringsAsFactors = FALSE 46 | ) 47 | ) 48 | }) 49 | 50 | test_that("works with one-way tabyl", { 51 | expect_equal( 52 | mtcars %>% 53 | tabyl(carb) %>% 54 | adorn_pct_formatting(digits = 0) %>% 55 | untabyl(), 56 | data.frame( 57 | carb = c(1:4, 6, 8), 58 | n = c(7, 
10, 3, 10, 1, 1), 59 | percent = c("22%", "31%", "9%", "31%", "3%", "3%"), 60 | stringsAsFactors = FALSE 61 | ) 62 | ) 63 | }) 64 | 65 | test_that("NAs are replaced with dashes when percentage signs are affixed", { 66 | # NaNs from adorn_percentages, the more common case (still uncommon) 67 | has_nans <- mtcars %>% 68 | tabyl(carb, cyl) %>% 69 | .[5:6, ] %>% 70 | adorn_percentages("col") %>% 71 | adorn_pct_formatting() %>% 72 | untabyl() 73 | row.names(has_nans) <- NULL 74 | expect_equal( 75 | has_nans, 76 | data.frame( 77 | carb = c(6, 8), 78 | `4` = c("-", "-"), 79 | `6` = c("100.0%", "0.0%"), 80 | `8` = c("0.0%", "100.0%"), 81 | check.names = FALSE, 82 | stringsAsFactors = FALSE 83 | ) 84 | ) 85 | 86 | # NAs convert to - 87 | has_nas <- data.frame(a = c("big", "little"), x = c(0.1, 0.123), y = c(0.98, NA), stringsAsFactors = FALSE) 88 | expect_equal( 89 | adorn_pct_formatting(has_nas), 90 | data.frame(a = c("big", "little"), x = c("10.0%", "12.3%"), y = c("98.0%", "-"), stringsAsFactors = FALSE) 91 | ) 92 | }) 93 | 94 | test_that("NAs are replaced with dashes - no percentage signs affixed", { 95 | # NaNs from adorn_percentages, the more common case (still uncommon) 96 | has_nans <- mtcars %>% 97 | tabyl(carb, cyl) %>% 98 | .[5:6, ] %>% 99 | adorn_percentages("col") %>% 100 | adorn_pct_formatting(affix_sign = FALSE) %>% 101 | untabyl() 102 | row.names(has_nans) <- NULL 103 | expect_equal( 104 | has_nans, 105 | data.frame( 106 | carb = c(6, 8), 107 | `4` = c("-", "-"), 108 | `6` = c("100.0", "0.0"), 109 | `8` = c("0.0", "100.0"), 110 | check.names = FALSE, 111 | stringsAsFactors = FALSE 112 | ) 113 | ) 114 | 115 | # NAs convert to - symbol 116 | has_nas <- data.frame(a = c("big", "little"), x = c(0.1, 0.123), y = c(0.98, NA), stringsAsFactors = FALSE) 117 | expect_equal( 118 | adorn_pct_formatting(has_nas, affix_sign = FALSE), 119 | data.frame(a = c("big", "little"), x = c("10.0", "12.3"), y = c("98.0", "-"), stringsAsFactors = FALSE) 120 | ) 121 | }) 122 | 123 | 124 | test_that("bad rounding argument caught", { 125 | expect_error( 126 | dat %>% 127 | adorn_percentages() %>% 128 | adorn_pct_formatting(rounding = "blargh"), 129 | "`rounding` must be one of \"half to even\" or \"half up\", not \"blargh\".", 130 | fixed = TRUE 131 | ) 132 | }) 133 | 134 | test_that("automatically invokes purrr::map when called on a 3-way tabyl", { 135 | three <- tabyl(mtcars, cyl, am, gear) 136 | expect_equal( 137 | adorn_pct_formatting(three), # vanilla call 138 | purrr::map(three, adorn_pct_formatting) 139 | ) 140 | 141 | # with arguments passing through 142 | expect_equal( 143 | adorn_pct_formatting(three, 2, "half up", affix_sign = FALSE), 144 | purrr::map(three, adorn_pct_formatting, 2, "half up", FALSE) 145 | ) 146 | }) 147 | 148 | test_that("non-data.frame inputs are handled", { 149 | expect_error(adorn_pct_formatting(1:5), "adorn_pct_formatting() must be called on a data.frame or list of data.frames", fixed = TRUE) 150 | }) 151 | 152 | test_that("tidyselecting works", { 153 | target <- data.frame( 154 | color = c("green", "blue", "red"), 155 | first_wave = c(1:3), 156 | second_wave = c(4:6), 157 | third_wave = c(3, 3, 3), 158 | size = c("small", "medium", "large"), 159 | stringsAsFactors = FALSE 160 | ) %>% 161 | adorn_percentages() 162 | 163 | two_cols <- target %>% 164 | adorn_pct_formatting(, , , first_wave:second_wave) 165 | expect_equal(two_cols$first_wave, c("12.5%", "20.0%", "25.0%")) 166 | expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12)) 167 | 168 | expect_message( 169 | target %>% 
170 | adorn_pct_formatting(, , , third_wave:size), 171 | "At least one non-numeric column was specified and will not be modified." 172 | ) 173 | # correct behavior occurs when text columns are skipped 174 | expect_message( 175 | text_skipped <- target %>% 176 | adorn_pct_formatting(., , , , c(first_wave, size)), 177 | "At least one non-numeric column was specified and will not be modified." 178 | ) 179 | 180 | expect_equal(text_skipped$first_wave, c("12.5%", "20.0%", "25.0%")) 181 | expect_equal( 182 | text_skipped %>% dplyr::select(-first_wave), 183 | target %>% dplyr::select(-first_wave), 184 | ignore_attr = TRUE 185 | ) 186 | }) 187 | 188 | test_that("decimal.mark works", { 189 | locale_decimal_sep <- getOption("OutDec") # not sure if it's necessary to save and restore this, 190 | # but seems safe for locale-independent testing processes 191 | options(OutDec = ",") 192 | expect_true( 193 | all(grepl(",", unlist(adorn_pct_formatting(source1)[2]))) 194 | ) 195 | options(OutDec = locale_decimal_sep) 196 | }) 197 | -------------------------------------------------------------------------------- /tests/testthat/test-adorn-rounding.R: -------------------------------------------------------------------------------- 1 | x <- data.frame( 2 | a = c(rep("x", 55), rep("y", 45)), 3 | b = c(rep("x", 50), rep("y", 50)), 4 | stringsAsFactors = FALSE 5 | ) 6 | 7 | # Crosstab with decimal values ending in .5 8 | y <- x %>% 9 | tabyl(a, b) %>% 10 | adorn_percentages("all") 11 | 12 | test_that("rounding parameter works", { 13 | expect_equal( 14 | y %>% 15 | adorn_rounding(digits = 1, rounding = "half up") %>% 16 | untabyl(), 17 | data.frame( 18 | a = c("x", "y"), 19 | x = c(0.5, 0.0), 20 | y = c(0.1, 0.5), 21 | stringsAsFactors = FALSE 22 | ) 23 | ) 24 | # Test failing on CRAN and only there 25 | skip_on_cran() 26 | expect_equal( 27 | y %>% 28 | adorn_rounding(digits = 1) %>% # default rounding: "half to even" 29 | untabyl(), 30 | data.frame( 31 | a = c("x", "y"), 32 | x = c(0.5, 0.0), 33 | y = c(0.0, 0.4), 34 | stringsAsFactors = FALSE 35 | ) 36 | ) 37 | }) 38 | 39 | test_that("digit control succeeds", { 40 | expect_equal( 41 | y %>% 42 | adorn_rounding(digits = 0, rounding = "half up") %>% 43 | untabyl(), 44 | data.frame( 45 | a = c("x", "y"), 46 | x = c(1, 0), 47 | y = c(0, 0), 48 | stringsAsFactors = FALSE 49 | ) 50 | ) 51 | expect_equal( 52 | y %>% 53 | adorn_rounding(digits = 2, rounding = "half up"), # shouldn't do anything given the input only having 2 decimal places 54 | y 55 | ) 56 | }) 57 | 58 | test_that("bad rounding argument caught", { 59 | expect_error( 60 | y %>% 61 | adorn_rounding(rounding = "blargh"), 62 | "'rounding' must be one of 'half to even' or 'half up'", 63 | fixed = TRUE 64 | ) 65 | }) 66 | 67 | test_that("works when called on a 3-way tabyl", { 68 | triple <- mtcars %>% 69 | tabyl(am, cyl, vs) %>% 70 | adorn_percentages("row") 71 | 72 | triple_rounded_manual <- triple 73 | triple_rounded_manual[[1]] <- adorn_rounding(triple[[1]]) 74 | triple_rounded_manual[[2]] <- adorn_rounding(triple[[2]]) 75 | 76 | expect_equal( 77 | triple %>% 78 | adorn_rounding(), 79 | triple_rounded_manual 80 | ) 81 | }) 82 | 83 | 84 | test_that("tidyselecting works", { 85 | target <- data.frame( 86 | color = c("green", "blue", "red"), 87 | first_wave = c(1:3), 88 | second_wave = c(4:6), 89 | third_wave = c(3, 3, 3), 90 | size = c("small", "medium", "large"), 91 | stringsAsFactors = FALSE 92 | ) %>% 93 | adorn_percentages() 94 | 95 | two_cols <- target %>% 96 | adorn_rounding(, "half up", 
first_wave:second_wave) 97 | expect_equal(two_cols$first_wave, c(.1, .2, .3)) 98 | expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12)) 99 | 100 | expect_message( 101 | target %>% 102 | adorn_rounding(, , third_wave:size), 103 | "At least one non-numeric column was specified and will not be modified." 104 | ) 105 | expect_message( 106 | text_skipped <- target %>% 107 | adorn_rounding(, , c(first_wave, size)), 108 | "At least one non-numeric column was specified and will not be modified." 109 | ) 110 | expect_equal(text_skipped$first_wave, c(.1, .2, .2)) 111 | expect_equal( 112 | text_skipped %>% dplyr::select(-first_wave), 113 | target %>% dplyr::select(-first_wave), 114 | ignore_attr = TRUE 115 | ) 116 | }) 117 | 118 | test_that("non-data.frame inputs are handled", { 119 | expect_error(adorn_rounding(1:5), "adorn_rounding() must be called on a data.frame or list of data.frames", fixed = TRUE) 120 | }) 121 | -------------------------------------------------------------------------------- /tests/testthat/test-adorn-title.R: -------------------------------------------------------------------------------- 1 | source1 <- mtcars %>% 2 | tabyl(gear, cyl) 3 | 4 | test_that("placement is correct", { 5 | # Top 6 | expect_equal( 7 | source1 %>% 8 | adorn_title() %>% 9 | names(), 10 | c("", "cyl", rep("", 2)) 11 | ) 12 | expect_equal( 13 | source1 %>% 14 | adorn_title() %>% 15 | .[1, ] %>% 16 | unlist() %>% 17 | unname(), 18 | c("gear", "4", "6", "8") 19 | ) 20 | # Combined 21 | expect_equal( 22 | source1 %>% 23 | adorn_title("combined") %>% 24 | names(), 25 | c("gear/cyl", "4", "6", "8") 26 | ) 27 | }) 28 | 29 | test_that("name overrides work", { 30 | expect_equal( 31 | source1 %>% 32 | adorn_title(row_name = "R", col_name = "C") %>% 33 | names(), 34 | c("", "C", rep("", 2)) 35 | ) 36 | }) 37 | 38 | test_that("non-tabyls are treated correctly", { 39 | non_tab <- mtcars %>% 40 | dplyr::count(gear, cyl) %>% 41 | tidyr::pivot_wider(names_from = gear, values_from = n) 42 | expect_error(adorn_title(non_tab), "When input is not a data.frame of class tabyl, a value must be specified for the col_name argument") 43 | 44 | expect_equal( 45 | non_tab %>% adorn_title(col_name = "col") %>% names(), 46 | c("", "col", rep("", 2)) 47 | ) 48 | 49 | expect_equal( 50 | non_tab %>% adorn_title(placement = "combined", col_name = "col") %>% names(), 51 | c("cyl/col", 3, 4, 5) 52 | ) 53 | 54 | expect_equal( 55 | non_tab %>% adorn_title(placement = "combined", row_name = "row!", col_name = "col") %>% names(), 56 | c("row!/col", 3, 4, 5) 57 | ) 58 | }) 59 | test_that("bad inputs are caught", { 60 | expect_error(adorn_title(1:2), 61 | "\"dat\" must be a data.frame", 62 | fixed = TRUE 63 | ) 64 | expect_error( 65 | adorn_title(source1, 66 | placement = "blargh" 67 | ), 68 | "`placement` must be one of \"top\" or \"combined\"", 69 | fixed = TRUE 70 | ) 71 | expect_error( 72 | adorn_title(source1, 73 | row_name = 1:4 74 | ), 75 | "row_name must be a string" 76 | ) 77 | expect_error( 78 | adorn_title(source1, 79 | col_name = mtcars 80 | ), 81 | "col_name must be a string" 82 | ) 83 | 84 | # Doesn't make sense with a one-way tabyl 85 | expect_warning( 86 | mtcars %>% tabyl(cyl) %>% adorn_title(), 87 | "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result" 88 | ) 89 | }) 90 | 91 | test_that("works with non-count inputs", { 92 | source2_base <- data.frame(sector = c("North", "South"), units = 1:2, group = c("a", "b")) 93 | source2_tibble <- dplyr::as_tibble(source2_base) 94 
| expect_equal( 95 | adorn_title(source2_base, col_name = "Characteristics") %>% names(), 96 | c("", "Characteristics", "") 97 | ) 98 | expect_equal( 99 | adorn_title(source2_base, col_name = "Characteristics"), 100 | adorn_title(source2_tibble, col_name = "Characteristics") 101 | ) 102 | }) 103 | 104 | test_that("for printing purposes: tabyl class stays tabyl, data.frame stays data.frame, tibble is downgraded to data.frame", { 105 | # right output classes with tabyl inputs 106 | expect_equal(class(mtcars %>% tabyl(cyl, am) %>% adorn_title()), c("tabyl", "data.frame")) 107 | expect_equal(class(mtcars %>% tabyl(gear, carb) %>% adorn_title(., "combined")), c("tabyl", "data.frame")) 108 | 109 | # Create tibble input: 110 | mpg_by_cyl_and_am <- 111 | mtcars %>% 112 | dplyr::group_by(cyl, am) %>% 113 | dplyr::summarise(mean_mpg = mean(mpg)) %>% 114 | tidyr::pivot_wider(names_from = am, values_from = mean_mpg) 115 | 116 | # handles tibble input 117 | expect_s3_class( 118 | mpg_by_cyl_and_am %>% adorn_title("top", "Cylinders", "Automatic?"), 119 | "data.frame" 120 | ) 121 | 122 | # Convert columns 2:n to strings 123 | expect_s3_class( 124 | mpg_by_cyl_and_am %>% adorn_pct_formatting() %>% # nonsense command here, just want to convert cols 2:n into character 125 | adorn_title("top", "Cylinders", "Automatic?"), 126 | "data.frame" 127 | ) 128 | 129 | # handles data.frame non-tabyl input 130 | expect_s3_class( 131 | mtcars %>% adorn_title("top", col_name = "hey look ma I'm a title"), 132 | "data.frame" 133 | ) 134 | }) 135 | 136 | test_that("works with factors in input", { 137 | facts <- data.frame(a = "high", large = "1", stringsAsFactors = TRUE) 138 | # first with "top" then "combined" 139 | expect_equal( 140 | facts %>% adorn_title(col_name = "col"), 141 | data.frame(a = c("a", "high"), col = c("large", "1"), stringsAsFactors = FALSE) %>% 142 | setNames(., c("", "col")) 143 | ) 144 | # with combined the original column types are preserved 145 | expect_equal( 146 | facts %>% adorn_title("combined", col_name = "col"), 147 | data.frame(`a/col` = "high", large = "1", stringsAsFactors = TRUE, check.names = FALSE) 148 | ) 149 | }) 150 | 151 | test_that("automatically invokes purrr::map when called on a 3-way tabyl", { 152 | three <- tabyl(mtcars, cyl, am, gear) %>% 153 | adorn_percentages() %>% 154 | adorn_pct_formatting() 155 | expect_equal( 156 | adorn_title(three), # vanilla call 157 | purrr::map(three, adorn_title) 158 | ) 159 | 160 | # with arguments passing through, incl. 
custom row and col names 161 | expect_equal( 162 | adorn_title(three, "combined", "cyl", "am"), 163 | purrr::map(three, adorn_title, "combined", "cyl", "am") 164 | ) 165 | }) 166 | -------------------------------------------------------------------------------- /tests/testthat/test-convert_to_date.R: -------------------------------------------------------------------------------- 1 | test_that("convert_date works", { 2 | expect_equal( 3 | convert_to_date("2009-07-06"), 4 | as.Date("2009-07-06") 5 | ) 6 | expect_equal( 7 | convert_to_date(40000), 8 | as.Date("2009-07-06") 9 | ) 10 | expect_equal( 11 | convert_to_date(40000.1), 12 | as.Date("2009-07-06") 13 | ) 14 | expect_equal( 15 | convert_to_date("40000"), 16 | as.Date("2009-07-06") 17 | ) 18 | expect_equal( 19 | convert_to_date("40000.1"), 20 | as.Date("2009-07-06") 21 | ) 22 | expect_equal( 23 | convert_to_date(factor("40000.1")), 24 | as.Date("2009-07-06") 25 | ) 26 | expect_equal( 27 | convert_to_date(as.Date("2009-07-06")), 28 | as.Date("2009-07-06") 29 | ) 30 | expect_equal( 31 | convert_to_date(as.POSIXct("2009-07-06", tz = "UTC")), 32 | as.Date("2009-07-06") 33 | ) 34 | expect_equal( 35 | convert_to_date(as.POSIXlt("2009-07-06")), 36 | as.Date("2009-07-06") 37 | ) 38 | expect_equal( 39 | convert_to_date(c("2009-07-06", "40000.1", "40000", NA)), 40 | c(rep(as.Date("2009-07-06"), 3), NA), 41 | info = "Mixed input works, including NA." 42 | ) 43 | }) 44 | 45 | test_that("convert_datetime works", { 46 | expect_equal( 47 | convert_to_datetime("2009-07-06 12:13:14"), 48 | as.POSIXct("2009-07-06 12:13:14", tz = "UTC") 49 | ) 50 | expect_equal( 51 | convert_to_datetime("2009-07-06 12:13:14", tz = "Etc/GMT-5"), 52 | as.POSIXct("2009-07-06 12:13:14", tz = "Etc/GMT-5"), 53 | info = "The tz argument is respected" 54 | ) 55 | expect_equal( 56 | convert_to_datetime(40000), 57 | as.POSIXct("2009-07-06", tz = "UTC") 58 | ) 59 | expect_equal( 60 | convert_to_datetime(40000.1), 61 | as.POSIXct("2009-07-06 02:24", tz = "UTC") 62 | ) 63 | expect_equal( 64 | convert_to_datetime(40000.1, tz = "Etc/GMT-5"), 65 | as.POSIXct("2009-07-06 02:24", tz = "Etc/GMT-5") 66 | ) 67 | expect_equal( 68 | convert_to_datetime("40000"), 69 | as.POSIXct("2009-07-06", tz = "UTC") 70 | ) 71 | expect_equal( 72 | convert_to_datetime("40000.1"), 73 | as.POSIXct("2009-07-06 02:24", tz = "UTC") 74 | ) 75 | expect_equal( 76 | convert_to_datetime("40000.1", tz = "Etc/GMT-5"), 77 | as.POSIXct("2009-07-06 02:24", tz = "Etc/GMT-5") 78 | ) 79 | expect_equal( 80 | convert_to_datetime(factor("40000.1")), 81 | as.POSIXct("2009-07-06 02:24", tz = "UTC") 82 | ) 83 | expect_equal( 84 | convert_to_datetime(as.Date("2009-07-06")), 85 | as.POSIXct("2009-07-06", tz = "UTC") 86 | ) 87 | expect_equal( 88 | convert_to_datetime(as.POSIXct("2009-07-06", tz = "UTC")), 89 | as.POSIXct("2009-07-06", tz = "UTC") 90 | ) 91 | expect_equal( 92 | convert_to_datetime(as.POSIXlt("2009-07-06", tz = "UTC")), 93 | as.POSIXct("2009-07-06", tz = "UTC") 94 | ) 95 | expect_equal( 96 | convert_to_datetime(c("2009-07-06", "40000.1", "40000", NA), character_fun = lubridate::ymd_h, truncated = 1, tz = "UTC"), 97 | as.POSIXct(c("2009-07-06 00:00", "2009-07-06 02:24", "2009-07-06 00:00", NA), tz = "UTC"), 98 | info = "Mixed input works, including NA." 99 | ) 100 | }) 101 | 102 | test_that("convert_date warnings and errors work", { 103 | expect_warning( 104 | expect_error( 105 | convert_to_date("A"), 106 | regexp = "Not all character strings converted to class Date." 107 | ), 108 | regexp = "All formats failed to parse." 
# lubridate warning 109 | ) 110 | expect_warning( 111 | expect_error( 112 | convert_to_date(LETTERS), 113 | regexp = "Not all character strings converted to class Date.*17 other values", 114 | info = "Confirm the 'other values' when there are many values not converted." 115 | ), 116 | regexp = "All formats failed to parse." # lubridate warning 117 | ) 118 | expect_warning( 119 | expect_error( 120 | convert_to_date(LETTERS), 121 | regexp = "Not all character strings converted to class Date." 122 | ), 123 | regexp = "All formats failed to parse." # lubridate warning 124 | ) 125 | expect_warning( 126 | expect_warning( 127 | expect_equal( 128 | convert_to_date("A", string_conversion_failure = "warning"), 129 | as.Date(NA) 130 | ), 131 | regexp = "All formats failed to parse. No formats found." 132 | ), 133 | regexp = "Not all character strings converted to class Date." 134 | ) 135 | expect_error( 136 | convert_to_date("A", character_fun = function(x) 1), 137 | regexp = "must return class Date" 138 | ) 139 | expect_warning( 140 | convert_to_date("40000", include_time = TRUE), 141 | regexp = "`include_time` is ignored in favor of `out_class`" 142 | ) 143 | }) 144 | -------------------------------------------------------------------------------- /tests/testthat/test-get-dupes.R: -------------------------------------------------------------------------------- 1 | test_df <- data.frame(a = c(1, 3, 3, 3, 5), b = c("a", "c", "c", "e", "c"), stringsAsFactors = FALSE) 2 | 3 | test_that("Correct combinations of duplicates are found", { 4 | expect_equal(get_dupes(test_df, a), data.frame(a = test_df[[1]][2:4], dupe_count = rep(3L, 3), b = test_df[[2]][2:4], stringsAsFactors = FALSE)) 5 | expect_equal(get_dupes(test_df, b), data.frame(b = test_df[[2]][c(2:3, 5)], dupe_count = rep(3L, 3), a = test_df[[1]][c(2:3, 5)], stringsAsFactors = FALSE)) 6 | }) 7 | 8 | test_that("calling with no specified variable names uses all variable names", { 9 | expect_message( 10 | expect_equal(get_dupes(test_df), get_dupes(test_df, a, b)), 11 | "No variable names specified - using all columns." 12 | ) 13 | expect_message( 14 | expect_message( 15 | get_dupes(mtcars), 16 | "No variable names specified - using all columns." 17 | ), 18 | "No duplicate combinations found of: mpg, cyl.*and 2 other variables" 19 | ) 20 | }) 21 | 22 | no_dupes <- data.frame(a = 1, stringsAsFactors = FALSE) 23 | 24 | test_that("instances of no dupes throw correct messages, return empty df", { 25 | expect_message(no_dupes %>% get_dupes(a), "No duplicate combinations found of: a") 26 | expect_message( 27 | no_dup_a <- no_dupes %>% get_dupes(a), 28 | "No duplicate combinations found of: a" 29 | ) 30 | expect_equal( 31 | no_dup_a, 32 | data.frame(a = double(0), dupe_count = integer(0)) 33 | ) 34 | expect_message( 35 | expect_message( 36 | mtcars %>% dplyr::select(-1) %>% get_dupes(), 37 | "No variable names specified - using all columns." 38 | ), 39 | "No duplicate combinations found of: cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb" 40 | ) 41 | expect_message( 42 | expect_message( 43 | mtcars %>% get_dupes(), 44 | "No variable names specified - using all columns." 45 | ), 46 | "No duplicate combinations found of: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, ... 
and 2 other variables" 47 | ) 48 | }) 49 | 50 | test_that("incorrect variable names are handled", { 51 | expect_error(get_dupes(mtcars, x)) 52 | }) 53 | 54 | test_that("works on variables with irregular names", { 55 | badname_df <- mtcars %>% dplyr::mutate(`bad name!` = mpg * 1000) 56 | expect_equal( 57 | badname_df %>% get_dupes(`bad name!`, cyl) %>% dim(), 58 | c(10, 13) 59 | ) # does it return the right-sized result? 60 | expect_message( 61 | expect_message( 62 | badname_df_dup <- badname_df %>% get_dupes(), 63 | "No variable names specified - using all columns" 64 | ), 65 | "No duplicate combinations found of: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, ... and 3 other variables" 66 | ) 67 | expect_s3_class(badname_df_dup, "data.frame") # test for success, i.e., produces a data.frame (with 0 rows) 68 | }) 69 | 70 | test_that("tidyselect specification matches exact specification", { 71 | expect_equal(mtcars %>% get_dupes(contains("cy"), mpg), mtcars %>% get_dupes(cyl, mpg)) 72 | expect_equal(mtcars %>% get_dupes(mpg), mtcars %>% get_dupes(-c(cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb))) 73 | expect_equal( 74 | suppressMessages(mtcars %>% dplyr::select(cyl, wt) %>% get_dupes()), 75 | mtcars %>% dplyr::select(cyl, wt) %>% get_dupes(dplyr::everything()) 76 | ) 77 | }) 78 | 79 | test_that("grouped and ungrouped data is handled correctly", { 80 | expect_equal( 81 | mtcars %>% dplyr::group_by(carb, cyl) %>% get_dupes(mpg, carb) %>% dplyr::group_vars(), 82 | mtcars %>% dplyr::group_by(carb, cyl) %>% dplyr::group_vars() 83 | ) 84 | expect_equal( 85 | mtcars %>% dplyr::group_by(carb, cyl) %>% get_dupes(mpg, carb) %>% dplyr::ungroup(), 86 | mtcars %>% tibble::as_tibble() %>% get_dupes(mpg, carb) 87 | ) 88 | }) 89 | 90 | test_that("tibbles stay tibbles, non-tibble stay non-tibbles", { 91 | expect_equal( 92 | class(test_df %>% get_dupes(a)), 93 | class(test_df) 94 | ) 95 | expect_equal( 96 | class(tibble::as_tibble(test_df) %>% get_dupes(a)), 97 | class(tibble::as_tibble(test_df)) 98 | ) 99 | }) 100 | 101 | test_that("result is sorted by dupe_count descending, #493", { 102 | test_sort <- data.frame( 103 | a = c("x", "x", "y", "y", "y", "a", "a"), 104 | b = 1:7 105 | ) 106 | res <- get_dupes(test_sort, a) 107 | expect_equal(unique(res$a), c("y", "a", "x")) # y has 3, then alphabetically a precedes x 108 | }) 109 | -------------------------------------------------------------------------------- /tests/testthat/test-get-level-groups.R: -------------------------------------------------------------------------------- 1 | # Tests the get_level_groups helper function called by top_levels() 2 | 3 | shorts <- factor(c("a", "b", "c", "d", "e", "f"), levels = rev(letters[1:6])) 4 | longs <- factor(c("aaaaaaaaaaaaaaaa", "bbbbbbbbbbbbbbbbb", "cccccccccccccccccccc", "dddddddddddddddd", NA, "hhhhhhhhhhhhhhhh", "bbbbbbbbbbbbbbbbb"), levels = c("dddddddddddddddd", "aaaaaaaaaaaaaaaa", "cccccccccccccccccccc", "bbbbbbbbbbbbbbbbb", "hhhhhhhhhhhhhhhh")) 5 | 6 | short1 <- get_level_groups(shorts, 1, max(as.numeric(shorts), na.rm = TRUE)) 7 | short2 <- get_level_groups(shorts, 2, max(as.numeric(shorts), na.rm = TRUE)) 8 | short3 <- get_level_groups(shorts, 3, max(as.numeric(shorts), na.rm = TRUE)) 9 | 10 | test_that("names are grouped properly and groups are ordered correctly", { 11 | expect_equal(short1, list(top = "f", mid = "e, d, c, b", bot = "a")) 12 | expect_equal(short2, list(top = "f, e", mid = c("d, c"), bot = "b, a")) 13 | expect_equal(short3, list(top = "f, e, d", mid = NA, bot = "c, b, a")) 14 | }) 15 | 
16 | long1 <- get_level_groups(longs, 1, max(as.numeric(longs), na.rm = TRUE)) 17 | long2 <- get_level_groups(longs, 2, max(as.numeric(longs), na.rm = TRUE)) 18 | 19 | test_that("truncation works correctly", { 20 | expect_equal(long1, list(top = "dddddddddddddddd", mid = "<<< Middle Group (3 categories) >>>", bot = "hhhhhhhhhhhhhhhh")) 21 | expect_equal(long2, list(top = "dddddddddddddddd, aaaaaaaaa...", mid = "cccccccccccccccccccc", bot = "bbbbbbbbbbbbbbbbb, hhhhhhhh...")) 22 | expect_equal(nchar(long2$top), 30) 23 | expect_equal(nchar(long2$bot), 30) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-get_one_to_one.R: -------------------------------------------------------------------------------- 1 | test_that("get_one_to_one", { 2 | foo <- data.frame( 3 | Lab_Test_Long = c("Cholesterol, LDL", "Cholesterol, LDL", "Glucose"), 4 | Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 5 | LOINC = c(12345, 12345, 54321), 6 | Person = c("Sam", "Bill", "Sam"), 7 | stringsAsFactors = FALSE 8 | ) 9 | expect_equal( 10 | get_one_to_one(foo), 11 | list( 12 | c("Lab_Test_Long", "Lab_Test_Short", "LOINC") 13 | ) 14 | ) 15 | # NA is respected as though it were any other value 16 | foo <- data.frame( 17 | Lab_Test_Long = c(NA, NA, "Glucose"), 18 | Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 19 | LOINC = c(12345, 12345, 54321), 20 | Person = c("Sam", "Bill", "Sam"), 21 | stringsAsFactors = FALSE 22 | ) 23 | expect_equal( 24 | get_one_to_one(foo), 25 | list( 26 | c("Lab_Test_Long", "Lab_Test_Short", "LOINC") 27 | ) 28 | ) 29 | }) 30 | 31 | test_that("get_one_to_one: columns are only described once", { 32 | expect_true( 33 | !any(duplicated(unlist( 34 | get_one_to_one(mtcars[1:3, ]) 35 | ))) 36 | ) 37 | expect_equal( 38 | get_one_to_one(mtcars[1:3, ]), 39 | list( 40 | c("mpg", "cyl", "disp", "hp", "drat", "vs", "carb"), 41 | c("wt", "qsec"), 42 | c("am", "gear") 43 | ) 44 | ) 45 | # Ensure that single column outputs are dropped 46 | expect_equal( 47 | get_one_to_one(mtcars[1:5, ]), 48 | list( 49 | c("mpg", "disp", "drat"), 50 | c("cyl", "hp"), 51 | c("am", "gear") 52 | ) 53 | ) 54 | expect_message( 55 | expect_equal( 56 | get_one_to_one(mtcars), 57 | list() 58 | ), 59 | regexp = "No columns in `mtcars` map to each other" 60 | ) 61 | }) 62 | 63 | test_that("nearly duplicated dates (second decimal place differs) to not cause failure (#543)", { 64 | dates <- tibble::tibble( 65 | modification_time = 66 | structure( 67 | c(1684261364.85967, 1684274880.48328, 1684261364.85967, 1684418379.74664, 1685105253.21695, 1684418379.76668, 1684279133.50118, 1684161951.81434, 1684281651.93175, 1678483898.72893, 1685103626.03424), 68 | class = c("POSIXct", "POSIXt") 69 | ), 70 | access_time = 71 | structure( 72 | c(1685040222.34459, 1685041485.59089, 1685105067.68569, 1685040222.51569, 1685105253.21795, 1685105067.73877, 1685105253.66953, 1685106417.48391, 1685105253.66853, 1685041485.59089, 1685103652.82275), 73 | class = c("POSIXct", "POSIXt") 74 | ), 75 | change_time = structure( 76 | c(1684261364.85967, 1684274880.48328, 1684261364.85967, 1684418379.74664, 1685105253.21695, 1684418379.76668, 1684279133.50118, 1684161951.81434, 1684281651.93175, 1678483898.72893, 1685103626.03424), 77 | class = c("POSIXct", "POSIXt") 78 | ) 79 | ) 80 | expect_equal( 81 | janitor::get_one_to_one(dates), 82 | list(c("modification_time", "change_time")) 83 | ) 84 | }) 85 | -------------------------------------------------------------------------------- 
/tests/testthat/test-paste_skip_na.R: -------------------------------------------------------------------------------- 1 | test_that("paste_skip_na", { 2 | # handle no arguments the same as paste() 3 | expect_equal(paste_skip_na(), paste()) 4 | expect_equal(paste_skip_na(NA), NA_character_) 5 | expect_equal(paste_skip_na(NA, NA), NA_character_) 6 | expect_equal(paste_skip_na(NA, NA, sep = ","), NA_character_) 7 | # It does not behave like paste(NA, NA, collapse = ",") nor does it behave like paste(c(), collapse = ",") 8 | expect_equal(paste_skip_na(NA, NA, collapse = ","), NA_character_) 9 | 10 | expect_equal(paste_skip_na("A", NA), "A") 11 | expect_equal(paste_skip_na("A", NA, collapse = ","), "A") 12 | expect_equal(paste_skip_na("A", NA, c(NA, "B"), collapse = ","), "A,A B") 13 | expect_equal(paste_skip_na("A", NA, c(NA, "B"), sep = ","), c("A", "A,B")) 14 | 15 | expect_equal(paste_skip_na(c("A", "B"), NA), c("A", "B")) 16 | expect_equal(paste_skip_na(NA, c("A", "B")), c("A", "B")) 17 | }) 18 | 19 | test_that("paste_skip_na expected errors", { 20 | expect_error( 21 | paste_skip_na(c("A", "B"), c("A", "B", "C")), 22 | regexp = "Arguments must be the same length or one argument must be a scalar.", 23 | fixed = TRUE 24 | ) 25 | }) 26 | -------------------------------------------------------------------------------- /tests/testthat/test-round_to_fraction.R: -------------------------------------------------------------------------------- 1 | test_that("round_to_fraction input requirements (confirm errors)", { 2 | expect_error( 3 | round_to_fraction(x = "A", denominator = 1), 4 | info = "x must be numeric" 5 | ) 6 | expect_error( 7 | round_to_fraction(x = 1, denominator = "A"), 8 | info = "denominator must be numeric" 9 | ) 10 | expect_error( 11 | round_to_fraction(x = 1, denominator = c(1, 2)), 12 | info = "length(denominator) must equal length(x) or be 1" 13 | ) 14 | expect_error( 15 | round_to_fraction(x = 1, denominator = -1), 16 | info = "denominator must be positive" 17 | ) 18 | expect_error( 19 | round_to_fraction(x = 1, denominator = 0), 20 | info = "denominator must be positive, not zero" 21 | ) 22 | expect_error( 23 | round_to_fraction(x = 1, denominator = 2, digits = "A"), 24 | info = "digits must be numeric or 'auto'" 25 | ) 26 | expect_error( 27 | round_to_fraction(x = 1, denominator = 2, digits = c(1, 2)), 28 | info = "digits must be numeric or 'auto'" 29 | ) 30 | expect_error( 31 | round_to_fraction(x = c(1, 2), denominator = 2, digits = c(1, 2, 3)), 32 | info = "digits must be numeric and the same length as x (or scalar)" 33 | ) 34 | }) 35 | 36 | test_that("round_to_fraction results are as expected", { 37 | # scalars 38 | expect_equal( 39 | round_to_fraction(x = 1.1, denominator = 7), 40 | 8 / 7 41 | ) 42 | expect_equal( 43 | round_to_fraction(x = 1.1, denominator = 7, digits = Inf), 44 | round_to_fraction(x = 1.1, denominator = 7) 45 | ) 46 | expect_equal( 47 | round_to_fraction(x = 1.1, denominator = 7, digits = 5), 48 | round(8 / 7, digits = 5) 49 | ) 50 | expect_equal( 51 | round_to_fraction(x = 1.1, denominator = 7, digits = "auto"), 52 | round(8 / 7, digits = 2) 53 | ) 54 | 55 | # vectors 56 | expect_equal( 57 | round_to_fraction(x = c(1.1, 2.05), denominator = 7, digits = Inf), 58 | c(8 / 7, 2) 59 | ) 60 | expect_equal( 61 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 25), digits = Inf), 62 | c(8 / 7, 51 / 25) 63 | ) 64 | expect_equal( 65 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = Inf), 66 | c(8 / 7, 55 / 27) 67 | ) 68 | expect_equal( 
69 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = 3), 70 | round(c(8 / 7, 55 / 27), digits = 3) 71 | ) 72 | expect_equal( 73 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = c(3, 4)), 74 | round(c(8 / 7, 55 / 27), digits = c(3, 4)) 75 | ) 76 | expect_equal( 77 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = "auto"), 78 | round(c(8 / 7, 55 / 27), digits = c(2, 3)) 79 | ) 80 | }) 81 | -------------------------------------------------------------------------------- /tests/testthat/test-sas_dates.R: -------------------------------------------------------------------------------- 1 | test_that("sas_numeric_to_date", { 2 | expect_equal( 3 | sas_numeric_to_date(date_num = 15639), 4 | as.Date("2002-10-26") 5 | ) 6 | expect_equal( 7 | sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC"), 8 | as.POSIXct("1998-07-26 14:45:32", tz = "UTC") 9 | ) 10 | expect_equal( 11 | sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC"), 12 | as.POSIXct("2002-10-26 01:00:00", tz = "UTC") 13 | ) 14 | expect_equal( 15 | sas_numeric_to_date(time_num = 3600), 16 | hms::hms(3600) 17 | ) 18 | # NA management 19 | expect_equal( 20 | sas_numeric_to_date(date_num = c(NA, 1), time_num = c(NA, 1), tz = "UTC"), 21 | as.POSIXct(c(NA, "1960-01-02 00:00:01"), tz = "UTC") 22 | ) 23 | expect_equal( 24 | sas_numeric_to_date(date_num = NA, time_num = NA, tz = "UTC"), 25 | as.POSIXct(NA, tz = "UTC") 26 | ) 27 | # Timezone warning (#583) 28 | expect_warning( 29 | sas_numeric_to_date(date_num = 1, time_num = 1, tz = "America/New_York"), 30 | regexp = "SAS may not properly store timezones other than UTC. Consider confirming the accuracy of the resulting data.", 31 | fixed = TRUE 32 | ) 33 | }) 34 | 35 | test_that("sas_numeric_to_date expected errors", { 36 | expect_error( 37 | sas_numeric_to_date(date_num = 15639, datetime_num = 1), 38 | regexp = "Must not give both `date_num` and `datetime_num`" 39 | ) 40 | expect_error( 41 | sas_numeric_to_date(datetime_num = 1, time_num = 1), 42 | regexp = "Must not give both `time_num` and `datetime_num`" 43 | ) 44 | expect_error( 45 | sas_numeric_to_date(time_num = -1), 46 | regexp = "`time_num` must be non-negative" 47 | ) 48 | expect_error( 49 | sas_numeric_to_date(time_num = 86401), 50 | regexp = "`time_num` must be within the number of seconds in a day (<= 86400)", 51 | fixed = TRUE 52 | ) 53 | expect_error( 54 | sas_numeric_to_date(), 55 | regexp = "Must give one of `date_num`, `datetime_num`, `time_num`, or `date_num` and `time_num`" 56 | ) 57 | expect_error( 58 | sas_numeric_to_date(date_num = c(NA, 1), time_num = c(1, NA)), 59 | regexp = "The same values are not NA for both `date_num` and `time_num`" 60 | ) 61 | }) 62 | -------------------------------------------------------------------------------- /tests/testthat/test-signif_half_up.R: -------------------------------------------------------------------------------- 1 | test_that("signif_half_up results are as expected", { 2 | # scalars 3 | expect_equal( 4 | signif_half_up(x = 12.5, digits = 2), 5 | 13 6 | ) 7 | expect_equal( 8 | signif_half_up(x = 0), 9 | 0 10 | ) 11 | expect_equal( 12 | signif_half_up(x = -2.5, digits = 1), 13 | -3 14 | ) 15 | expect_equal( 16 | signif_half_up(x = 123.45, digits = 4), 17 | 123.5 18 | ) 19 | expect_equal( 20 | signif_half_up(x = -123.45, digits = 4), 21 | -123.5 22 | ) 23 | # vectors 24 | expect_equal( 25 | signif_half_up(x = c(12.5, 0, -2.5, 123.45, -123.45), digits = 2), 26 | c(13, 0, -2.5, 120, -120) 27 | ) 28 | 
expect_equal( 29 | signif_half_up(x = c(1, 1.5, 1.49, NA, NaN, -Inf, Inf), digits = 2), 30 | c(1, 1.5, 1.5, NA, NaN, -Inf, Inf) 31 | ) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test-single_value.R: -------------------------------------------------------------------------------- 1 | test_that("single_value", { 2 | expect_equal(single_value(1), 1) 3 | expect_equal(single_value(c(1, NA)), 1) 4 | expect_equal(single_value(c(1, NA, 1)), 1) 5 | expect_equal(single_value(NA_real_), NA) 6 | expect_equal(single_value(NA, missing = NA_real_), NA_real_) 7 | 8 | # Order of `missing` affects the output 9 | expect_equal(single_value("", missing = c(NA, "")), NA_character_) 10 | expect_equal(single_value(NA, missing = c("", NA)), "") 11 | 12 | # Check warn_if_all_missing 13 | expect_silent(single_value(NA)) 14 | expect_warning( 15 | single_value(NA, warn_if_all_missing = TRUE), 16 | regexp = "All values are missing" 17 | ) 18 | 19 | expect_error( 20 | single_value(1:2), 21 | regexp = "More than one (2) value found (1, 2)", 22 | fixed = TRUE 23 | ) 24 | expect_error( 25 | single_value(1:2, info = "multiple"), 26 | regexp = "More than one (2) value found (1, 2): multiple", 27 | fixed = TRUE 28 | ) 29 | }) 30 | -------------------------------------------------------------------------------- /tests/testthat/test-statistical-tests.R: -------------------------------------------------------------------------------- 1 | # Tests for two-way statistical tests 2 | 3 | # Duplicate mtcars rows to avoid chisq.test warnings 4 | mtcars3 <- rbind(mtcars, mtcars, mtcars) 5 | tab <- table(mtcars3$am, mtcars3$cyl) 6 | ttab <- tabyl(mtcars3, am, cyl) 7 | ow_tab <- tabyl(mtcars3, am) 8 | 9 | test_that("one-way tabyl is rejected by chisq.test and fisher.test", { 10 | expect_error(chisq.test(ow_tab)) 11 | expect_error(fisher.test(ow_tab)) 12 | }) 13 | 14 | test_that("janitor::chisq.test on a table is correct", { 15 | res <- stats::chisq.test(tab) 16 | jres <- janitor::chisq.test(tab) 17 | expect_equal(jres, res) 18 | }) 19 | 20 | test_that("janitor::chisq.test on a matrix is correct", { 21 | mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3) 22 | res <- stats::chisq.test(mat) 23 | jres <- janitor::chisq.test(mat) 24 | expect_equal(jres, res) 25 | }) 26 | 27 | test_that("janitor::chisq.test on two factors is correct", { 28 | res <- stats::chisq.test(mtcars3$am, mtcars3$cyl) 29 | jres <- janitor::chisq.test(mtcars3$am, mtcars3$cyl) 30 | expect_equal(jres, res) 31 | }) 32 | 33 | test_that("janitor::chisq.test with a numeric vector and p is correct", { 34 | v1 <- round(runif(10, 200, 1000)) 35 | v2 <- round(runif(10, 200, 1000)) 36 | res <- stats::chisq.test(v1, p = v2 / sum(v2)) 37 | jres <- janitor::chisq.test(v1, p = v2 / sum(v2)) 38 | expect_equal(jres, res) 39 | }) 40 | 41 | test_that("janitor::fisher.test on a table is correct", { 42 | res <- stats::fisher.test(tab) 43 | jres <- janitor::fisher.test(tab) 44 | expect_equal(jres, res) 45 | }) 46 | 47 | test_that("janitor::fisher.test on a matrix is correct", { 48 | mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3) 49 | res <- stats::fisher.test(mat) 50 | jres <- janitor::fisher.test(mat) 51 | expect_equal(jres, res) 52 | }) 53 | 54 | test_that("janitor::fisher.test on two vectors is correct", { 55 | res <- stats::fisher.test(mtcars3$am, mtcars3$cyl) 56 | jres <- janitor::fisher.test(mtcars3$am, mtcars3$cyl) 57 | expect_equal(jres, res) 58 | }) 59 | 60 | test_that("janitor::chisq.test on a two-way 
tabyl is identical to stats::chisq.test", { 61 | tab <- tabyl(mtcars3, am, cyl) 62 | tres <- chisq.test(tab, tabyl_results = FALSE) 63 | tab <- table(mtcars3$am, mtcars3$cyl) 64 | res <- chisq.test(tab) 65 | expect_equal(tres, res) 66 | }) 67 | 68 | test_that("janitor::fisher.test on a two-way tabyl is identical to stats::fisher.test", { 69 | tab <- tabyl(mtcars3, am, cyl) 70 | tres <- fisher.test(tab) 71 | tab <- table(mtcars3$am, mtcars3$cyl) 72 | res <- fisher.test(tab) 73 | expect_equal(tres, res) 74 | }) 75 | 76 | test_that("janitor::chisq.test returns tabyl tables", { 77 | tres <- chisq.test(ttab, tabyl_results = TRUE) 78 | expect_s3_class(tres$observed, "tabyl") 79 | expect_s3_class(tres$expected, "tabyl") 80 | expect_s3_class(tres$residuals, "tabyl") 81 | expect_s3_class(tres$stdres, "tabyl") 82 | }) 83 | 84 | test_that("returned tabyls have correct names and attributes", { 85 | tres <- chisq.test(ttab, tabyl_results = TRUE) 86 | expect_named(tres$observed, c("am", "4", "6", "8")) 87 | expect_named(tres$expected, c("am", "4", "6", "8")) 88 | expect_named(tres$residuals, c("am", "4", "6", "8")) 89 | expect_named(tres$stdres, c("am", "4", "6", "8")) 90 | expect_equal(tres$observed[[1]], c("0", "1")) 91 | expect_equal(tres$expected[[1]], c("0", "1")) 92 | expect_equal(tres$residuals[[1]], c("0", "1")) 93 | expect_equal(tres$stdres[[1]], c("0", "1")) 94 | expect_equal(attr(tres$observed, "var_names"), list(row = "am", col = "cyl")) 95 | expect_equal(attr(tres$expected, "var_names"), list(row = "am", col = "cyl")) 96 | expect_equal(attr(tres$residuals, "var_names"), list(row = "am", col = "cyl")) 97 | expect_equal(attr(tres$stdres, "var_names"), list(row = "am", col = "cyl")) 98 | }) 99 | 100 | test_that("totals are excluded from the statistical tests, #385", { 101 | # Chi-Square 102 | cx <- chisq.test(ttab) 103 | cx_totals <- suppressWarnings(chisq.test(adorn_totals(ttab, "both"))) 104 | cx_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names 105 | expect_equal( 106 | cx, 107 | cx_totals 108 | ) 109 | expect_warning( 110 | chisq.test(ttab %>% adorn_totals()), 111 | "detected a totals row" 112 | ) 113 | 114 | # Fisher 115 | fisher <- fisher.test(ttab) 116 | fisher_totals <- suppressWarnings(fisher.test(adorn_totals(ttab, "both"))) 117 | fisher_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names 118 | expect_equal( 119 | fisher, 120 | fisher_totals 121 | ) 122 | expect_warning( 123 | fisher.test(ttab %>% adorn_totals()), 124 | "detected a totals row" 125 | ) 126 | }) 127 | -------------------------------------------------------------------------------- /tests/testthat/test-tabyl-classifiers.R: -------------------------------------------------------------------------------- 1 | # Tests tabyl class functions 2 | 3 | a <- mtcars %>% 4 | tabyl(cyl, carb) 5 | 6 | b <- mtcars %>% 7 | dplyr::count(cyl, carb) %>% 8 | tidyr::pivot_wider( 9 | names_from = carb, 10 | values_from = n, 11 | values_fill = 0, 12 | names_sort = TRUE 13 | ) %>% 14 | as.data.frame() # for comparison purposes, remove the tbl_df aspect 15 | 16 | 17 | test_that("as_tabyl works on result of a non-janitor count/pivot_wider", { 18 | expect_equal( 19 | as_tabyl(a), 20 | as_tabyl(b, 2, "cyl", "carb") 21 | ) 22 | }) 23 | 24 | test_that("as_tabyl sets attributes correctly", { 25 | d <- as_tabyl(a) 26 | expect_equal(class(d), class(a)) 27 | expect_equal(attr(d, "core"), untabyl(a)) 28 | expect_equal(attr(d, "tabyl_type"), "two_way") 29 | 
}) 30 | 31 | test_that("untabyl puts back to original form", { 32 | expect_equal(mtcars, untabyl(as_tabyl(mtcars))) 33 | }) 34 | 35 | test_that("untabyl warns if called on non-tabyl", { 36 | expect_warning( 37 | untabyl(mtcars), 38 | "untabyl\\(\\) called on a non-tabyl" 39 | ) 40 | }) 41 | 42 | test_that("untabyl automatically invokes purrr::map when called on a 3-way tabyl", { 43 | three <- tabyl(mtcars, cyl, am, gear) 44 | expect_equal( 45 | untabyl(three), # vanilla call 46 | purrr::map(three, untabyl) 47 | ) 48 | }) 49 | 50 | test_that("as_tabyl is okay with non-numeric columns", { 51 | e <- b %>% 52 | dplyr::mutate(extra = "val") 53 | expect_equal(attr(as_tabyl(e), "core"), e) # implied success of as_tabyl 54 | }) 55 | 56 | test_that("as_tabyl fails if no numeric columns in 2:n", { 57 | bad <- data.frame( 58 | a = 1:2, 59 | b = c("x", "y") 60 | ) 61 | expect_error(as_tabyl(bad), "at least one one of columns 2:n must be of class numeric") 62 | }) 63 | 64 | test_that("bad inputs are caught", { 65 | expect_error(as_tabyl(mtcars, 3), 66 | "axes must be either 1 or 2", 67 | fixed = TRUE 68 | ) 69 | 70 | expect_error(as_tabyl(1:10), 71 | "input must be a data.frame", 72 | fixed = TRUE 73 | ) 74 | 75 | # don't pass names to a 1-way tabyl 76 | expect_error( 77 | as_tabyl(mtcars, axes = 1, row_var_name = "foo"), 78 | "variable names are only meaningful for two-way tabyls" 79 | ) 80 | }) 81 | 82 | test_that("adorn_totals and adorn_percentages reset the tabyl's core to reflect sorting, #407", { 83 | unsorted <- mtcars %>% tabyl(am, cyl) 84 | sorted <- dplyr::arrange(unsorted, desc(`4`)) 85 | expect_equal( 86 | sorted %>% 87 | adorn_totals() %>% 88 | attr(., "core"), 89 | sorted %>% 90 | untabyl() 91 | ) 92 | expect_equal( 93 | sorted %>% 94 | adorn_percentages() %>% 95 | attr(., "core"), 96 | sorted %>% 97 | untabyl() 98 | ) 99 | # both: 100 | expect_equal( 101 | sorted %>% 102 | adorn_totals() %>% 103 | adorn_percentages() %>% 104 | attr(., "core"), 105 | sorted %>% 106 | untabyl() 107 | ) 108 | # Ns with "Total" row sorted to top - the Total N should be up there too: 109 | expect_equal( 110 | sorted %>% 111 | adorn_totals() %>% 112 | adorn_percentages("col") %>% 113 | dplyr::arrange(desc(`4`)) %>% 114 | adorn_ns() %>% 115 | dplyr::pull(`4`) %>% 116 | dplyr::first(), 117 | "1.0000000 (11)" 118 | ) 119 | }) 120 | -------------------------------------------------------------------------------- /tests/testthat/test-top-levels.R: -------------------------------------------------------------------------------- 1 | fac <- factor(c("a", "b", "c", "d", "e", "f", "f"), levels = rev(letters[1:6])) 2 | fac_odd_lvls <- factor(fac, levels = rev(letters[1:5])) 3 | 4 | # more tests - group names and ordering - are in test-get-level-groups.R 5 | test_that("top_levels values are correct", { 6 | expect_equal(top_levels(fac)[[3]], c(3 / 7, 2 / 7, 2 / 7)) # default n = 2, num_levels = 6 7 | expect_equal(top_levels(fac)[[2]], c(3, 2, 2)) 8 | expect_equal(top_levels(fac, 3)[[3]], c(4 / 7, 3 / 7)) # n = 3, num_levels = 6 9 | expect_equal(top_levels(fac, 3)[[2]], c(4, 3)) 10 | expect_equal(top_levels(fac_odd_lvls)[[2]], c(2, 1, 2)) # default n = 2, num_levels = 5 11 | expect_equal(top_levels(fac_odd_lvls)[[3]], c(0.4, 0.2, 0.4)) 12 | expect_equal(top_levels(fac_odd_lvls, 1)[[2]], c(1, 3, 1)) # n = 1, num_levels = 5 13 | expect_equal(top_levels(fac_odd_lvls, 1)[[3]], c(0.2, 0.6, 0.2)) 14 | }) 15 | 16 | test_that("top_levels missing levels are represented", { 17 | x <- as.factor(letters[1:5])[1:3] 18 | expect_equal( 19 
| top_levels(x)[[1]], 20 | structure(1:3, .Label = c("a, b", "c", "d, e"), class = "factor") 21 | ) 22 | expect_equal( 23 | top_levels(x)[[2]], 24 | c(2, 1, 0) 25 | ) 26 | }) 27 | 28 | 29 | test_that("top_levels NA results are treated appropriately", { 30 | fac_na <- fac 31 | fac_na[7] <- NA 32 | expect_equal(top_levels(fac_na)[[2]], rep(2, 3)) 33 | expect_equal(top_levels(fac_na, show_na = TRUE)[[2]], c(2, 2, 2, 1)) 34 | expect_equal(top_levels(fac_na, show_na = TRUE)[[3]], c(2 / 7, 2 / 7, 2 / 7, 1 / 7)) 35 | expect_equal(top_levels(fac_na, show_na = TRUE)[[4]], c(1 / 3, 1 / 3, 1 / 3, NA)) 36 | }) 37 | 38 | test_that("top_levels default n parameter works", { 39 | expect_equal(top_levels(fac), top_levels(fac, 2)) 40 | }) 41 | 42 | test_that("top_levels missing levels are treated appropriately", { 43 | fac_missing_lvl <- fac 44 | fac_missing_lvl[2] <- NA 45 | expect_equal(top_levels(fac_missing_lvl)[[2]], c(3, 2, 1)) 46 | }) 47 | 48 | test_that("top_levels bad type inputs are handled", { 49 | expect_error(top_levels(c(0, 1), "factor_vec is not of type 'factor'")) 50 | expect_error(top_levels(c("hi", "lo"), "factor_vec is not of type 'factor'")) 51 | expect_error(top_levels(mtcars, "factor_vec is not of type 'factor'")) 52 | }) 53 | 54 | test_that("top_levels bad n value is handled", { 55 | expect_error(top_levels(fac, 4)) 56 | expect_error(top_levels(fac_odd_lvls, 3)) 57 | expect_error(top_levels(fac, 0)) 58 | expect_error(top_levels(factor(c("a", "b"))), "input factor variable must have at least 3 levels") 59 | }) 60 | 61 | test_that("top_levels correct variable name assigned to first column of result", { 62 | expect_equal(names(top_levels(fac))[1], "fac") 63 | }) 64 | -------------------------------------------------------------------------------- /tests/testthat/test-utilities.R: -------------------------------------------------------------------------------- 1 | test_that("round_half_up works", { 2 | expect_equal(round_half_up(-0.5, 0), -1) 3 | expect_equal(round_half_up(0.5, 0), 1) 4 | expect_equal(round_half_up(1.125, 2), 1.13) 5 | expect_equal(round_half_up(1.135, 2), 1.14) 6 | expect_equal(round_half_up(2436.845, 2), 2436.85) 7 | }) 8 | -------------------------------------------------------------------------------- /tests/testthat/testdata/issue-578-sf.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/tests/testthat/testdata/issue-578-sf.rds --------------------------------------------------------------------------------