├── .Rbuildignore ├── .covrignore ├── .github ├── .gitignore ├── CONTRIBUTING.md ├── issue_template.md ├── pull_request_template.md └── workflows │ ├── R-CMD-check.yaml │ ├── pkgdown.yaml │ ├── style.yaml │ └── test-coverage.yaml ├── .gitignore ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── adorn_ns.R ├── adorn_pct_formatting.R ├── adorn_percentages.R ├── adorn_rounding.R ├── adorn_title.R ├── adorn_totals.R ├── as_and_untabyl.R ├── clean_names.R ├── compare_df_cols.R ├── convert_to_date.R ├── excel_dates.R ├── excel_time_to_numeric.R ├── get_dupes.R ├── get_level_groups.R ├── get_one_to_one.R ├── janitor.R ├── janitor_deprecated.R ├── make_clean_names.R ├── paste_skip_na.R ├── print_tabyl.R ├── remove_empties.R ├── round_half_up.R ├── round_to_fraction.R ├── row_to_names.R ├── sas_dates.R ├── single_value.R ├── statistical_tests.R ├── tabyl.R ├── top_levels.R └── utils-pipe.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── codecov.yml ├── cran-comments.md ├── dirty_data.xlsx ├── janitor.Rproj ├── man ├── add_totals_col.Rd ├── add_totals_row.Rd ├── adorn_crosstab.Rd ├── adorn_ns.Rd ├── adorn_pct_formatting.Rd ├── adorn_percentages.Rd ├── adorn_rounding.Rd ├── adorn_title.Rd ├── adorn_totals.Rd ├── as_tabyl.Rd ├── chisq.test.Rd ├── clean_names.Rd ├── compare_df_cols.Rd ├── compare_df_cols_same.Rd ├── convert_to_NA.Rd ├── convert_to_date.Rd ├── crosstab.Rd ├── describe_class.Rd ├── excel_numeric_to_date.Rd ├── excel_time_to_numeric.Rd ├── figures │ ├── dirty_data.PNG │ └── logo_small.png ├── find_header.Rd ├── fisher.test.Rd ├── get_dupes.Rd ├── get_one_to_one.Rd ├── janitor-package.Rd ├── janitor_deprecated.Rd ├── make_clean_names.Rd ├── mu_to_u.Rd ├── paste_skip_na.Rd ├── pipe.Rd ├── remove_constant.Rd ├── remove_empty.Rd ├── remove_empty_cols.Rd ├── remove_empty_rows.Rd ├── round_half_up.Rd ├── round_to_fraction.Rd ├── row_to_names.Rd ├── sas_numeric_to_date.Rd ├── signif_half_up.Rd ├── single_value.Rd ├── tabyl.Rd ├── top_levels.Rd ├── untabyl.Rd └── use_first_valid_of.Rd ├── pkgdown └── favicon │ ├── apple-touch-icon-120x120.png │ ├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ └── favicon.ico ├── revdep ├── .gitignore ├── README.md ├── checks.rds ├── email.yml └── problems.md ├── tests ├── testthat.R └── testthat │ ├── test-adorn-ns.R │ ├── test-adorn-pct-formatting.R │ ├── test-adorn-percentages.R │ ├── test-adorn-rounding.R │ ├── test-adorn-title.R │ ├── test-adorn-totals.R │ ├── test-clean-names.R │ ├── test-compare_df_cols.R │ ├── test-convert_to_date.R │ ├── test-date-conversion.R │ ├── test-excel_time_to_numeric.R │ ├── test-get-dupes.R │ ├── test-get-level-groups.R │ ├── test-get_one_to_one.R │ ├── test-paste_skip_na.R │ ├── test-remove-empties.R │ ├── test-round_to_fraction.R │ ├── test-row-to-names.R │ ├── test-sas_dates.R │ ├── test-signif_half_up.R │ ├── test-single_value.R │ ├── test-statistical-tests.R │ ├── test-tabyl-classifiers.R │ ├── test-tabyl.R │ ├── test-top-levels.R │ ├── test-utilities.R │ └── testdata │ └── issue-578-sf.rds └── vignettes ├── janitor.Rmd ├── janitor.md ├── tabyls.Rmd └── tabyls.md /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^README\.Rmd$ # An Rmarkdown file used to generate README.md 5 | cran-comments.md # Comments for CRAN submission 6 | 
^LICENSE\.md$ 7 | ^\.gitignore 8 | README.Rmd 9 | ^cran\-comments\.md$ 10 | dirty_data.xlsx 11 | ^.*\.docx 12 | codecov.yml 13 | planning.md 14 | planning.Rmd 15 | map of janitor 1.0 API.docx 16 | .github 17 | ^docs$ 18 | Index.md 19 | Index.Rmd 20 | ^revdep$ 21 | .covrignore 22 | ^CRAN-RELEASE$ 23 | ^doc$ 24 | ^Meta$ 25 | ^_pkgdown\.yml$ 26 | ^pkgdown$ 27 | ^\.github$ 28 | revdep/ 29 | ^CRAN-SUBMISSION$ 30 | -------------------------------------------------------------------------------- /.covrignore: -------------------------------------------------------------------------------- 1 | R/janitor_deprecated.R 2 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # CONTRIBUTING # 2 | 3 | ### Please contribute! 4 | 5 | We love collaboration. 6 | 7 | ### Bugs? Feature requests? 8 | 9 | * Submit an issue on the [issues page](https://github.com/sfirke/janitor/issues) 10 | 11 | ### Code contributions 12 | 13 | I would prefer some discussion before an unsolicited code contribution, i.e., pull request. This ensures that your effort is not wasted and that we're aligned on how to improve the janitor package. 14 | 15 | This is especially true if your proposed contribution does not match a currently open issue. If that's the case, please open new issue(s) to have the discussion there, prior to submitting code. 16 | 17 | If your proposed contribution addresses multiple issues, it should ideally be broken into multiple pull requests. This will make it easier for me to review and approve. 18 | 19 | #### The mechanics of contributing: 20 | 21 | * Fork this repo to your GitHub account 22 | * Clone your fork down to your machine, e.g., `git clone https://github.com/<yourgithubusername>/janitor.git` 23 | * Make sure to track progress upstream (i.e., on our version of `janitor` at `sfirke/janitor`) by doing `git remote add upstream https://github.com/sfirke/janitor.git`. Before making changes, pull in changes from upstream with either `git fetch upstream` followed by a merge, or `git pull upstream` to fetch and merge in one step 24 | * Make your changes (bonus points for making changes on a new feature branch) 25 | * Push up to your account 26 | * Submit a pull request to the main branch at `sfirke/janitor` 27 | 28 | ### Prefer to discuss over email? 29 | Email Sam. His email address is in the `DESCRIPTION` file of this repo. 30 | 31 | ### Thanks for contributing! 32 | -------------------------------------------------------------------------------- /.github/issue_template.md: -------------------------------------------------------------------------------- 1 | ## Feature requests 2 | 3 | Briefly describe what the feature would do and why it is in scope for the janitor package. 4 | 5 | ## Bug reports 6 | 7 | Please briefly describe your problem and what output you expect. 8 | 9 | Please include a minimal reprex. The goal of a reprex is to make it as easy as possible for me to recreate your problem so that I can fix it. If you've never heard of a reprex before, start by reading about how to create one, and follow the advice there. 10 | 11 | Delete these instructions once you have read them.
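For example (an illustrative sketch, not a required format), a janitor reprex can be as small as a line of inline fake data plus the call that misbehaves, with a comment noting what you expected:

```r
library(janitor)
# expected the column to become "a_column"; describe what you saw instead
clean_names(data.frame(`A Column` = 1, check.names = FALSE))
```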
12 | 13 | --- 14 | 15 | Brief description of the problem 16 | 17 | ```r 18 | # insert reprex here 19 | ``` 20 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Description 4 | 5 | 6 | ## Related Issue 7 | 11 | 12 | ## Example 13 | 15 | 16 | 18 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: | 47 | any::sf 48 | any::rcmdcheck 49 | needs: check 50 | 51 | - uses: r-lib/actions/check-r-package@v2 52 | with: 53 | upload-snapshots: true 54 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' 55 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | release: 9 | types: [published] 10 | workflow_dispatch: 11 | 12 | name: pkgdown 13 | 14 | permissions: read-all 15 | 16 | jobs: 17 | pkgdown: 18 | runs-on: ubuntu-latest 19 | # Only restrict concurrency for non-PR jobs 20 | concurrency: 21 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 22 | env: 23 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 24 | permissions: 25 | contents: write 26 | steps: 27 | - uses: actions/checkout@v4 28 | 29 | - uses: r-lib/actions/setup-pandoc@v2 30 | 31 | - uses: r-lib/actions/setup-r@v2 32 | with: 33 | use-public-rspm: true 34 | 35 | - uses: r-lib/actions/setup-r-dependencies@v2 36 | with: 37 | extra-packages: any::pkgdown, local::. 
38 | needs: website 39 | 40 | - name: Build site 41 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) 42 | shell: Rscript {0} 43 | 44 | - name: Deploy to GitHub pages 🚀 45 | if: github.event_name != 'pull_request' 46 | uses: JamesIves/github-pages-deploy-action@v4.5.0 47 | with: 48 | clean: false 49 | branch: gh-pages 50 | folder: docs 51 | -------------------------------------------------------------------------------- /.github/workflows/style.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"] 6 | 7 | name: Style 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | style: 13 | runs-on: ubuntu-latest 14 | permissions: 15 | contents: write 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - name: Checkout repo 20 | uses: actions/checkout@v4 21 | with: 22 | fetch-depth: 0 23 | 24 | - name: Setup R 25 | uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - name: Install dependencies 30 | uses: r-lib/actions/setup-r-dependencies@v2 31 | with: 32 | extra-packages: any::styler, any::roxygen2 33 | needs: styler 34 | 35 | - name: Enable styler cache 36 | run: styler::cache_activate() 37 | shell: Rscript {0} 38 | 39 | - name: Determine cache location 40 | id: styler-location 41 | run: | 42 | cat( 43 | "location=", 44 | styler::cache_info(format = "tabular")$location, 45 | "\n", 46 | file = Sys.getenv("GITHUB_OUTPUT"), 47 | append = TRUE, 48 | sep = "" 49 | ) 50 | shell: Rscript {0} 51 | 52 | - name: Cache styler 53 | uses: actions/cache@v4 54 | with: 55 | path: ${{ steps.styler-location.outputs.location }} 56 | key: ${{ runner.os }}-styler-${{ github.sha }} 57 | restore-keys: | 58 | ${{ runner.os }}-styler- 59 | ${{ runner.os }}- 60 | 61 | - name: Style 62 | run: styler::style_pkg() 63 | shell: Rscript {0} 64 | 65 | - name: Commit and push changes 66 | run: | 67 | if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \ 68 | | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$')) 69 | then 70 | git config --local user.name "$GITHUB_ACTOR" 71 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" 72 | git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)" 73 | git pull --ff-only 74 | git push origin 75 | else 76 | echo "No changes to commit." 77 | fi 78 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: test-coverage 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | test-coverage: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | 22 | - uses: r-lib/actions/setup-r@v2 23 | with: 24 | use-public-rspm: true 25 | 26 | - uses: r-lib/actions/setup-r-dependencies@v2 27 | with: 28 | extra-packages: | 29 | any::sf 30 | any::covr 31 | any::xml2 32 | needs: coverage 33 | 34 | - name: Test coverage 35 | run: | 36 | cov <- covr::package_coverage( 37 | quiet = FALSE, 38 | clean = FALSE, 39 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") 40 | ) 41 | covr::to_cobertura(cov) 42 | shell: Rscript {0} 43 | 44 | - uses: codecov/codecov-action@v4 45 | with: 46 | fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }} 47 | file: ./cobertura.xml 48 | plugin: noop 49 | disable_search: true 50 | token: ${{ secrets.CODECOV_TOKEN }} 51 | 52 | - name: Show testthat output 53 | if: always() 54 | run: | 55 | ## -------------------------------------------------------------------- 56 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true 57 | shell: bash 58 | 59 | - name: Upload test results 60 | if: failure() 61 | uses: actions/upload-artifact@v4 62 | with: 63 | name: coverage-test-failures 64 | path: ${{ runner.temp }}/package 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | *.docx 5 | 6 | 7 | ~WRL0005\.tmp 8 | doc 9 | docs 10 | Meta 11 | docs/ 12 | janitor.Rproj 13 | 14 | 15 | revdep/* 16 | revdep -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: janitor 2 | Title: Simple Tools for Examining and Cleaning Dirty Data 3 | Version: 2.2.1.9000 4 | Authors@R: c( 5 | person("Sam", "Firke", , "samuel.firke@gmail.com", role = c("aut", "cre")), 6 | person("Bill", "Denney", , "wdenney@humanpredictions.com", role = "ctb"), 7 | person("Chris", "Haid", , "chrishaid@gmail.com", role = "ctb"), 8 | person("Ryan", "Knight", , "ryangknight@gmail.com", role = "ctb"), 9 | person("Malte", "Grosser", , "malte.grosser@gmail.com", role = "ctb"), 10 | person("Jonathan", "Zadra", , "jonathan.zadra@sorensonimpact.com", role = "ctb"), 11 | person("Olivier", "Roy", role = "ctb"), 12 | person("Josep", family = "Pueyo-Ros", email = "josep.pueyo@udg.edu", role = "ctb") 13 | ) 14 | Description: The main janitor functions can: perfectly format data.frame 15 | column names; provide quick counts of variable combinations (i.e., 16 | frequency tables and crosstabs); and explore duplicate records. Other 17 | janitor functions nicely format the tabulation results. These 18 | tabulate-and-report functions approximate popular features of SPSS and 19 | Microsoft Excel. This package follows the principles of the 20 | "tidyverse" and works well with the pipe function %>%. janitor was 21 | built with beginning-to-intermediate R users in mind and is optimized 22 | for user-friendliness. 
23 | License: MIT + file LICENSE 24 | URL: https://github.com/sfirke/janitor, https://sfirke.github.io/janitor/ 25 | BugReports: https://github.com/sfirke/janitor/issues 26 | Depends: 27 | R (>= 3.1.2) 28 | Imports: 29 | dplyr (>= 1.0.0), 30 | hms, 31 | lifecycle, 32 | lubridate, 33 | magrittr, 34 | purrr, 35 | rlang, 36 | snakecase (>= 0.9.2), 37 | stringi, 38 | stringr, 39 | tidyr (>= 1.0.0), 40 | tidyselect (>= 1.0.0) 41 | Suggests: 42 | dbplyr, 43 | knitr, 44 | rmarkdown, 45 | RSQLite, 46 | sf, 47 | testthat (>= 3.0.0), 48 | tibble, 49 | tidygraph 50 | VignetteBuilder: 51 | knitr 52 | Config/testthat/edition: 3 53 | Encoding: UTF-8 54 | LazyData: true 55 | Roxygen: list(markdown = TRUE) 56 | RoxygenNote: 7.3.2 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Sam Firke 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2016 Sam Firke 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 
23 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(chisq.test,default) 4 | S3method(chisq.test,tabyl) 5 | S3method(clean_names,default) 6 | S3method(clean_names,sf) 7 | S3method(clean_names,tbl_graph) 8 | S3method(clean_names,tbl_lazy) 9 | S3method(describe_class,default) 10 | S3method(describe_class,factor) 11 | S3method(excel_time_to_numeric,POSIXct) 12 | S3method(excel_time_to_numeric,POSIXlt) 13 | S3method(excel_time_to_numeric,character) 14 | S3method(excel_time_to_numeric,logical) 15 | S3method(excel_time_to_numeric,numeric) 16 | S3method(fisher.test,default) 17 | S3method(fisher.test,tabyl) 18 | S3method(print,tabyl) 19 | S3method(tabyl,data.frame) 20 | S3method(tabyl,default) 21 | export("%>%") 22 | export(add_totals_col) 23 | export(add_totals_row) 24 | export(adorn_crosstab) 25 | export(adorn_ns) 26 | export(adorn_pct_formatting) 27 | export(adorn_percentages) 28 | export(adorn_rounding) 29 | export(adorn_title) 30 | export(adorn_totals) 31 | export(as_tabyl) 32 | export(chisq.test) 33 | export(clean_names) 34 | export(compare_df_cols) 35 | export(compare_df_cols_same) 36 | export(convert_to_NA) 37 | export(convert_to_date) 38 | export(convert_to_datetime) 39 | export(crosstab) 40 | export(describe_class) 41 | export(excel_numeric_to_date) 42 | export(excel_time_to_numeric) 43 | export(find_header) 44 | export(fisher.test) 45 | export(get_dupes) 46 | export(get_one_to_one) 47 | export(make_clean_names) 48 | export(paste_skip_na) 49 | export(remove_constant) 50 | export(remove_empty) 51 | export(remove_empty_cols) 52 | export(remove_empty_rows) 53 | export(round_half_up) 54 | export(round_to_fraction) 55 | export(row_to_names) 56 | export(sas_numeric_to_date) 57 | export(signif_half_up) 58 | export(single_value) 59 | export(tabyl) 60 | export(top_levels) 61 | export(untabyl) 62 | export(use_first_valid_of) 63 | importFrom(lubridate,as_date) 64 | importFrom(lubridate,as_datetime) 65 | importFrom(lubridate,force_tz) 66 | importFrom(lubridate,hour) 67 | importFrom(lubridate,minute) 68 | importFrom(lubridate,second) 69 | importFrom(lubridate,ymd) 70 | importFrom(lubridate,ymd_hms) 71 | importFrom(magrittr,"%>%") 72 | importFrom(rlang,"%||%") 73 | importFrom(rlang,dots_n) 74 | importFrom(rlang,expr) 75 | importFrom(rlang,syms) 76 | importFrom(snakecase,to_any_case) 77 | importFrom(stats,na.omit) 78 | importFrom(stringi,stri_trans_general) 79 | importFrom(stringi,stri_trans_list) 80 | importFrom(stringr,str_replace) 81 | importFrom(stringr,str_replace_all) 82 | importFrom(tidyselect,eval_select) 83 | -------------------------------------------------------------------------------- /R/adorn_pct_formatting.R: -------------------------------------------------------------------------------- 1 | #' Format a `data.frame` of decimals as percentages. 2 | #' 3 | #' @description 4 | #' Numeric columns get multiplied by 100 and formatted as 5 | #' percentages according to user specifications. This function defaults to 6 | #' excluding the first column of the input data.frame, assuming that it contains 7 | #' a descriptive variable, but this can be overridden by specifying the columns 8 | #' to adorn in the `...` argument. Non-numeric columns are always excluded. 9 | #' 10 | #' The decimal separator character is the result of `getOption("OutDec")`, which 11 | #' is based on the user's locale. 
If the default behavior is undesirable, 12 | #' change this value ahead of calling the function, either by changing locale or 13 | #' with `options(OutDec = ",")`. This aligns the decimal separator character 14 | #' with that used in `base::print()`. 15 | #' 16 | #' @param dat a data.frame with decimal values, typically the result of a call 17 | #' to `adorn_percentages` on a `tabyl`. If given a list of data.frames, this 18 | #' function will apply itself to each data.frame in the list (designed for 19 | #' 3-way `tabyl` lists). 20 | #' @param digits how many digits should be displayed after the decimal point? 21 | #' @param rounding method to use for rounding - either "half to even", the base 22 | #' R default method, or "half up", where 14.5 rounds up to 15. 23 | #' @param affix_sign should the % sign be affixed to the end? 24 | #' @param ... columns to adorn. This takes a tidyselect specification. By 25 | #' default, all numeric columns (besides the initial column, if numeric) are 26 | #' adorned, but this allows you to manually specify which columns should be 27 | #' adorned, for use on a data.frame that does not result from a call to 28 | #' `tabyl`. 29 | #' @return a data.frame with formatted percentages 30 | #' @export 31 | #' @examples 32 | #' mtcars %>% 33 | #' tabyl(am, cyl) %>% 34 | #' adorn_percentages("col") %>% 35 | #' adorn_pct_formatting() 36 | #' 37 | #' # Control the columns to be adorned with the ... variable selection argument 38 | #' # If using only the ... argument, you can use empty commas as shorthand 39 | #' # to supply the default values to the preceding arguments: 40 | #' 41 | #' cases <- data.frame( 42 | #' region = c("East", "West"), 43 | #' year = 2015, 44 | #' recovered = c(125, 87), 45 | #' died = c(13, 12) 46 | #' ) 47 | #' 48 | #' cases %>% 49 | #' adorn_percentages("col", , recovered:died) %>% 50 | #' adorn_pct_formatting(, , , recovered:died) 51 | #' 52 | adorn_pct_formatting <- function(dat, digits = 1, rounding = "half to even", affix_sign = TRUE, ...) { 53 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 54 | if (is.list(dat) && !is.data.frame(dat)) { 55 | purrr::map(dat, adorn_pct_formatting, digits, rounding, affix_sign) 56 | } else { 57 | # catch bad inputs 58 | if (!is.data.frame(dat)) { 59 | stop("adorn_pct_formatting() must be called on a data.frame or list of data.frames") 60 | } 61 | rlang::arg_match0(rounding, c("half to even", "half up")) 62 | 63 | original <- dat # used below to record original instances of NA and NaN 64 | 65 | numeric_cols <- which(vapply(dat, is.numeric, logical(1))) 66 | non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) 67 | numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors 68 | 69 | if (rlang::dots_n(...) 
== 0) { 70 | cols_to_adorn <- numeric_cols 71 | } else { 72 | expr <- rlang::expr(c(...)) 73 | cols_to_adorn <- tidyselect::eval_select(expr, data = dat) 74 | if (any(cols_to_adorn %in% non_numeric_cols)) { 75 | # don't need to print a message, adorn_rounding will 76 | cols_to_adorn <- setdiff(cols_to_adorn, non_numeric_cols) 77 | } 78 | } 79 | 80 | 81 | if ("one_way" %in% attr(dat, "tabyl_type")) { 82 | cols_to_adorn <- setdiff(numeric_cols, 2) # so that it works on a one-way tabyl 83 | } 84 | 85 | if (length(cols_to_adorn) == 0) { 86 | stop("at least one targeted column must be of class numeric") 87 | } 88 | 89 | dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) x * 100) 90 | dat <- adorn_rounding(dat, digits = digits, rounding = rounding, ...) 91 | dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) { 92 | format(x, 93 | nsmall = digits, 94 | decimal.mark = getOption("OutDec"), 95 | trim = TRUE 96 | ) 97 | }) # so that 0% prints as 0.0% or 0.00% etc. 98 | if (affix_sign) { 99 | dat[cols_to_adorn] <- lapply(dat[cols_to_adorn], function(x) paste0(x, "%")) 100 | } 101 | dat[cols_to_adorn][is.na(original[cols_to_adorn])] <- "-" # NA and NaN values in the original should be simply "-" for printing of results 102 | dat 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /R/adorn_percentages.R: -------------------------------------------------------------------------------- 1 | #' Convert a data.frame of counts to percentages. 2 | #' 3 | #' This function defaults to excluding the first column of the input data.frame, 4 | #' assuming that it contains a descriptive variable, but this can be overridden 5 | #' by specifying the columns to adorn in the `...` argument. 6 | #' 7 | #' @param dat A `tabyl` or other data.frame with a tabyl-like layout. 8 | #' If given a list of data.frames, this function will apply itself to each 9 | #' `data.frame` in the list (designed for 3-way `tabyl` lists). 10 | #' @param denominator The direction to use for calculating percentages. 11 | #' One of "row", "col", or "all". 12 | #' @param na.rm should missing values (including `NaN`) be omitted from the calculations? 13 | #' @param ... columns to adorn. This takes a <[`tidy-select`][dplyr::dplyr_tidy_select]> 14 | #' specification. By default, all numeric columns (besides the initial column, if numeric) 15 | #' are adorned, but this allows you to manually specify which columns should 16 | #' be adorned, for use on a `data.frame` that does not result from a call to [tabyl()]. 17 | #' 18 | #' @return A `data.frame` of percentages, expressed as numeric values between 0 and 1. 19 | #' @export 20 | #' @examples 21 | #' 22 | #' mtcars %>% 23 | #' tabyl(am, cyl) %>% 24 | #' adorn_percentages("col") 25 | #' 26 | #' # calculates correctly even with totals column and/or row: 27 | #' mtcars %>% 28 | #' tabyl(am, cyl) %>% 29 | #' adorn_totals("row") %>% 30 | #' adorn_percentages() 31 | #' 32 | #' # Control the columns to be adorned with the ... variable selection argument 33 | #' # If using only the ... argument, you can use empty commas as shorthand 34 | #' # to supply the default values to the preceding arguments: 35 | #' 36 | #' cases <- data.frame( 37 | #' region = c("East", "West"), 38 | #' year = 2015, 39 | #' recovered = c(125, 87), 40 | #' died = c(13, 12) 41 | #' ) 42 | #' 43 | #' cases %>% 44 | #' adorn_percentages(, , recovered:died) 45 | adorn_percentages <- function(dat, denominator = "row", na.rm = TRUE, ...) 
{ 46 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 47 | if (is.list(dat) && !is.data.frame(dat)) { 48 | purrr::map(dat, adorn_percentages, denominator, na.rm, ...) 49 | } else { 50 | # catch bad inputs 51 | if (!is.data.frame(dat)) { 52 | stop("adorn_percentages() must be called on a data.frame or list of data.frames") 53 | } 54 | rlang::arg_match0(denominator, c("row", "col", "all")) 55 | 56 | dat <- as_tabyl(dat) 57 | 58 | numeric_cols <- which(vapply(dat, is.numeric, logical(1))) 59 | non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) 60 | numeric_cols <- setdiff(numeric_cols, 1) # assume 1st column should not be included so remove it from numeric_cols. Moved up to this line so that if only 1st col is numeric, the function errors 61 | explicitly_exempt_totals <- FALSE 62 | 63 | if (rlang::dots_n(...) == 0) { 64 | cols_to_tally <- numeric_cols 65 | } else { 66 | expr <- rlang::expr(c(...)) 67 | cols_to_tally <- tidyselect::eval_select(expr, data = dat) 68 | explicitly_exempt_totals <- !(ncol(dat) %in% cols_to_tally) # if not present, it's b/c user explicitly exempted it 69 | if (any(cols_to_tally %in% non_numeric_cols)) { 70 | message("At least one non-numeric column was specified. All non-numeric columns will be removed from percentage calculations.") 71 | cols_to_tally <- setdiff(cols_to_tally, non_numeric_cols) 72 | } 73 | } 74 | 75 | if ("col" %in% attr(dat, "totals")) { 76 | # if there's a totals col, don't use it to calculate the %s 77 | cols_to_tally <- setdiff(cols_to_tally, ncol(dat)) 78 | } 79 | 80 | if (denominator == "row") { 81 | # if row-wise percentages and a totals column, need to exempt totals col and make it all 1s 82 | if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) { 83 | dat[[ncol(dat)]] <- rep(1, nrow(dat)) 84 | } 85 | row_sum <- rowSums(dat[cols_to_tally], na.rm = na.rm) 86 | dat[, cols_to_tally] <- dat[cols_to_tally] / row_sum 87 | } else if (denominator == "col") { 88 | # if col-wise percentages and a row column, need to exempt totals row and make it all 1s 89 | if ("row" %in% attr(dat, "totals")) { 90 | col_sum <- colSums(dat[-nrow(dat), ][cols_to_tally], na.rm = na.rm) 91 | } else { 92 | col_sum <- colSums(dat[cols_to_tally], na.rm = na.rm) 93 | } 94 | # add totals col back to be tallied, #357 95 | if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) { 96 | cols_to_tally <- c(cols_to_tally, ncol(dat)) 97 | if ("row" %in% attr(dat, "totals")) { 98 | col_sum <- c(col_sum, sum(dat[-nrow(dat), ncol(dat)])) 99 | } else { 100 | col_sum <- c(col_sum, sum(dat[, ncol(dat)])) 101 | } 102 | } 103 | dat[cols_to_tally] <- sweep(dat[cols_to_tally], 2, col_sum, `/`) # from http://stackoverflow.com/questions/9447801/dividing-columns-by-colsums-in-r 104 | } else if (denominator == "all") { 105 | # if all-wise percentages, need to exempt any totals col or row 106 | if ("row" %in% attr(dat, "totals")) { 107 | complete_n <- sum(dat[-nrow(dat), cols_to_tally], na.rm = TRUE) 108 | } else { 109 | complete_n <- sum(dat[, cols_to_tally], na.rm = TRUE) 110 | } 111 | # add totals col back to be tallied, #357 112 | if ("col" %in% attr(dat, "totals") & !explicitly_exempt_totals) { 113 | cols_to_tally <- c(cols_to_tally, ncol(dat)) 114 | } 115 | dat[cols_to_tally] <- dat[cols_to_tally] / complete_n 116 | } 117 | dat 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /R/adorn_rounding.R: 
-------------------------------------------------------------------------------- 1 | #' Round the numeric columns in a data.frame. 2 | #' 3 | #' @description 4 | #' Can run on any `data.frame` with at least one numeric column. 5 | #' This function defaults to excluding the first column of the input data.frame, 6 | #' assuming that it contains a descriptive variable, but this can be overridden by 7 | #' specifying the columns to round in the `...` argument. 8 | #' 9 | #' If you're formatting percentages, e.g., the result of [adorn_percentages()], 10 | #' use [adorn_pct_formatting()] instead. This is a more flexible variant for ad-hoc usage. 11 | #' Compared to `adorn_pct_formatting()`, it does not multiply by 100 or pad the 12 | #' numbers with spaces for alignment in the results `data.frame`. 13 | #' This function retains the class of numeric input columns. 14 | #' 15 | #' @param dat A `tabyl` or other `data.frame` with similar layout. 16 | #' If given a list of data.frames, this function will apply itself to each 17 | #' `data.frame` in the list (designed for 3-way `tabyl` lists). 18 | #' @param digits How many digits should be displayed after the decimal point? 19 | #' @param rounding Method to use for rounding - either "half to even" 20 | #' (the base R default method), or "half up", where 14.5 rounds up to 15. 21 | #' @param ... Columns to adorn. This takes a tidyselect specification. 22 | #' By default, all numeric columns (besides the initial column, if numeric) 23 | #' are adorned, but this allows you to manually specify which columns should 24 | #' be adorned, for use on a data.frame that does not result from a call to `tabyl`. 25 | #' 26 | #' @return The `data.frame` with rounded numeric columns. 27 | #' @export 28 | #' @examples 29 | #' 30 | #' mtcars %>% 31 | #' tabyl(am, cyl) %>% 32 | #' adorn_percentages() %>% 33 | #' adorn_rounding(digits = 2, rounding = "half up") 34 | #' 35 | #' # tolerates non-numeric columns: 36 | #' library(dplyr) 37 | #' mtcars %>% 38 | #' tabyl(am, cyl) %>% 39 | #' adorn_percentages("all") %>% 40 | #' mutate(dummy = "a") %>% 41 | #' adorn_rounding() 42 | #' 43 | #' # Control the columns to be adorned with the ... variable selection argument 44 | #' # If using only the ... argument, you can use empty commas as shorthand 45 | #' # to supply the default values to the preceding arguments: 46 | #' cases <- data.frame( 47 | #' region = c("East", "West"), 48 | #' year = 2015, 49 | #' recovered = c(125, 87), 50 | #' died = c(13, 12) 51 | #' ) 52 | #' 53 | #' cases %>% 54 | #' adorn_percentages(, , ends_with("ed")) %>% 55 | #' adorn_rounding(, , all_of(c("recovered", "died"))) 56 | adorn_rounding <- function(dat, digits = 1, rounding = "half to even", ...) { 57 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 58 | if (is.list(dat) && !is.data.frame(dat)) { 59 | purrr::map(dat, adorn_rounding, digits, rounding, ...) 60 | } else { 61 | # catch bad inputs 62 | if (!is.data.frame(dat)) { 63 | stop("adorn_rounding() must be called on a data.frame or list of data.frames") 64 | } 65 | if (!rounding %in% c("half to even", "half up")) { 66 | stop("'rounding' must be one of 'half to even' or 'half up'") 67 | } 68 | numeric_cols <- which(vapply(dat, is.numeric, logical(1))) 69 | non_numeric_cols <- setdiff(1:ncol(dat), numeric_cols) 70 | # assume 1st column should not be included so remove it from numeric_cols. 
71 | # Moved up to this line so that if only 1st col is numeric, the function errors 72 | numeric_cols <- setdiff(numeric_cols, 1) 73 | 74 | if (rlang::dots_n(...) == 0) { 75 | cols_to_round <- numeric_cols 76 | } else { 77 | expr <- rlang::expr(c(...)) 78 | cols_to_round <- tidyselect::eval_select(expr, data = dat) 79 | if (any(cols_to_round %in% non_numeric_cols)) { 80 | message("At least one non-numeric column was specified and will not be modified.") 81 | cols_to_round <- setdiff(cols_to_round, non_numeric_cols) 82 | } 83 | } 84 | 85 | if (rounding == "half to even") { 86 | dat[cols_to_round] <- lapply(dat[cols_to_round], function(x) round(x, digits = digits)) 87 | } else { 88 | dat[cols_to_round] <- lapply(dat[cols_to_round], function(x) round_half_up(x, digits = digits)) 89 | } 90 | dat 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /R/adorn_title.R: -------------------------------------------------------------------------------- 1 | #' Add column name to the top of a two-way tabyl. 2 | #' 3 | #' This function adds the column variable name to the top of a `tabyl` for a 4 | #' complete display of information. This makes the tabyl prettier, but renders 5 | #' the `data.frame` less useful for further manipulation. 6 | #' 7 | #' The `placement` argument indicates whether the column name should be added to 8 | #' the `top` of the tabyl in an otherwise-empty row `"top"` or appended to the 9 | #' already-present row name variable (`"combined"`). The formatting in the `"top"` 10 | #' option has the look of base R's `table()`; it also wipes out the other column 11 | #' names, making it hard to further use the `data.frame` besides formatting it for reporting. 12 | #' The `"combined"` option is more conservative in this regard. 13 | #' 14 | #' @param dat A `data.frame` of class `tabyl` or other `data.frame` with a tabyl-like layout. 15 | #' If given a list of data.frames, this function will apply itself to each `data.frame` 16 | #' in the list (designed for 3-way `tabyl` lists). 17 | #' @param placement The title placement, one of `"top"`, or `"combined"`. 18 | #' See **Details** for more information. 19 | #' @param row_name (optional) default behavior is to pull the row name from the 20 | #' attributes of the input `tabyl` object. If you wish to override that text, 21 | #' or if your input is not a `tabyl`, supply a string here. 22 | #' @param col_name (optional) default behavior is to pull the column_name from 23 | #' the attributes of the input `tabyl` object. If you wish to override that text, 24 | #' or if your input is not a `tabyl`, supply a string here. 25 | #' @return The input `tabyl`, augmented with the column title. Non-tabyl inputs 26 | #' that are of class `tbl_df` are downgraded to basic data.frames so that the 27 | #' title row prints correctly. 
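#'
#' For instance, with `placement = "combined"` the row and column variable names
#' should be pasted together into the first column's header (e.g. `am/cyl`):
#'
#' ```r
#' mtcars %>%
#'   tabyl(am, cyl) %>%
#'   adorn_title(placement = "combined")
#' ```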
28 | #' 29 | #' @export 30 | #' @examples 31 | #' 32 | #' mtcars %>% 33 | #' tabyl(am, cyl) %>% 34 | #' adorn_title(placement = "top") 35 | #' 36 | #' # Adding a title to a non-tabyl 37 | #' library(tidyr) 38 | #' library(dplyr) 39 | #' mtcars %>% 40 | #' group_by(gear, am) %>% 41 | #' summarise(avg_mpg = mean(mpg), .groups = "drop") %>% 42 | #' pivot_wider(names_from = am, values_from = avg_mpg) %>% 43 | #' adorn_rounding() %>% 44 | #' adorn_title("top", row_name = "Gears", col_name = "Cylinders") 45 | adorn_title <- function(dat, placement = "top", row_name, col_name) { 46 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 47 | if (is.list(dat) && !is.data.frame(dat)) { 48 | purrr::map(dat, adorn_title, placement, row_name, col_name) 49 | } else { 50 | if (!is.data.frame(dat)) { 51 | stop("\"dat\" must be a data.frame") 52 | } 53 | 54 | rlang::arg_match0(placement, c("top", "combined")) 55 | 56 | if (inherits(dat, "tabyl")) { 57 | if (attr(dat, "tabyl_type") == "one_way") { 58 | warning( 59 | "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result" 60 | ) 61 | } 62 | } 63 | if (missing(col_name)) { 64 | if (!inherits(dat, "tabyl")) { 65 | stop("When input is not a data.frame of class tabyl, a value must be specified for the col_name argument.") 66 | } 67 | col_var <- attr(dat, "var_names")$col 68 | } else { 69 | if (!is.character(col_name)) { 70 | stop("col_name must be a string") 71 | } 72 | col_var <- col_name 73 | } 74 | 75 | if (!missing(row_name)) { 76 | if (!is.character(row_name)) { 77 | stop("row_name must be a string") 78 | } 79 | names(dat)[1] <- row_name 80 | row_var <- row_name 81 | } else { 82 | if (inherits(dat, "tabyl")) { 83 | row_var <- attr(dat, "var_names")$row 84 | } else { 85 | # for non-tabyl input, if no row_name supplied, use first existing name 86 | row_var <- names(dat)[1] 87 | } 88 | } 89 | 90 | 91 | if (placement == "top") { 92 | # to handle factors, problematic in first column and at bind_rows. 93 | dat[, ] <- lapply(dat[, ], as.character) 94 | # Can't use mutate_all b/c it strips attributes 95 | top <- dat[1, ] 96 | 97 | top[1, ] <- as.list(names(top)) 98 | 99 | out <- dplyr::bind_rows(top, dat) 100 | out <- stats::setNames(out, c("", col_var, rep("", ncol(out) - 2))) 101 | } 102 | if (placement == "combined") { 103 | out <- dat 104 | names(out)[1] <- paste(row_var, col_var, sep = "/") 105 | } 106 | # "top" text doesn't print if input (and thus the output) is a tibble 107 | if (inherits(out, "tbl_df")) { 108 | # but this prints row numbers, so don't apply to non-tbl_dfs like tabyls 109 | out <- as.data.frame(out) 110 | } 111 | out 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /R/as_and_untabyl.R: -------------------------------------------------------------------------------- 1 | #' Add `tabyl` attributes to a data.frame 2 | #' 3 | #' @description 4 | #' A `tabyl` is a `data.frame` containing counts of a variable or 5 | #' co-occurrences of two variables (a.k.a., a contingency table or crosstab). 6 | #' This specialized kind of data.frame has attributes that enable `adorn_` 7 | #' functions to be called for precise formatting and presentation of results. 8 | #' E.g., display results as a mix of percentages, Ns, add totals rows or 9 | #' columns, rounding options, in the style of Microsoft Excel PivotTable. 
10 | #' 11 | #' A `tabyl` can be the result of a call to `janitor::tabyl()`, in which case 12 | #' these attributes are added automatically. This function adds `tabyl` class 13 | #' attributes to a data.frame that isn't the result of a call to `tabyl` but 14 | #' meets the requirements of a two-way tabyl: 1) the first column contains values of 15 | #' variable 1, 2) column names 2:n are the values of variable 2, and 3) numeric values 16 | #' in columns 2:n are counts of the co-occurrences of the two variables.* 17 | #' 18 | #' * = this is the ideal form of a `tabyl`, but janitor's `adorn_` functions tolerate 19 | #' and ignore non-numeric columns in positions 2:n. 20 | #' 21 | #' For instance, the result of [dplyr::count()] followed by [tidyr::pivot_wider()] 22 | #' can be treated as a `tabyl`. 23 | #' 24 | #' The result of calling [tabyl()] on a single variable is a special class of 25 | #' one-way tabyl; this function only pertains to the two-way tabyl. 26 | #' 27 | #' @param dat a data.frame with variable values in the first column and numeric 28 | #' values in all other columns. 29 | #' @param axes is this a two_way tabyl or a one_way tabyl? If this function is 30 | #' being called by a user, this should probably be "2". One-way tabyls are 31 | #' created by `tabyl` but are a special case. 32 | #' @param row_var_name (optional) the name of the variable in the row dimension; 33 | #' used by `adorn_title()`. 34 | #' @param col_var_name (optional) the name of the variable in the column 35 | #' dimension; used by `adorn_title()`. 36 | #' @return Returns the same data.frame, but with the additional class of "tabyl" 37 | #' and the attribute "core". 38 | #' @export 39 | #' @examples 40 | #' as_tabyl(mtcars) 41 | #' 42 | as_tabyl <- function(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) { 43 | if (!axes %in% 1:2) { 44 | stop("axes must be either 1 or 2") 45 | } 46 | 47 | # check whether input meets requirements 48 | if (!is.data.frame(dat)) { 49 | stop("input must be a data.frame") 50 | } 51 | if (sum(unlist(lapply(dat, is.numeric))[-1]) == 0) { 52 | stop("at least one of columns 2:n must be of class numeric") 53 | } 54 | 55 | # assign core attribute and classes 56 | if (inherits(dat, "tabyl")) { 57 | # if already a tabyl, may have totals row. 58 | # Safest play is to simply reorder the core rows to match the dat rows 59 | attr(dat, "core") <- attr(dat, "core")[order(match( 60 | attr(dat, "core")[, 1], 61 | dat[, 1] 62 | )), ] 63 | row.names(attr(dat, "core")) <- 1:nrow(attr(dat, "core")) # if they're sorted in the prior step above, this resets 64 | } else { 65 | attr(dat, "core") <- as.data.frame(dat) # core goes first so dat does not yet have attributes attached to it 66 | } 67 | 68 | attr(dat, "tabyl_type") <- ifelse( 69 | !is.null(attr(dat, "tabyl_type")), 70 | attr(dat, "tabyl_type"), # if a one_way tabyl has as_tabyl called on it, it should stay a one_way #523 71 | dplyr::case_when( 72 | axes == 1 ~ "one_way", 73 | axes == 2 ~ "two_way" 74 | ) 75 | ) 76 | class(dat) <- c("tabyl", setdiff(class(dat), "tabyl")) 77 | 78 | if (!missing(row_var_name) | !missing(col_var_name)) { 79 | if (axes != 2) { 80 | stop("variable names are only meaningful for two-way tabyls") 81 | } 82 | attr(dat, "var_names") <- list(row = row_var_name, col = col_var_name) 83 | } 84 | 85 | dat 86 | } 87 | 88 | #' Remove `tabyl` attributes from a data.frame. 89 | #' 90 | #' Strips away all `tabyl`-related attributes from a data.frame. 91 | #' 92 | #' @param dat a `data.frame` of class `tabyl`.
93 | #' @return the same `data.frame`, but without the `tabyl` class and attributes. 94 | #' @export 95 | #' @examples 96 | #' 97 | #' mtcars %>% 98 | #' tabyl(am) %>% 99 | #' untabyl() %>% 100 | #' attributes() # tabyl-specific attributes are gone 101 | untabyl <- function(dat) { 102 | # if input is a list, call purrr::map to recursively apply this function to each data.frame 103 | if (is.list(dat) && !is.data.frame(dat)) { 104 | purrr::map(dat, untabyl) 105 | } else { 106 | if (!inherits(dat, "tabyl")) { 107 | warning("untabyl() called on a non-tabyl") 108 | } 109 | class(dat) <- class(dat)[!class(dat) %in% "tabyl"] 110 | attr(dat, "core") <- NULL 111 | # These attributes may not exist, but simpler to declare them NULL regardless than to check to see if they exist: 112 | attr(dat, "totals") <- NULL 113 | attr(dat, "tabyl_type") <- NULL # may not exist, but simpler to declare it NULL regardless than to check to see if it exists 114 | attr(dat, "var_names") <- NULL # may not exist, but simpler to declare it NULL regardless than to check to see if it exists 115 | dat 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /R/excel_dates.R: -------------------------------------------------------------------------------- 1 | #' Convert dates encoded as serial numbers to Date class. 2 | #' 3 | #' @description 4 | #' Converts numbers like `42370` into date values like `2016-01-01`. 5 | #' 6 | #' Defaults to the modern Excel date encoding system. However, Excel for Mac 7 | #' 2008 and earlier Mac versions of Excel used a different date system. To 8 | #' determine what platform to specify: if the date 2016-01-01 is represented by 9 | #' the number 42370 in your spreadsheet, it's the modern system. If it's 40908, 10 | #' it's the old Mac system. More on date encoding systems at 11 | #' http://support.office.com/en-us/article/Date-calculations-in-Excel-e7fe7167-48a9-4b96-bb53-5612a800b487. 12 | #' 13 | #' A list of all timezones is available from `base::OlsonNames()`, and the 14 | #' current timezone is available from `base::Sys.timezone()`. 15 | #' 16 | #' If your input data has a mix of Excel numeric dates and actual dates, see the 17 | #' more powerful functions [convert_to_date()] and `convert_to_datetime()`. 18 | #' 19 | #' @param date_num numeric vector of serial numbers to convert. 20 | #' @param date_system the date system, either `"modern"` or `"mac pre-2011"`. 21 | #' @param include_time Include the time (hours, minutes, seconds) in the output? 22 | #' (See details) 23 | #' @param round_seconds Round the seconds to an integer (only has an effect when 24 | #' `include_time` is `TRUE`)? 25 | #' @param tz Time zone, used when `include_time = TRUE` (see details for 26 | #' more information on timezones). 27 | #' @return Returns a vector of class Date if `include_time` is 28 | #' `FALSE`. Returns a vector of class POSIXlt if `include_time` is 29 | #' `TRUE`. 30 | #' @details When using `include_time=TRUE`, days with leap seconds will not 31 | #' be accurately handled as they do not appear to be accurately handled by 32 | #' Windows (as described in 33 | #' https://support.microsoft.com/en-us/help/2722715/support-for-the-leap-second). 
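#'
#' As an illustrative check of the two encoding systems described above, both of
#' these calls should return `2016-01-01`:
#'
#' ```r
#' excel_numeric_to_date(42370) # modern date system
#' excel_numeric_to_date(40908, date_system = "mac pre-2011") # old Mac date system
#' ```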
34 | #' 35 | #' @export 36 | #' @seealso [excel_time_to_numeric()] 37 | #' @examples 38 | #' excel_numeric_to_date(40000) 39 | #' excel_numeric_to_date(40000.5) # No time is included 40 | #' excel_numeric_to_date(40000.5, include_time = TRUE) # Time is included 41 | #' excel_numeric_to_date(40000.521, include_time = TRUE) # Time is included 42 | #' excel_numeric_to_date(40000.521, 43 | #' include_time = TRUE, 44 | #' round_seconds = FALSE 45 | #' ) # Time with fractional seconds is included 46 | #' @family date-time cleaning 47 | #' @importFrom lubridate as_date as_datetime force_tz hour minute second 48 | excel_numeric_to_date <- function(date_num, date_system = "modern", include_time = FALSE, round_seconds = TRUE, tz = Sys.timezone()) { 49 | if (all(is.na(date_num))) { 50 | # For NA input, return the expected type of NA output. 51 | if (include_time) { 52 | return(lubridate::as_datetime(date_num, tz = tz)) 53 | } else { 54 | return(lubridate::as_date(date_num)) 55 | } 56 | } else if (!is.numeric(date_num)) { 57 | stop("argument `date_num` must be of class numeric") 58 | } 59 | 60 | # Manage floating point imprecision; coerce to double to avoid integer 61 | # overflow. 62 | date_num_days <- (as.double(date_num) * 86400L + 0.001) %/% 86400L 63 | date_num_days_no_floating_correction <- date_num %/% 1 64 | # If the day rolls over due to machine precision, then the seconds should be zero 65 | mask_day_rollover <- !is.na(date_num) & date_num_days > date_num_days_no_floating_correction 66 | date_num_seconds <- (date_num - date_num_days) * 86400 67 | date_num_seconds[mask_day_rollover] <- 0 68 | if (round_seconds) { 69 | date_num_seconds <- round(date_num_seconds) 70 | } 71 | if (any(mask_day_rollover)) { 72 | warning(sum(mask_day_rollover), " date_num values are within 0.001 sec of a later date and were rounded up to the next day.") 73 | } 74 | mask_excel_leap_day_bug <- !is.na(date_num_days) & floor(date_num_days) == 60 75 | mask_before_excel_leap_day_bug <- !is.na(date_num_days) & floor(date_num_days) < 60 76 | date_num_days[mask_excel_leap_day_bug] <- NA_real_ 77 | if (any(!is.na(date_num_days) & (date_num_days < 1))) { 78 | warning("Only `date_num` >= 1 are valid in Excel, creating an earlier date than Excel supports.") 79 | } 80 | date_num_days[mask_before_excel_leap_day_bug] <- date_num_days[mask_before_excel_leap_day_bug] + 1 81 | ret <- 82 | if (date_system == "mac pre-2011") { 83 | lubridate::as_date(floor(date_num_days), origin = "1904-01-01") 84 | } else if (date_system == "modern") { 85 | lubridate::as_date(floor(date_num_days), origin = "1899-12-30") 86 | } else { 87 | stop("argument 'date_system' must be one of 'mac pre-2011' or 'modern'") 88 | } 89 | if (include_time) { 90 | ret <- lubridate::as_datetime(ret) 91 | lubridate::second(ret) <- date_num_seconds %% 60 92 | lubridate::minute(ret) <- floor(date_num_seconds / 60) %% 60 93 | lubridate::hour(ret) <- floor(date_num_seconds / 3600) 94 | ret <- lubridate::force_tz(ret, tzone = tz) 95 | } 96 | if (any(mask_excel_leap_day_bug)) { 97 | warning("NAs introduced by coercion, Excel leap day bug detected in `date_num`. 29 February 1900 does not exist.") 98 | } 99 | if (any(is.na(ret) & !is.na(date_num) & !mask_excel_leap_day_bug)) { 100 | warning("NAs introduced by coercion, possible daylight savings time issue with input. 
Consider `tz='UTC'`.") 101 | } 102 | ret 103 | } 104 | -------------------------------------------------------------------------------- /R/get_dupes.R: -------------------------------------------------------------------------------- 1 | #' Get rows of a `data.frame` with identical values for the specified variables. 2 | #' 3 | #' For hunting duplicate records during data cleaning. Specify the data.frame 4 | #' and the variable combination to search for duplicates and get back the 5 | #' duplicated rows. 6 | #' 7 | #' @param dat The input `data.frame`. 8 | #' @param ... Unquoted variable names to search for duplicates. This takes a 9 | #' tidyselect specification. 10 | #' @return A data.frame with the full records where the specified 11 | #' variables have duplicated values, as well as a variable `dupe_count` 12 | #' showing the number of rows sharing that combination of duplicated values. 13 | #' If the input data.frame was of class `tbl_df`, the output is as well. 14 | #' @export 15 | #' @examples 16 | #' get_dupes(mtcars, mpg, hp) 17 | #' 18 | #' # or called with the magrittr pipe %>% : 19 | #' mtcars %>% get_dupes(wt) 20 | #' 21 | #' # You can use tidyselect helpers to specify variables: 22 | #' mtcars %>% get_dupes(-c(wt, qsec)) 23 | #' mtcars %>% get_dupes(starts_with("cy")) 24 | #' @importFrom tidyselect eval_select 25 | #' @importFrom rlang expr dots_n syms %||% 26 | get_dupes <- function(dat, ...) { 27 | expr <- rlang::expr(c(...)) 28 | pos <- tidyselect::eval_select(expr, data = dat) 29 | 30 | # Check if dat is grouped and if so, save structure and ungroup temporarily 31 | is_grouped <- dplyr::is_grouped_df(dat) 32 | 33 | if (is_grouped) { 34 | dat_groups <- dplyr::group_vars(dat) 35 | dat <- dat %>% dplyr::ungroup() 36 | if (getOption("get_dupes.grouped_warning", TRUE) & interactive()) { 37 | message(paste0("Data is grouped by [", paste(dat_groups, collapse = "|"), "]. Note that get_dupes() is not group aware and does not limit duplicate detection to within-groups, but rather checks over the entire data frame. However grouping structure is preserved.\nThis message is shown once per session and may be disabled by setting options(\"get_dupes.grouped_warning\" = FALSE).")) # nocov 38 | options("get_dupes.grouped_warning" = FALSE) # nocov 39 | } 40 | } 41 | 42 | if (rlang::dots_n(...) == 0) { # if no tidyselect variables are specified, check the whole data.frame 43 | var_names <- names(dat) 44 | nms <- rlang::syms(var_names) 45 | message("No variable names specified - using all columns.\n") 46 | } else { 47 | var_names <- names(pos) 48 | nms <- rlang::syms(var_names) 49 | } 50 | 51 | dupe_count <- NULL # to appease NOTE for CRAN; does nothing. 52 | 53 | 54 | dupes <- dat %>% 55 | dplyr::add_count(!!!nms, name = "dupe_count") %>% 56 | dplyr::filter(dupe_count > 1) %>% 57 | dplyr::select(!!!nms, dupe_count, dplyr::everything()) %>% 58 | dplyr::arrange(dplyr::desc(dupe_count), !!!nms) 59 | 60 | # shorten error message for large data.frames 61 | if (length(var_names) > 10) { 62 | var_names <- c(var_names[1:9], paste("... 
and", length(var_names) - 9, "other variables")) 63 | } 64 | if (nrow(dupes) == 0) { 65 | message(paste0("No duplicate combinations found of: ", paste(var_names, collapse = ", "))) 66 | } 67 | 68 | # Reapply groups if dat was grouped 69 | if (is_grouped) dupes <- dupes %>% dplyr::group_by(!!!rlang::syms(dat_groups)) 70 | 71 | return(dupes) 72 | } 73 | -------------------------------------------------------------------------------- /R/get_level_groups.R: -------------------------------------------------------------------------------- 1 | # Return groupings for a factor variable in the top_levels() function 2 | 3 | get_level_groups <- function(vec, n, num_levels_in_var) { 4 | top_n_lvls <- paste(levels(vec)[1:n], collapse = ", ") 5 | bot_n_lvls <- paste(levels(vec)[(num_levels_in_var - n + 1):num_levels_in_var], collapse = ", ") 6 | 7 | # Identify middle combinations, if needed 8 | if (num_levels_in_var > 2 * n) { 9 | mid_lvls <- paste(levels(vec)[(n + 1):(num_levels_in_var - n)], collapse = ", ") 10 | } else { 11 | mid_lvls <- NA 12 | } 13 | 14 | # Truncate strings if needed 15 | ## Middle groups are variable size, so displaying the N there is useful; 16 | ## Top/Bottom are user-specified size, so just truncate the labels 17 | if (!is.na(mid_lvls) & nchar(mid_lvls) > 30) { 18 | mid_lvls <- paste0("<<< Middle Group (", num_levels_in_var - 2 * n, " categories) >>>") 19 | } 20 | if (nchar(top_n_lvls) > 30) { 21 | top_n_lvls <- paste0(substr(top_n_lvls, 1, 27), "...") 22 | } 23 | if (nchar(bot_n_lvls) > 30) { 24 | bot_n_lvls <- paste0(substr(bot_n_lvls, 1, 27), "...") 25 | } 26 | 27 | list(top = top_n_lvls, mid = mid_lvls, bot = bot_n_lvls) 28 | } 29 | -------------------------------------------------------------------------------- /R/get_one_to_one.R: -------------------------------------------------------------------------------- 1 | #' Find the list of columns that have a 1:1 mapping to each other 2 | #' 3 | #' @param dat A `data.frame` or similar object 4 | #' @return A list with one element for each group of columns that map 5 | #' identically to each other. 
6 | #' @export 7 | #' @examples 8 | #' foo <- data.frame( 9 | #' Lab_Test_Long = c("Cholesterol, LDL", "Cholesterol, LDL", "Glucose"), 10 | #' Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 11 | #' LOINC = c(12345, 12345, 54321), 12 | #' Person = c("Sam", "Bill", "Sam"), 13 | #' stringsAsFactors = FALSE 14 | #' ) 15 | #' get_one_to_one(foo) 16 | get_one_to_one <- function(dat) { 17 | stopifnot(ncol(dat) > 0) 18 | stopifnot(!any(duplicated(names(dat)))) 19 | dat_alt <- dat 20 | for (idx in seq_along(dat_alt)) { 21 | dat_alt[[idx]] <- get_one_to_one_value_order(dat_alt[[idx]]) 22 | } 23 | remaining_cols <- names(dat_alt) 24 | ret <- list() 25 | while (length(remaining_cols) > 0) { 26 | nm1 <- remaining_cols[1] 27 | remaining_cols <- remaining_cols[-1] 28 | current_ret <- nm1 29 | for (nm2 in remaining_cols) { 30 | if (identical(dat_alt[[nm1]], dat_alt[[nm2]])) { 31 | current_ret <- c(current_ret, nm2) 32 | remaining_cols <- setdiff(remaining_cols, nm2) 33 | } 34 | } 35 | if (length(current_ret) > 1) { 36 | ret[[length(ret) + 1]] <- current_ret 37 | } 38 | } 39 | if (length(ret) == 0) { 40 | message("No columns in `", deparse(substitute(dat)), "` map to each other") 41 | } 42 | ret 43 | } 44 | 45 | get_one_to_one_value_order <- function(x) { 46 | # Convert the value to a factor so that any subtly different values become integers 47 | uvalues <- match(x, unique(x)) 48 | new_value <- as.integer(factor(uvalues, levels = unique(uvalues))) 49 | new_value 50 | } 51 | -------------------------------------------------------------------------------- /R/janitor.R: -------------------------------------------------------------------------------- 1 | #' @section Package context: 2 | #' Advanced users can do most things covered here, but they can do it 3 | #' faster with janitor and save their thinking for more fun tasks. 4 | #' @keywords internal 5 | "_PACKAGE" 6 | ## quiets concerns of R CMD check re: the .'s that appear in pipelines 7 | ## and the "n" that is produced by dplyr::count() in a pipeline 8 | if (getRversion() >= "2.15.1") utils::globalVariables(c(".", "n")) 9 | -------------------------------------------------------------------------------- /R/paste_skip_na.R: -------------------------------------------------------------------------------- 1 | #' Like `paste()`, but missing values are omitted 2 | #' 3 | #' @details If all values are missing, the value from the first argument is 4 | #' preserved. 5 | #' 6 | #' @param ...,sep,collapse See [base::paste()] 7 | #' @return A character vector of pasted values. 8 | #' @examples 9 | #' paste_skip_na(NA) # NA_character_ 10 | #' paste_skip_na("A", NA) # "A" 11 | #' paste_skip_na("A", NA, c(NA, "B"), sep = ",") # c("A", "A,B") 12 | #' @export 13 | paste_skip_na <- function(..., sep = " ", collapse = NULL) { 14 | args <- list(...) 
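# A quick sketch of the get_one_to_one_value_order() helper defined above (input
# vectors are hypothetical). Columns with the same value pattern reduce to identical
# integer vectors, which is what identical() in get_one_to_one() compares:
#   get_one_to_one_value_order(c("b", "a", "b", "c"))   # 1 2 1 3
#   get_one_to_one_value_order(c(10, 20, 10, 30))       # 1 2 1 3 -- same pattern,
#   # so these two columns would be reported together as a 1:1 group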
15 | if (length(args) <= 1) { 16 | if (length(args) == 0) { 17 | # match the behavior of paste 18 | paste(sep = sep, collapse = collapse) 19 | } else if (!is.null(collapse)) { 20 | if (all(is.na(args[[1]]))) { 21 | # Collapsing with all NA values results in NA 22 | NA_character_ 23 | } else { 24 | # Collapsing without all NA values collapses the non-NA values 25 | paste(na.omit(args[[1]]), sep = sep, collapse = collapse) 26 | } 27 | } else { 28 | # as.character() to ensure that logical NA values are converted to 29 | # NA_character_ 30 | as.character(args[[1]]) 31 | } 32 | } else { 33 | # There are at least 2 arguments; paste the first two and recurse 34 | a1 <- args[[1]] 35 | a2 <- args[[2]] 36 | if (length(a1) != length(a2)) { 37 | if (length(a1) == 1) { 38 | a1 <- rep(a1, length(a2)) 39 | } else if (length(a2) == 1) { 40 | a2 <- rep(a2, length(a1)) 41 | } else { 42 | stop("Arguments must be the same length or one argument must be a scalar.") 43 | } 44 | } 45 | # Which arguments are NA, if any? 46 | mask1 <- !is.na(a1) 47 | mask2 <- !is.na(a2) 48 | mask_both <- mask1 & mask2 49 | mask_only2 <- (!mask1) & mask2 50 | firsttwo <- a1 51 | if (any(mask_only2)) { 52 | firsttwo[mask_only2] <- a2[mask_only2] 53 | } 54 | if (any(mask_both)) { 55 | # Collapse only occurs on the final pasting 56 | firsttwo[mask_both] <- paste(a1[mask_both], a2[mask_both], sep = sep, collapse = NULL) 57 | } 58 | # prepare to recurse, and recurse 59 | new_args <- append(list(firsttwo), args[-(1:2)]) 60 | new_args$sep <- sep 61 | new_args$collapse <- collapse 62 | do.call(paste_skip_na, new_args) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /R/print_tabyl.R: -------------------------------------------------------------------------------- 1 | #' @export 2 | print.tabyl <- function(x, ...) { 3 | print.data.frame(x, row.names = FALSE) 4 | } 5 | -------------------------------------------------------------------------------- /R/remove_empties.R: -------------------------------------------------------------------------------- 1 | #' Remove empty rows and/or columns from a data.frame or matrix. 2 | #' 3 | #' Removes all rows and/or columns from a data.frame or matrix that 4 | #' are composed entirely of `NA` values. 5 | #' 6 | #' @param dat the input data.frame or matrix. 7 | #' @param which one of "rows", "cols", or `c("rows", "cols")`. Where no 8 | #' value of which is provided, defaults to removing both empty rows and empty 9 | #' columns, declaring the behavior with a printed message. 10 | #' @param cutoff a row/col should have more than this fraction of non-NA values to be 11 | #' retained. E.g., `cutoff = 0.8` means that rows/cols that are 20% or more missing will be dropped. 12 | #' @param quiet Should messages be suppressed (`TRUE`) or printed 13 | #' (`FALSE`) indicating the summary of empty columns or rows removed? 14 | #' @return Returns the object without its missing rows or columns. 15 | #' @family remove functions 16 | #' @seealso [remove_constant()] for removing 17 | #' constant columns. 
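For reference, two behaviors of `paste_skip_na()` above that the roxygen examples only touch on are scalar recycling and elementwise NA-skipping; a brief sketch (inputs are hypothetical, expected results shown as comments):

paste_skip_na(c("A", NA), c(NA, "B"), sep = "-")
# "A" "B"            -- each element keeps whatever is non-missing
paste_skip_na(c("A", NA, "C"), "x", sep = "-")
# "A-x" "x" "C-x"    -- the scalar is recycled; NA elements are skipped rather than pasted as "NA"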
18 | #' @examples 19 | #' # not run: 20 | #' # dat %>% remove_empty("rows") 21 | #' # addressing a common untidy-data scenario where we have a mixture of 22 | #' # blank values in some (character) columns and NAs in others: 23 | #' library(dplyr) 24 | #' dd <- tibble( 25 | #' x = c(LETTERS[1:5], NA, rep("", 2)), 26 | #' y = c(1:5, rep(NA, 3)) 27 | #' ) 28 | #' # remove_empty() drops row 5 (all NA) but not 6 and 7 (blanks + NAs) 29 | #' dd %>% remove_empty("rows") 30 | #' # solution: preprocess to convert whitespace/empty strings to NA, 31 | #' # _then_ remove empty (all-NA) rows 32 | #' dd %>% 33 | #' mutate(across(where(is.character), ~ na_if(trimws(.), ""))) %>% 34 | #' remove_empty("rows") 35 | #' @export 36 | remove_empty <- function(dat, which = c("rows", "cols"), cutoff = 1, quiet = TRUE) { 37 | if (missing(which) && !missing(dat)) { 38 | message("value for \"which\" not specified, defaulting to c(\"rows\", \"cols\")") 39 | which <- c("rows", "cols") 40 | } 41 | if ((sum(which %in% c("rows", "cols")) != length(which)) && !missing(dat)) { 42 | stop("\"which\" must be one of \"rows\", \"cols\", or c(\"rows\", \"cols\")") 43 | } 44 | if (length(cutoff) != 1) { 45 | stop("cutoff must be a single value") 46 | } else if (!is.numeric(cutoff)) { 47 | stop("cutoff must be numeric") 48 | } else if (cutoff <= 0 | cutoff > 1) { 49 | stop("cutoff must be >0 and <= 1") 50 | } else if (length(which) > 1 & cutoff != 1) { 51 | stop("cutoff must be used with only one of which = 'rows' or 'cols', not both") 52 | } 53 | if ("rows" %in% which) { 54 | # Using different code with cutoff = 1 vs cutoff != 1 to avoid possible 55 | # floating point errors. 56 | mask_keep <- 57 | if (cutoff == 1) { 58 | rowSums(is.na(dat)) != ncol(dat) 59 | } else { 60 | (rowSums(!is.na(dat)) / ncol(dat)) > cutoff 61 | } 62 | if (!quiet) { 63 | remove_message(dat = dat, mask_keep = mask_keep, which = "rows", reason = "empty") 64 | } 65 | dat <- dat[mask_keep, , drop = FALSE] 66 | } 67 | if ("cols" %in% which) { 68 | # Using different code with cutoff = 1 vs cutoff != 1 to avoid possible 69 | # floating point errors. 70 | mask_keep <- 71 | if (cutoff == 1) { 72 | colSums(is.na(dat)) != nrow(dat) 73 | } else { 74 | (colSums(!is.na(dat)) / nrow(dat)) > cutoff 75 | } 76 | if (!quiet) { 77 | remove_message(dat = dat, mask_keep = mask_keep, which = "columns", reason = "empty") 78 | } 79 | dat <- dat[, mask_keep, drop = FALSE] 80 | } 81 | dat 82 | } 83 | 84 | ## Remove constant columns 85 | 86 | #' @title Remove constant columns from a data.frame or matrix. 87 | #' @param dat the input data.frame or matrix. 88 | #' @param na.rm should `NA` values be removed when considering whether a 89 | #' column is constant? The default value of `FALSE` will result in a 90 | #' column not being removed if it's a mix of a single value and `NA`. 91 | #' @param quiet Should messages be suppressed (`TRUE`) or printed 92 | #' (`FALSE`) indicating the summary of empty columns or rows removed? 93 | #' 94 | #' @examples 95 | #' remove_constant(data.frame(A = 1, B = 1:3)) 96 | #' 97 | #' # To find the columns that are constant 98 | #' data.frame(A = 1, B = 1:3) %>% 99 | #' dplyr::select(!dplyr::all_of(names(remove_constant(.)))) %>% 100 | #' unique() 101 | #' @importFrom stats na.omit 102 | #' @family remove functions 103 | #' @seealso [remove_empty()] for removing empty 104 | #' columns or rows. 
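The `cutoff` argument of `remove_empty()` above is easy to misread; as a short sketch with hypothetical data, `cutoff = 0.5` drops rows that are 50% or more missing (and `cutoff` may only be combined with a single value of `which`):

dd <- data.frame(a = c(1, NA, NA), b = c(2, 3, NA), c = c(4, 5, NA))
remove_empty(dd, which = "rows", cutoff = 0.5)
# keeps rows 1 and 2 (100% and ~67% non-NA); drops row 3 (all NA)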
105 | #' @export 106 | remove_constant <- function(dat, na.rm = FALSE, quiet = TRUE) { 107 | mask <- 108 | sapply( 109 | X = seq_len(ncol(dat)), 110 | FUN = function(idx) { 111 | column_to_test <- 112 | if (is.matrix(dat)) { 113 | dat[, idx] 114 | } else { 115 | dat[[idx]] 116 | } 117 | length(unique( 118 | if (na.rm) { 119 | stats::na.omit(column_to_test) 120 | } else { 121 | column_to_test 122 | } 123 | )) <= 1 # the < is in case all values are NA with na.rm=TRUE 124 | } 125 | ) 126 | if (!quiet) { 127 | remove_message(dat = dat, mask_keep = !mask, which = "columns", reason = "constant") 128 | } 129 | dat[, !mask, drop = FALSE] 130 | } 131 | 132 | 133 | #' Generate the message describing columns or rows that are being removed. 134 | #' 135 | #' @inheritParams remove_empty 136 | #' @param mask_keep A logical vector of rows or columns to keep (`TRUE`) or 137 | #' remove (`FALSE`). 138 | #' @param reason The reason that rows are being removed (to be used in the 139 | #' message. 140 | #' @noRd 141 | remove_message <- function(dat, mask_keep, which = c("columns", "rows"), reason = c("empty", "constant")) { 142 | if (all(mask_keep)) { 143 | message("No ", reason, " ", which, " to remove.") 144 | } else { 145 | details <- 146 | if (which == "columns") { 147 | if (is.null(colnames(dat)) || any(colnames(dat) %in% "")) { 148 | sprintf("%0.3g%%", 100 * sum(!mask_keep) / length(mask_keep)) 149 | } else { 150 | sprintf("Removed: %s", paste(names(dat)[!mask_keep], collapse = ", ")) 151 | } 152 | } else { 153 | sprintf("%0.3g%%", 100 * sum(!mask_keep) / length(mask_keep)) 154 | } 155 | message( 156 | sprintf( 157 | "Removing %g %s %s of %g %s total (%s).", 158 | sum(!mask_keep), reason, which, length(mask_keep), which, details 159 | ) 160 | ) 161 | } 162 | } 163 | -------------------------------------------------------------------------------- /R/round_half_up.R: -------------------------------------------------------------------------------- 1 | #' Round a numeric vector; halves will be rounded up, ala Microsoft Excel. 2 | #' 3 | #' @description 4 | #' In base R `round()`, halves are rounded to even, e.g., 12.5 and 5 | #' 11.5 are both rounded to 12. This function rounds 12.5 to 13 (assuming 6 | #' `digits = 0`). Negative halves are rounded away from zero, e.g., -0.5 is 7 | #' rounded to -1. 8 | #' 9 | #' This may skew subsequent statistical analysis of the data, but may be 10 | #' desirable in certain contexts. This function is implemented exactly from 11 | #' ; see that question and comments for 12 | #' discussion of this issue. 13 | #' 14 | #' @param x a numeric vector to round. 15 | #' @param digits how many digits should be displayed after the decimal point? 16 | #' @returns A vector with the same length as `x` 17 | #' @export 18 | #' @examples 19 | #' round_half_up(12.5) 20 | #' round_half_up(1.125, 2) 21 | #' round_half_up(1.125, 1) 22 | #' round_half_up(-0.5, 0) # negatives get rounded away from zero 23 | #' 24 | round_half_up <- function(x, digits = 0) { 25 | posneg <- sign(x) 26 | z <- abs(x) * 10^digits 27 | z <- z + 0.5 + sqrt(.Machine$double.eps) 28 | z <- trunc(z) 29 | z <- z / 10^digits 30 | z * posneg 31 | } 32 | 33 | #' Round a numeric vector to the specified number of significant digits; halves will be rounded up. 34 | #' 35 | #' @description 36 | #' In base R `signif()`, halves are rounded to even, e.g., 37 | #' `signif(11.5, 2)` and `signif(12.5, 2)` are both rounded to 12. 38 | #' This function rounds 12.5 to 13 (assuming `digits = 2`). 
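To make the contrast with base R concrete, a quick comparison of `round_half_up()` above against `round()` (the commented outputs follow from the definitions given):

round(c(0.5, 1.5, 2.5))          # 0 2 2  -- base R rounds halves to even
round_half_up(c(0.5, 1.5, 2.5))  # 1 2 3  -- halves always round up
round_half_up(c(-0.5, -1.5))     # -1 -2  -- negative halves round away from zero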
Negative halves 39 | #' are rounded away from zero, e.g., `signif(-2.5, 1)` is rounded to -3. 40 | #' 41 | #' This may skew subsequent statistical analysis of the data, but may be 42 | #' desirable in certain contexts. This function is implemented from 43 | #' ; see that question and 44 | #' comments for discussion of this issue. 45 | #' 46 | #' @param x a numeric vector to round. 47 | #' @param digits integer indicating the number of significant digits to be used. 48 | #' @export 49 | #' @examples 50 | #' signif_half_up(12.5, 2) 51 | #' signif_half_up(1.125, 3) 52 | #' signif_half_up(-2.5, 1) # negatives get rounded away from zero 53 | #' 54 | signif_half_up <- function(x, digits = 6) { 55 | xs <- which(x != 0 & !is.na(x) & !is.infinite(x)) 56 | 57 | y <- rep(0, length(x)) 58 | z <- x 59 | 60 | y[xs] <- 10^(digits - ceiling(log10(abs(x[xs])))) 61 | 62 | z[xs] <- round_half_up(x[xs] * y[xs]) / y[xs] 63 | 64 | return(z) 65 | } 66 | -------------------------------------------------------------------------------- /R/round_to_fraction.R: -------------------------------------------------------------------------------- 1 | #' Round to the nearest fraction of a specified denominator. 2 | #' 3 | #' @description 4 | #' Round a decimal to the precise decimal value of a specified 5 | #' fractional denominator. Common use cases include addressing floating point 6 | #' imprecision and enforcing that data values fall into a certain set. 7 | #' 8 | #' E.g., if a decimal represents hours and values should be logged to the nearest 9 | #' minute, `round_to_fraction(x, 60)` would enforce that distribution and 0.57 10 | #' would be rounded to 0.566667, the equivalent of 34/60. 0.56 would also be rounded 11 | #' to 34/60. 12 | #' 13 | #' Set `denominator = 1` to round to whole numbers. 14 | #' 15 | #' The `digits` argument allows for rounding of the subsequent result. 16 | #' 17 | #' @details 18 | #' If `digits` is `Inf`, `x` is rounded to the fraction 19 | #' and then kept at full precision. If `digits` is `"auto"`, the 20 | #' number of digits is automatically selected as 21 | #' `ceiling(log10(denominator)) + 1`. 22 | #' 23 | #' @param x A numeric vector 24 | #' @param denominator The denominator of the fraction for rounding (a scalar or 25 | #' vector positive integer). 26 | #' @param digits Integer indicating the number of decimal places to be used 27 | #' after rounding to the fraction. This is passed to `base::round()`). 28 | #' Negative values are allowed (see Details). (`Inf` indicates no 29 | #' subsequent rounding) 30 | #' @return the input x rounded to a decimal value that has an integer numerator relative 31 | #' to `denominator` (possibly subsequently rounded to a number of decimal 32 | #' digits). 
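Similarly for `signif_half_up()` above, the difference from base `signif()` only appears on exact halves at the last retained significant digit; a brief check with hypothetical values:

signif(c(11.5, 12.5), digits = 2)          # 12 12
signif_half_up(c(11.5, 12.5), digits = 2)  # 12 13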
33 | #' @examples 34 | #' round_to_fraction(1.6, denominator = 2) 35 | #' round_to_fraction(pi, denominator = 7) # 22/7 36 | #' round_to_fraction(c(8.1, 9.2), denominator = c(7, 8)) 37 | #' round_to_fraction(c(8.1, 9.2), denominator = c(7, 8), digits = 3) 38 | #' round_to_fraction(c(8.1, 9.2, 10.3), denominator = c(7, 8, 1001), digits = "auto") 39 | #' @export 40 | round_to_fraction <- function(x, denominator, digits = Inf) { 41 | stopifnot(is.numeric(x)) 42 | stopifnot(length(denominator) %in% c(1, length(x))) 43 | stopifnot(is.numeric(denominator)) 44 | stopifnot(denominator >= 1) 45 | if (!(identical(digits, "auto") || 46 | (is.numeric(digits) & (length(digits) %in% c(1, length(x)))))) { 47 | stop('`digits` must be either "auto" or a number that is either a scalar (length = 1) or the same length as `x`.') 48 | } 49 | ret <- round(x * denominator, digits = 0) / denominator 50 | if (identical(digits, "auto")) { 51 | digits <- ceiling(log10(denominator)) + 1 52 | } 53 | mask_inf_digits <- is.infinite(digits) 54 | if (!all(mask_inf_digits)) { 55 | ret[!mask_inf_digits] <- 56 | round(ret[!mask_inf_digits], digits = digits[!mask_inf_digits]) 57 | } 58 | ret 59 | } 60 | -------------------------------------------------------------------------------- /R/row_to_names.R: -------------------------------------------------------------------------------- 1 | #' Elevate a row to be the column names of a data.frame. 2 | #' 3 | #' @param dat The input data.frame 4 | #' @param row_number The row(s) of `dat` containing the variable names or the 5 | #' string `"find_header"` to use `find_header(dat=dat, ...)` to find 6 | #' the row_number. Allows for multiple rows input as a numeric vector. NA's are 7 | #' ignored, and if a column contains only `NA` value it will be named `"NA"`. 8 | #' @param ... Sent to `find_header()`, if 9 | #' `row_number = "find_header"`. Otherwise, ignored. 10 | #' @param remove_row Should the row `row_number` be removed from the 11 | #' resulting data.frame? 12 | #' @param remove_rows_above If `row_number != 1`, should the rows above 13 | #' `row_number` - that is, between `1:(row_number-1)` - be removed 14 | #' from the resulting data.frame? 15 | #' @param sep A character string to separate the values in the case of multiple 16 | #' rows input to `row_number`. 17 | #' @return A data.frame with new names (and some rows removed, if specified) 18 | #' @family Set names 19 | #' @examples 20 | #' x <- data.frame( 21 | #' X_1 = c(NA, "Title", 1:3), 22 | #' X_2 = c(NA, "Title2", 4:6) 23 | #' ) 24 | #' x %>% 25 | #' row_to_names(row_number = 2) 26 | #' 27 | #' x %>% 28 | #' row_to_names(row_number = "find_header") 29 | #' @export 30 | row_to_names <- function(dat, row_number, ..., remove_row = TRUE, remove_rows_above = TRUE, sep = "_") { 31 | # Check inputs 32 | if (!(is.logical(remove_row) & length(remove_row) == 1)) { 33 | stop("remove_row must be either TRUE or FALSE, not ", as.character(remove_row)) 34 | } else if (!(is.logical(remove_rows_above) & length(remove_rows_above) == 1)) { 35 | stop("remove_rows_above must be either TRUE or FALSE, not ", as.character(remove_rows_above)) 36 | } 37 | if (identical(row_number, "find_header")) { 38 | # no need to check if it is a character string, %in% will do that for us 39 | # (and will handle the odd-ball cases like someone sending in 40 | # factor("find_header")). 41 | row_number <- find_header(dat = dat, ...) 42 | } else if (is.numeric(row_number)) { 43 | extra_args <- list(...) 
44 | if (length(extra_args) != 0) { 45 | stop("Extra arguments (...) may only be given if row_number = 'find_header'.") 46 | } 47 | } else { 48 | stop("row_number must be a numeric value or 'find_header'") 49 | } 50 | if (!is.character(sep)) { 51 | stop("`sep` must be of type `character`.") 52 | } 53 | if (length(sep) != 1) { 54 | stop("`sep` must be of length 1.") 55 | } 56 | if (is.na(sep)) { 57 | stop("`sep` can't be of type `NA_character_`.") 58 | } 59 | new_names <- sapply(dat[row_number, , drop = FALSE], paste_skip_na, collapse = sep) %>% 60 | stringr::str_replace_na() 61 | 62 | if (any(duplicated(new_names))) { 63 | rlang::warn( 64 | message = paste("Row", row_number, "does not provide unique names. Consider running clean_names() after row_to_names()."), 65 | class = "janitor_warn_row_to_names_not_unique" 66 | ) 67 | } 68 | colnames(dat) <- new_names 69 | rows_to_remove <- c( 70 | if (remove_row) { 71 | row_number 72 | } else { 73 | c() 74 | }, 75 | if (remove_rows_above) { 76 | seq_len(max(row_number) - 1) 77 | } else { 78 | c() 79 | } 80 | ) 81 | if (length(rows_to_remove)) { 82 | dat[-(rows_to_remove), , drop = FALSE] 83 | } else { 84 | dat 85 | } 86 | } 87 | 88 | #' Find the header row in a data.frame 89 | #' 90 | #' @details 91 | #' If `...` is missing, then the first row with no missing values is used. 92 | #' 93 | #' When searching for a specified value or value within a column, the first row 94 | #' with a match will be returned, regardless of the completeness of the rest of 95 | #' that row. If `...` has a single character argument, then the first 96 | #' column is searched for that value. If `...` has a named numeric 97 | #' argument, then the column whose position number matches the value of that 98 | #' argument is searched for the name (see the last example below). If more than one 99 | #' row is found matching a value that is searched for, the number of the first 100 | #' matching row will be returned (with a warning). 101 | #' 102 | #' @inheritParams row_to_names 103 | #' @param ... See details 104 | #' @return The row number for the header row 105 | #' @family Set names 106 | #' @examples 107 | #' # the first row 108 | #' find_header(data.frame(A = "B")) 109 | #' # the second row 110 | #' find_header(data.frame(A = c(NA, "B"))) 111 | #' # the second row since the first has an empty value 112 | #' find_header(data.frame(A = c(NA, "B"), B = c("C", "D"))) 113 | #' # The third row because the second column was searched for the text "E" 114 | #' find_header(data.frame(A = c(NA, "B", "C", "D"), B = c("C", "D", "E", "F")), "E" = 2) 115 | #' @export 116 | find_header <- function(dat, ...) { 117 | extra_args <- list(...) 
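# A sketch of the multi-row header feature of row_to_names() above (data and column
# names are hypothetical):
#   x <- data.frame(a = c("Group", "A", "1"), b = c("Group", "B", "2"))
#   row_to_names(x, row_number = 1:2, sep = "_")
#   # -> a single data row ("1", "2") with names "Group_A" and "Group_B";
#   #    rows 1:2 are dropped because remove_row covers every row in row_number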
118 | if (length(extra_args) == 0) { 119 | # Find the first complete row 120 | ret <- which(rowSums(is.na(dat)) == 0) 121 | if (length(ret) == 0) { 122 | stop("No complete rows (rows with zero NA values) were found.") 123 | } 124 | ret <- ret[1] 125 | } else if (length(extra_args) == 1) { 126 | if (is.null(names(extra_args))) { 127 | # Search for the argument in the first column 128 | column_to_search <- 1 129 | string_to_search <- extra_args[[1]] 130 | } else { 131 | # Search for the name of the argument in the indicated column 132 | column_to_search <- extra_args[[1]] 133 | string_to_search <- names(extra_args) 134 | } 135 | ret <- which(dat[[column_to_search]] %in% string_to_search) 136 | if (length(ret) == 0) { 137 | stop(sprintf( 138 | "The string '%s' was not found in column %g", string_to_search, column_to_search 139 | )) 140 | } else if (length(ret) > 1) { 141 | rlang::warn( 142 | message = 143 | sprintf( 144 | "The string '%s' was found %g times in column %g, using the first row where it was found", 145 | string_to_search, length(ret), column_to_search 146 | ), 147 | class = "janitor_warn_find_header_not_unique" 148 | ) 149 | ret <- ret[1] 150 | } 151 | } else { 152 | stop("Either zero or one arguments other than 'dat' may be provided.") 153 | } 154 | ret 155 | } 156 | -------------------------------------------------------------------------------- /R/sas_dates.R: -------------------------------------------------------------------------------- 1 | #' Convert a SAS date, time or date/time to an R object 2 | #' 3 | #' @inheritParams excel_numeric_to_date 4 | #' @param datetime_num numeric vector of date/time numbers (seconds since 5 | #' midnight 1960-01-01) to convert 6 | #' @param time_num numeric vector of time numbers (seconds since midnight on the 7 | #' current day) to convert 8 | #' @return If a date and time or datetime are provided, a POSIXct object. If a 9 | #' date is provided, a Date object. If a time is provided, an hms::hms object 10 | #' @references SAS Date, Time, and Datetime Values reference (retrieved on 11 | #' 2022-03-08): https://v8doc.sas.com/sashtml/lrcon/zenid-63.htm 12 | #' @examples 13 | #' sas_numeric_to_date(date_num = 15639) # 2002-10-26 14 | #' sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC") # 1998-07-26T14:45:32Z 15 | #' sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC") # 2002-10-26T01:00:00Z 16 | #' sas_numeric_to_date(time_num = 3600) # 01:00:00 17 | #' @family date-time cleaning 18 | #' @export 19 | sas_numeric_to_date <- function(date_num, datetime_num, time_num, tz = "UTC") { 20 | # Confirm that a usable set of input arguments is given 21 | has_date <- !missing(date_num) 22 | has_datetime <- !missing(datetime_num) 23 | has_time <- !missing(time_num) 24 | stopifnot(is.character(tz)) 25 | stopifnot(length(tz) == 1) 26 | if (tz != "UTC") { 27 | warning("SAS may not properly store timezones other than UTC. 
Consider confirming the accuracy of the resulting data.") 28 | } 29 | if (has_date & has_datetime) { 30 | stop("Must not give both `date_num` and `datetime_num`") 31 | } else if (has_time & has_datetime) { 32 | stop("Must not give both `time_num` and `datetime_num`") 33 | } 34 | if (has_time) { 35 | stopifnot("`time_num` must be non-negative" = all(is.na(time_num) | time_num >= 0)) 36 | # Note the value of 86400 is allowed by the SAS standard listed in the 37 | # references section 38 | stopifnot("`time_num` must be within the number of seconds in a day (<= 86400)" = all(is.na(time_num) | time_num <= 86400)) 39 | } 40 | if (has_date & has_time) { 41 | mask_na_match <- is.na(date_num) == is.na(time_num) 42 | if (!all(mask_na_match)) { 43 | stop("The same values are not NA for both `date_num` and `time_num`") 44 | } 45 | datetime_num <- 86400 * date_num + time_num 46 | has_datetime <- TRUE 47 | } 48 | if (has_datetime) { 49 | ret <- as.POSIXct(datetime_num, origin = "1960-01-01", tz = tz) 50 | } else if (has_date) { 51 | ret <- as.Date(date_num, origin = "1960-01-01") 52 | } else if (has_time) { 53 | ret <- hms::hms(seconds = time_num) 54 | } else { 55 | stop("Must give one of `date_num`, `datetime_num`, `time_num`, or `date_num` and `time_num`") 56 | } 57 | ret 58 | } 59 | -------------------------------------------------------------------------------- /R/single_value.R: -------------------------------------------------------------------------------- 1 | #' Ensure that a vector has only a single value throughout. 2 | #' 3 | #' Missing values are replaced with the single value, and if all values are 4 | #' missing, the first value in `missing` is used throughout. 5 | #' 6 | #' @param x The vector which should have a single value 7 | #' @param missing The vector of values to consider missing in `x` 8 | #' @param warn_if_all_missing Generate a warning if all values are missing? 9 | #' @param info If more than one value is found, append this to the warning or 10 | #' error to assist with determining the location of the issue. 11 | #' @return `x` as the scalar single value found throughout (or an error if 12 | #' more than one value is found). 
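For the common case of a SAS export carrying separate date and time columns, `sas_numeric_to_date()` above vectorizes cleanly and tolerates rows where both values are missing; a sketch with hypothetical column names:

sas_raw <- data.frame(visit_date = c(15639, NA), visit_time = c(3600, NA))
dplyr::mutate(
  sas_raw,
  visit_dt = sas_numeric_to_date(date_num = visit_date, time_num = visit_time, tz = "UTC")
)
# visit_dt should be "2002-10-26 01:00:00 UTC" for the first row and NA for the second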
13 | #' @examples 14 | #' # A simple use case with vectors of input 15 | #' 16 | #' single_value(c(NA, 1)) 17 | #' # Multiple, different values of missing can be given 18 | #' single_value(c(NA, "a"), missing = c(NA, "a")) 19 | #' 20 | #' # A typical use case with a grouped data.frame used for input and the output 21 | #' # (`B` is guaranteed to have a single value and only one row, in this case) 22 | #' data.frame( 23 | #' A = rep(1:3, each = 2), 24 | #' B = c(rep(4:6, each = 2)) 25 | #' ) %>% 26 | #' dplyr::group_by(A) %>% 27 | #' dplyr::summarize( 28 | #' B = single_value(B) 29 | #' ) 30 | #' 31 | #' try( 32 | #' # info is useful to give when multiple values may be found to see what 33 | #' # grouping variable or what calculation is causing the error 34 | #' data.frame( 35 | #' A = rep(1:3, each = 2), 36 | #' B = c(rep(1:2, each = 2), 1:2) 37 | #' ) %>% 38 | #' dplyr::group_by(A) %>% 39 | #' dplyr::mutate( 40 | #' C = single_value(B, info = paste("Calculating C for group A=", A)) 41 | #' ) 42 | #' ) 43 | #' @export 44 | single_value <- function(x, missing = NA, warn_if_all_missing = FALSE, info = NULL) { 45 | mask_found <- !(x %in% missing) 46 | if (warn_if_all_missing && !any(mask_found)) { 47 | warning("All values are missing") 48 | } 49 | found_values <- unique(x[mask_found]) 50 | if (length(found_values) == 0) { 51 | missing[1] 52 | } else if (length(found_values) == 1) { 53 | found_values 54 | } else { 55 | if (!is.null(info)) { 56 | info <- paste(":", info) 57 | } 58 | stop("More than one (", length(found_values), ") value found (", paste(found_values, collapse = ", "), ")", info) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /R/top_levels.R: -------------------------------------------------------------------------------- 1 | #' Generate a frequency table of a factor grouped into top-n, bottom-n, and all 2 | #' other levels. 3 | #' 4 | #' Get a frequency table of a factor variable, grouped into categories by level. 5 | #' 6 | #' @param input_vec The factor variable to tabulate. 7 | #' @param n Number of levels to include in top and bottom groups 8 | #' @param show_na Should cases where the variable is `NA` be shown? 9 | #' @return A `data.frame` (actually a `tbl_df`) with the frequencies of the 10 | #' grouped, tabulated variable. Includes counts and percentages, and valid 11 | #' percentages (calculated omitting `NA` values, if present in the vector and 12 | #' `show_na = TRUE`.) 13 | #' @export 14 | #' @examples 15 | #' top_levels(as.factor(mtcars$hp), 2) 16 | top_levels <- function(input_vec, n = 2, show_na = FALSE) { 17 | # Initial type error catching 18 | if (!is.factor(input_vec)) { 19 | stop("factor_vec is not of type 'factor'") 20 | } 21 | 22 | num_levels_in_var <- nlevels(input_vec) 23 | 24 | # handle bad inputs 25 | if (!num_levels_in_var > 2) { 26 | stop("input factor variable must have at least 3 levels") 27 | } 28 | if (num_levels_in_var < 2 * n) { 29 | stop(paste0( 30 | "there are ", num_levels_in_var, " levels in the variable and ", 31 | n, " levels in each of the top and bottom groups.\nSince 2 * ", n, " = ", 2 * n, 32 | " is greater than ", num_levels_in_var, ", 33 | there would be overlap in the top and bottom groups and some records will be double-counted." 
34 | )) 35 | } 36 | if (n < 1 || n %% 1 != 0) { 37 | stop("n must be a whole number at least 1") 38 | } 39 | 40 | var_name <- deparse(substitute(input_vec)) 41 | 42 | # Identify top/mid/bottom group labels for printing 43 | groups <- get_level_groups(input_vec, n, num_levels_in_var) 44 | 45 | # convert input vector into grouped variable 46 | new_vec <- ifelse(as.numeric(input_vec) <= n, 47 | groups$top, 48 | ifelse(as.numeric(input_vec) > (num_levels_in_var - n), 49 | groups$bot, 50 | groups$mid 51 | ) 52 | ) 53 | 54 | # recode variable as hi-med-lo factor so table prints w/ correct sorting 55 | if (!is.na(groups$mid)) { 56 | new_vec <- factor(new_vec, levels = c(groups$top, groups$mid, groups$bot)) 57 | } else { 58 | new_vec <- factor(new_vec, levels = c(groups$top, groups$bot)) 59 | } 60 | 61 | # tabulate grouped variable, then reset name to match input variable name 62 | result <- tabyl(new_vec, show_na = show_na) 63 | names(result)[1] <- var_name 64 | result 65 | } 66 | -------------------------------------------------------------------------------- /R/utils-pipe.R: -------------------------------------------------------------------------------- 1 | #' Pipe operator 2 | #' 3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 4 | #' 5 | #' @name %>% 6 | #' @rdname pipe 7 | #' @keywords internal 8 | #' @export 9 | #' @importFrom magrittr %>% 10 | #' @usage lhs \%>\% rhs 11 | #' @param lhs A value or the magrittr placeholder. 12 | #' @param rhs A function call using the magrittr semantics. 13 | #' @return The result of calling `rhs(lhs)`. 14 | #' @examples 15 | #' mtcars %>% 16 | #' tabyl(carb, cyl) %>% 17 | #' adorn_totals() 18 | NULL 19 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://sfirke.github.io/janitor/ 2 | template: 3 | bootstrap: 5 4 | 5 | 6 | reference: 7 | - title: Cleaning data 8 | 9 | - subtitle: Cleaning variable names 10 | contents: 11 | - contains("clean_names") 12 | 13 | - title: Exploring data 14 | desc: > 15 | tabyls are an enhanced version of tables. See `vignette("tabyls")` 16 | for more details. 17 | contents: 18 | - tabyl 19 | - starts_with("adorn") 20 | - contains("tabyl") 21 | - -contains('.test') 22 | 23 | - subtitle: Change order 24 | contents: 25 | - row_to_names 26 | - find_header 27 | 28 | - title: Comparison 29 | desc: > 30 | Compare data frames columns 31 | contents: 32 | - starts_with("compare_df_cols") 33 | 34 | - title: Removing unnecessary columns / rows 35 | contents: 36 | - starts_with("remove_") 37 | - get_dupes 38 | - get_one_to_one 39 | - top_levels 40 | - single_value 41 | 42 | - title: Rounding / dates helpers 43 | desc: > 44 | Help to mimic some behaviour from Excel or SAS. 45 | These should be used on vector. 46 | contents: 47 | - round_half_up 48 | - signif_half_up 49 | - round_to_fraction 50 | - excel_numeric_to_date 51 | - sas_numeric_to_date 52 | - excel_time_to_numeric 53 | - starts_with("convert_to_date") 54 | 55 | - title: Misc / helpers 56 | desc: > 57 | These functions can help perform less frequent operations. 
58 | contents: 59 | - describe_class 60 | - paste_skip_na 61 | - chisq.test 62 | - fisher.test 63 | - mu_to_u 64 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: 2 | layout: "reach, diff, flags, files" 3 | behavior: default 4 | require_changes: false # if true: only post the comment if coverage changes 5 | require_base: no # [yes :: must have a base report to post] 6 | require_head: yes # [yes :: must have a head report to post] 7 | branches: null 8 | ignore: 9 | - "print_*" 10 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # Submission 2 | 2024-12-22 3 | 4 | ## Submission summary 5 | 6 | ### janitor version 2.2.1 7 | Contains only trivial changes needed to address failing test on CRAN, specific to how timezones are handled in Debian. 8 | 9 | ### Test environments 10 | 11 | #### Windows 12 | * Windows Server 2022 x64 (build 20348) with R Under development (unstable) (2024-12-20 r87452 ucrt) via win-builder, checked 2024-12-21 13 | 14 | #### Linux 15 | * Ubuntu 24.04 R-version 4.3.3 (2024-02-29) (local) 16 | 17 | ### R CMD check results 18 | 0 errors | 0 warnings | 0 notes 19 | 20 | ### Downstream dependencies 21 | This does not negatively affect downstream dependencies. 22 | 23 | revdepcheck passed for 124 of 125 packages. I investigated package BFS and the erroring function and found no use of janitor's functions so believe this to be a false positive. -------------------------------------------------------------------------------- /dirty_data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/dirty_data.xlsx -------------------------------------------------------------------------------- /janitor.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace 19 | 20 | UseNativePipeOperator: No 21 | 22 | SpellingDictionary: en_US 23 | -------------------------------------------------------------------------------- /man/add_totals_col.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{add_totals_col} 4 | \alias{add_totals_col} 5 | \title{Append a totals column to a data.frame.} 6 | \usage{ 7 | add_totals_col(dat, na.rm = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{an input data.frame with at least one numeric column.} 11 | 12 | \item{na.rm}{should missing values (including NaN) be omitted from the calculations?} 13 | } 14 | \value{ 15 | Returns a data.frame with a totals column containing row-wise sums. 16 | } 17 | \description{ 18 | This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals(where = "col")}} instead. 
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/add_totals_row.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{add_totals_row} 4 | \alias{add_totals_row} 5 | \title{Append a totals row to a data.frame.} 6 | \usage{ 7 | add_totals_row(dat, fill = "-", na.rm = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{an input data.frame with at least one numeric column.} 11 | 12 | \item{fill}{if there are more than one non-numeric columns, what string should fill the bottom row of those columns?} 13 | 14 | \item{na.rm}{should missing values (including NaN) be omitted from the calculations?} 15 | } 16 | \value{ 17 | Returns a data.frame with a totals row, consisting of "Total" in the first column and column sums in the others. 18 | } 19 | \description{ 20 | This function is deprecated, use \code{\link[=adorn_totals]{adorn_totals()}} instead. 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/adorn_crosstab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{adorn_crosstab} 4 | \alias{adorn_crosstab} 5 | \title{Add presentation formatting to a crosstabulation table.} 6 | \usage{ 7 | adorn_crosstab( 8 | dat, 9 | denom = "row", 10 | show_n = TRUE, 11 | digits = 1, 12 | show_totals = FALSE, 13 | rounding = "half to even" 14 | ) 15 | } 16 | \arguments{ 17 | \item{dat}{a data.frame with row names in the first column and numeric values in all other columns. Usually the piped-in result of a call to \code{crosstab} that included the argument \code{percent = "none"}.} 18 | 19 | \item{denom}{the denominator to use for calculating percentages. One of "row", "col", or "all".} 20 | 21 | \item{show_n}{should counts be displayed alongside the percentages?} 22 | 23 | \item{digits}{how many digits should be displayed after the decimal point?} 24 | 25 | \item{show_totals}{display a totals summary? Will be a row, column, or both depending on the value of \code{denom}.} 26 | 27 | \item{rounding}{method to use for truncating percentages - either "half to even", the base R default method, or "half up", where 14.5 rounds up to 15.} 28 | } 29 | \value{ 30 | Returns a data.frame. 31 | } 32 | \description{ 33 | This function is deprecated, use \code{\link[=tabyl]{tabyl()}} with the \code{adorn_} family of functions instead. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/adorn_ns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_ns.R 3 | \name{adorn_ns} 4 | \alias{adorn_ns} 5 | \title{Add underlying Ns to a tabyl displaying percentages.} 6 | \usage{ 7 | adorn_ns( 8 | dat, 9 | position = "rear", 10 | ns = attr(dat, "core"), 11 | format_func = function(x) { 12 | format(x, big.mark = ",") 13 | }, 14 | ... 15 | ) 16 | } 17 | \arguments{ 18 | \item{dat}{A data.frame of class \code{tabyl} that has had \code{adorn_percentages} and/or 19 | \code{adorn_pct_formatting} called on it. 
If given a list of data.frames, 20 | this function will apply itself to each data.frame in the list (designed for 3-way \code{tabyl} lists).} 21 | 22 | \item{position}{Should the N go in the front, or in the rear, of the percentage?} 23 | 24 | \item{ns}{The Ns to append. The default is the "core" attribute of the input tabyl 25 | \code{dat}, where the original Ns of a two-way \code{tabyl} are stored. However, if your Ns 26 | are stored somewhere else, or you need to customize them beyond what can be done 27 | with \code{format_func}, you can supply them here.} 28 | 29 | \item{format_func}{A formatting function to run on the Ns. Consider defining 30 | with \code{\link[base:format]{base::format()}}.} 31 | 32 | \item{...}{Columns to adorn. This takes a tidyselect specification. By default, 33 | all columns are adorned except for the first column and columns not of class 34 | \code{numeric}, but this allows you to manually specify which columns should be adorned, 35 | for use on a data.frame that does not result from a call to \code{tabyl}.} 36 | } 37 | \value{ 38 | A \code{data.frame} with Ns appended 39 | } 40 | \description{ 41 | This function adds back the underlying Ns to a \code{tabyl} whose percentages were 42 | calculated using \code{\link[=adorn_percentages]{adorn_percentages()}}, to display the Ns and percentages together. 43 | You can also call it on a non-tabyl data.frame to which you wish to append Ns. 44 | } 45 | \examples{ 46 | mtcars \%>\% 47 | tabyl(am, cyl) \%>\% 48 | adorn_percentages("col") \%>\% 49 | adorn_pct_formatting() \%>\% 50 | adorn_ns(position = "front") 51 | 52 | # Format the Ns with a custom format_func: 53 | set.seed(1) 54 | bigger_dat <- data.frame( 55 | sex = rep(c("m", "f"), 3000), 56 | age = round(runif(3000, 1, 102), 0) 57 | ) 58 | bigger_dat$age_group <- cut(bigger_dat$age, quantile(bigger_dat$age, c(0, 1 / 3, 2 / 3, 1))) 59 | 60 | bigger_dat \%>\% 61 | tabyl(age_group, sex, show_missing_levels = FALSE) \%>\% 62 | adorn_totals(c("row", "col")) \%>\% 63 | adorn_percentages("col") \%>\% 64 | adorn_pct_formatting(digits = 1) \%>\% 65 | adorn_ns(format_func = function(x) format(x, big.mark = ".", decimal.mark = ",")) 66 | # Control the columns to be adorned with the ... variable selection argument 67 | # If using only the ... argument, you can use empty commas as shorthand 68 | # to supply the default values to the preceding arguments: 69 | 70 | cases <- data.frame( 71 | region = c("East", "West"), 72 | year = 2015, 73 | recovered = c(125, 87), 74 | died = c(13, 12) 75 | ) 76 | 77 | cases \%>\% 78 | adorn_percentages("col",,recovered:died) \%>\% 79 | adorn_pct_formatting(,,,,,recovered:died) \%>\% 80 | adorn_ns(,,,recovered:died) 81 | 82 | } 83 | -------------------------------------------------------------------------------- /man/adorn_pct_formatting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_pct_formatting.R 3 | \name{adorn_pct_formatting} 4 | \alias{adorn_pct_formatting} 5 | \title{Format a \code{data.frame} of decimals as percentages.} 6 | \usage{ 7 | adorn_pct_formatting( 8 | dat, 9 | digits = 1, 10 | rounding = "half to even", 11 | affix_sign = TRUE, 12 | ... 13 | ) 14 | } 15 | \arguments{ 16 | \item{dat}{a data.frame with decimal values, typically the result of a call 17 | to \code{adorn_percentages} on a \code{tabyl}. 
If given a list of data.frames, this 18 | function will apply itself to each data.frame in the list (designed for 19 | 3-way \code{tabyl} lists).} 20 | 21 | \item{digits}{how many digits should be displayed after the decimal point?} 22 | 23 | \item{rounding}{method to use for rounding - either "half to even", the base 24 | R default method, or "half up", where 14.5 rounds up to 15.} 25 | 26 | \item{affix_sign}{should the \% sign be affixed to the end?} 27 | 28 | \item{...}{columns to adorn. This takes a tidyselect specification. By 29 | default, all numeric columns (besides the initial column, if numeric) are 30 | adorned, but this allows you to manually specify which columns should be 31 | adorned, for use on a data.frame that does not result from a call to 32 | \code{tabyl}.} 33 | } 34 | \value{ 35 | a data.frame with formatted percentages 36 | } 37 | \description{ 38 | Numeric columns get multiplied by 100 and formatted as 39 | percentages according to user specifications. This function defaults to 40 | excluding the first column of the input data.frame, assuming that it contains 41 | a descriptive variable, but this can be overridden by specifying the columns 42 | to adorn in the \code{...} argument. Non-numeric columns are always excluded. 43 | 44 | The decimal separator character is the result of \code{getOption("OutDec")}, which 45 | is based on the user's locale. If the default behavior is undesirable, 46 | change this value ahead of calling the function, either by changing locale or 47 | with \code{options(OutDec = ",")}. This aligns the decimal separator character 48 | with that used in \code{base::print()}. 49 | } 50 | \examples{ 51 | mtcars \%>\% 52 | tabyl(am, cyl) \%>\% 53 | adorn_percentages("col") \%>\% 54 | adorn_pct_formatting() 55 | 56 | # Control the columns to be adorned with the ... variable selection argument 57 | # If using only the ... argument, you can use empty commas as shorthand 58 | # to supply the default values to the preceding arguments: 59 | 60 | cases <- data.frame( 61 | region = c("East", "West"), 62 | year = 2015, 63 | recovered = c(125, 87), 64 | died = c(13, 12) 65 | ) 66 | 67 | cases \%>\% 68 | adorn_percentages("col", , recovered:died) \%>\% 69 | adorn_pct_formatting(, , , recovered:died) 70 | 71 | } 72 | -------------------------------------------------------------------------------- /man/adorn_percentages.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_percentages.R 3 | \name{adorn_percentages} 4 | \alias{adorn_percentages} 5 | \title{Convert a data.frame of counts to percentages.} 6 | \usage{ 7 | adorn_percentages(dat, denominator = "row", na.rm = TRUE, ...) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{tabyl} or other data.frame with a tabyl-like layout. 11 | If given a list of data.frames, this function will apply itself to each 12 | \code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{denominator}{The direction to use for calculating percentages. 15 | One of "row", "col", or "all".} 16 | 17 | \item{na.rm}{should missing values (including \code{NaN}) be omitted from the calculations?} 18 | 19 | \item{...}{columns to adorn. This takes a <\code{\link[dplyr:dplyr_tidy_select]{tidy-select}}> 20 | specification. 
By default, all numeric columns (besides the initial column, if numeric) 21 | are adorned, but this allows you to manually specify which columns should 22 | be adorned, for use on a \code{data.frame} that does not result from a call to \code{\link[=tabyl]{tabyl()}}.} 23 | } 24 | \value{ 25 | A \code{data.frame} of percentages, expressed as numeric values between 0 and 1. 26 | } 27 | \description{ 28 | This function defaults to excluding the first column of the input data.frame, 29 | assuming that it contains a descriptive variable, but this can be overridden 30 | by specifying the columns to adorn in the \code{...} argument. 31 | } 32 | \examples{ 33 | 34 | mtcars \%>\% 35 | tabyl(am, cyl) \%>\% 36 | adorn_percentages("col") 37 | 38 | # calculates correctly even with totals column and/or row: 39 | mtcars \%>\% 40 | tabyl(am, cyl) \%>\% 41 | adorn_totals("row") \%>\% 42 | adorn_percentages() 43 | 44 | # Control the columns to be adorned with the ... variable selection argument 45 | # If using only the ... argument, you can use empty commas as shorthand 46 | # to supply the default values to the preceding arguments: 47 | 48 | cases <- data.frame( 49 | region = c("East", "West"), 50 | year = 2015, 51 | recovered = c(125, 87), 52 | died = c(13, 12) 53 | ) 54 | 55 | cases \%>\% 56 | adorn_percentages(, , recovered:died) 57 | } 58 | -------------------------------------------------------------------------------- /man/adorn_rounding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_rounding.R 3 | \name{adorn_rounding} 4 | \alias{adorn_rounding} 5 | \title{Round the numeric columns in a data.frame.} 6 | \usage{ 7 | adorn_rounding(dat, digits = 1, rounding = "half to even", ...) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{tabyl} or other \code{data.frame} with similar layout. 11 | If given a list of data.frames, this function will apply itself to each 12 | \code{data.frame} in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{digits}{How many digits should be displayed after the decimal point?} 15 | 16 | \item{rounding}{Method to use for rounding - either "half to even" 17 | (the base R default method), or "half up", where 14.5 rounds up to 15.} 18 | 19 | \item{...}{Columns to adorn. This takes a tidyselect specification. 20 | By default, all numeric columns (besides the initial column, if numeric) 21 | are adorned, but this allows you to manually specify which columns should 22 | be adorned, for use on a data.frame that does not result from a call to \code{tabyl}.} 23 | } 24 | \value{ 25 | The \code{data.frame} with rounded numeric columns. 26 | } 27 | \description{ 28 | Can run on any \code{data.frame} with at least one numeric column. 29 | This function defaults to excluding the first column of the input data.frame, 30 | assuming that it contains a descriptive variable, but this can be overridden by 31 | specifying the columns to round in the \code{...} argument. 32 | 33 | If you're formatting percentages, e.g., the result of \code{\link[=adorn_percentages]{adorn_percentages()}}, 34 | use \code{\link[=adorn_pct_formatting]{adorn_pct_formatting()}} instead. This is a more flexible variant for ad-hoc usage. 35 | Compared to \code{adorn_pct_formatting()}, it does not multiply by 100 or pad the 36 | numbers with spaces for alignment in the results \code{data.frame}. 37 | This function retains the class of numeric input columns. 
38 | } 39 | \examples{ 40 | 41 | mtcars \%>\% 42 | tabyl(am, cyl) \%>\% 43 | adorn_percentages() \%>\% 44 | adorn_rounding(digits = 2, rounding = "half up") 45 | 46 | # tolerates non-numeric columns: 47 | library(dplyr) 48 | mtcars \%>\% 49 | tabyl(am, cyl) \%>\% 50 | adorn_percentages("all") \%>\% 51 | mutate(dummy = "a") \%>\% 52 | adorn_rounding() 53 | 54 | # Control the columns to be adorned with the ... variable selection argument 55 | # If using only the ... argument, you can use empty commas as shorthand 56 | # to supply the default values to the preceding arguments: 57 | cases <- data.frame( 58 | region = c("East", "West"), 59 | year = 2015, 60 | recovered = c(125, 87), 61 | died = c(13, 12) 62 | ) 63 | 64 | cases \%>\% 65 | adorn_percentages(, , ends_with("ed")) \%>\% 66 | adorn_rounding(, , all_of(c("recovered", "died"))) 67 | } 68 | -------------------------------------------------------------------------------- /man/adorn_title.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_title.R 3 | \name{adorn_title} 4 | \alias{adorn_title} 5 | \title{Add column name to the top of a two-way tabyl.} 6 | \usage{ 7 | adorn_title(dat, placement = "top", row_name, col_name) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{data.frame} of class \code{tabyl} or other \code{data.frame} with a tabyl-like layout. 11 | If given a list of data.frames, this function will apply itself to each \code{data.frame} 12 | in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{placement}{The title placement, one of \code{"top"}, or \code{"combined"}. 15 | See \strong{Details} for more information.} 16 | 17 | \item{row_name}{(optional) default behavior is to pull the row name from the 18 | attributes of the input \code{tabyl} object. If you wish to override that text, 19 | or if your input is not a \code{tabyl}, supply a string here.} 20 | 21 | \item{col_name}{(optional) default behavior is to pull the column_name from 22 | the attributes of the input \code{tabyl} object. If you wish to override that text, 23 | or if your input is not a \code{tabyl}, supply a string here.} 24 | } 25 | \value{ 26 | The input \code{tabyl}, augmented with the column title. Non-tabyl inputs 27 | that are of class \code{tbl_df} are downgraded to basic data.frames so that the 28 | title row prints correctly. 29 | } 30 | \description{ 31 | This function adds the column variable name to the top of a \code{tabyl} for a 32 | complete display of information. This makes the tabyl prettier, but renders 33 | the \code{data.frame} less useful for further manipulation. 34 | } 35 | \details{ 36 | The \code{placement} argument indicates whether the column name should be added to 37 | the \code{top} of the tabyl in an otherwise-empty row \code{"top"} or appended to the 38 | already-present row name variable (\code{"combined"}). The formatting in the \code{"top"} 39 | option has the look of base R's \code{table()}; it also wipes out the other column 40 | names, making it hard to further use the \code{data.frame} besides formatting it for reporting. 41 | The \code{"combined"} option is more conservative in this regard. 
42 | } 43 | \examples{ 44 | 45 | mtcars \%>\% 46 | tabyl(am, cyl) \%>\% 47 | adorn_title(placement = "top") 48 | 49 | # Adding a title to a non-tabyl 50 | library(tidyr) 51 | library(dplyr) 52 | mtcars \%>\% 53 | group_by(gear, am) \%>\% 54 | summarise(avg_mpg = mean(mpg), .groups = "drop") \%>\% 55 | pivot_wider(names_from = am, values_from = avg_mpg) \%>\% 56 | adorn_rounding() \%>\% 57 | adorn_title("top", row_name = "Gears", col_name = "Cylinders") 58 | } 59 | -------------------------------------------------------------------------------- /man/adorn_totals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/adorn_totals.R 3 | \name{adorn_totals} 4 | \alias{adorn_totals} 5 | \title{Append a totals row and/or column to a data.frame} 6 | \usage{ 7 | adorn_totals(dat, where = "row", fill = "-", na.rm = TRUE, name = "Total", ...) 8 | } 9 | \arguments{ 10 | \item{dat}{An input \code{data.frame} with at least one numeric column. If given a 11 | list of data.frames, this function will apply itself to each \code{data.frame} 12 | in the list (designed for 3-way \code{tabyl} lists).} 13 | 14 | \item{where}{One of "row", "col", or \code{c("row", "col")}} 15 | 16 | \item{fill}{If there are non-numeric columns, what should fill the bottom row 17 | of those columns? If a string, relevant columns will be coerced to character. 18 | If \code{NA} then column types are preserved.} 19 | 20 | \item{na.rm}{Should missing values (including \code{NaN}) be omitted from the calculations?} 21 | 22 | \item{name}{Name of the totals row and/or column. If both are created, and 23 | \code{name} is a single string, that name is applied to both. If both are created 24 | and \code{name} is a vector of length 2, the first element of the vector will be 25 | used as the row name (in column 1), and the second element will be used as the 26 | totals column name. Defaults to "Total".} 27 | 28 | \item{...}{Columns to total. This takes a tidyselect specification. By default, 29 | all numeric columns (besides the initial column, if numeric) are included in 30 | the totals, but this allows you to manually specify which columns should be 31 | included, for use on a data.frame that does not result from a call to \code{tabyl}.} 32 | } 33 | \value{ 34 | A \code{data.frame} augmented with a totals row, column, or both. 35 | The \code{data.frame} is now also of class \code{tabyl} and stores information about 36 | the attached totals and underlying data in the tabyl attributes. 37 | } 38 | \description{ 39 | This function defaults to excluding the first column of the input data.frame, 40 | assuming that it contains a descriptive variable, but this can be overridden 41 | by specifying the columns to be totaled in the \code{...} argument. Non-numeric 42 | columns are converted to character class and have a user-specified fill character 43 | inserted in the totals row. 
44 | } 45 | \examples{ 46 | mtcars \%>\% 47 | tabyl(am, cyl) \%>\% 48 | adorn_totals() 49 | } 50 | -------------------------------------------------------------------------------- /man/as_tabyl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_and_untabyl.R 3 | \name{as_tabyl} 4 | \alias{as_tabyl} 5 | \title{Add \code{tabyl} attributes to a data.frame} 6 | \usage{ 7 | as_tabyl(dat, axes = 2, row_var_name = NULL, col_var_name = NULL) 8 | } 9 | \arguments{ 10 | \item{dat}{a data.frame with variable values in the first column and numeric 11 | values in all other columns.} 12 | 13 | \item{axes}{is this a two_way tabyl or a one_way tabyl? If this function is 14 | being called by a user, this should probably be "2". One-way tabyls are 15 | created by \code{tabyl} but are a special case.} 16 | 17 | \item{row_var_name}{(optional) the name of the variable in the row dimension; 18 | used by \code{adorn_title()}.} 19 | 20 | \item{col_var_name}{(optional) the name of the variable in the column 21 | dimension; used by \code{adorn_title()}.} 22 | } 23 | \value{ 24 | Returns the same data.frame, but with the additional class of "tabyl" 25 | and the attribute "core". 26 | } 27 | \description{ 28 | A \code{tabyl} is a \code{data.frame} containing counts of a variable or 29 | co-occurrences of two variables (a.k.a., a contingency table or crosstab). 30 | This specialized kind of data.frame has attributes that enable \code{adorn_} 31 | functions to be called for precise formatting and presentation of results. 32 | E.g., display results as a mix of percentages, Ns, add totals rows or 33 | columns, rounding options, in the style of Microsoft Excel PivotTable. 34 | 35 | A \code{tabyl} can be the result of a call to \code{janitor::tabyl()}, in which case 36 | these attributes are added automatically. This function adds \code{tabyl} class 37 | attributes to a data.frame that isn't the result of a call to \code{tabyl} but 38 | meets the requirements of a two-way tabyl: 1) First column contains values of 39 | variable 1 2) Column names 2:n are the values of variable 2 3) Numeric values 40 | in columns 2:n are counts of the co-occurrences of the two variables.* 41 | \itemize{ 42 | \item = this is the ideal form of a \code{tabyl}, but janitor's \code{adorn_} functions tolerate 43 | and ignore non-numeric columns in positions 2:n. 44 | } 45 | 46 | For instance, the result of \code{\link[dplyr:count]{dplyr::count()}} followed by \code{\link[tidyr:pivot_wider]{tidyr::pivot_wider()}} 47 | can be treated as a \code{tabyl}. 48 | 49 | The result of calling \code{\link[=tabyl]{tabyl()}} on a single variable is a special class of 50 | one-way tabyl; this function only pertains to the two-way tabyl. 51 | } 52 | \examples{ 53 | as_tabyl(mtcars) 54 | 55 | } 56 | -------------------------------------------------------------------------------- /man/chisq.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistical_tests.R 3 | \name{chisq.test} 4 | \alias{chisq.test} 5 | \alias{chisq.test.default} 6 | \alias{chisq.test.tabyl} 7 | \title{Apply \code{stats::chisq.test()} to a two-way tabyl} 8 | \usage{ 9 | chisq.test(x, ...) 10 | 11 | \method{chisq.test}{default}(x, y = NULL, ...) 12 | 13 | \method{chisq.test}{tabyl}(x, tabyl_results = TRUE, ...) 
14 | } 15 | \arguments{ 16 | \item{x}{a two-way tabyl, a numeric vector or a factor} 17 | 18 | \item{...}{other parameters passed to \code{\link[stats:chisq.test]{stats::chisq.test()}}} 19 | 20 | \item{y}{if x is a vector, must be another vector or factor of the same length} 21 | 22 | \item{tabyl_results}{If \code{TRUE} and \code{x} is a tabyl object, 23 | also return \code{observed}, \code{expected}, \code{residuals} and \code{stdres} as tabyl.} 24 | } 25 | \value{ 26 | The result is the same as the one of \code{stats::chisq.test()}. 27 | If \code{tabyl_results} is \code{TRUE}, the returned tables \code{observed}, \code{expected}, 28 | \code{residuals} and \code{stdres} are converted to tabyls. 29 | } 30 | \description{ 31 | This generic function overrides \code{stats::chisq.test}. If the passed table 32 | is a two-way tabyl, it runs it through janitor::chisq.test.tabyl, otherwise 33 | it just calls \code{stats::chisq.test()}. 34 | } 35 | \examples{ 36 | tab <- tabyl(mtcars, gear, cyl) 37 | chisq.test(tab) 38 | chisq.test(tab)$residuals 39 | 40 | } 41 | -------------------------------------------------------------------------------- /man/compare_df_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_df_cols.R 3 | \name{compare_df_cols} 4 | \alias{compare_df_cols} 5 | \title{Compare data frames columns before merging} 6 | \usage{ 7 | compare_df_cols( 8 | ..., 9 | return = c("all", "match", "mismatch"), 10 | bind_method = c("bind_rows", "rbind"), 11 | strict_description = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{...}{A combination of data.frames, tibbles, and lists of 16 | data.frames/tibbles. The values may optionally be named arguments; if 17 | named, the output column will be the name; if not named, the output column 18 | will be the data.frame name (see examples section).} 19 | 20 | \item{return}{Should a summary of "all" columns be returned, only return 21 | "match"ing columns, or only "mismatch"ing columns?} 22 | 23 | \item{bind_method}{What method of binding should be used to determine 24 | matches? With "bind_rows", columns missing from a data.frame would be 25 | considered a match (as in \code{dplyr::bind_rows()}; with "rbind", columns 26 | missing from a data.frame would be considered a mismatch (as in 27 | \code{base::rbind()}.} 28 | 29 | \item{strict_description}{Passed to \code{describe_class}. Also, see the 30 | Details section.} 31 | } 32 | \value{ 33 | A data.frame with a column named "column_name" with a value named 34 | after the input data.frames' column names, and then one column per 35 | data.frame (named after the input data.frame). If more than one input has 36 | the same column name, the column naming will have suffixes defined by 37 | sequential use of \code{base::merge()} and may differ from expected naming. 38 | The rows within the data.frame-named columns are descriptions of the 39 | classes of the data within the columns (generated by 40 | \code{describe_class}). 41 | } 42 | \description{ 43 | Generate a comparison of data.frames (or similar objects) that indicates if 44 | they will successfully bind together by rows. 45 | } 46 | \details{ 47 | Due to the returned "column_name" column, no input data.frame may be 48 | named "column_name". 49 | 50 | The \code{strict_description} argument is most typically used to understand 51 | if factor levels match or are bindable. 
Factors are typically bindable, 52 | but the behavior of what happens when they bind differs based on the 53 | binding method ("bind_rows" or "rbind"). Even when 54 | \code{strict_description} is \code{FALSE}, data.frames may still bind 55 | because some classes (like factors and characters) can bind even if they 56 | appear to differ. 57 | } 58 | \examples{ 59 | compare_df_cols(data.frame(A = 1), data.frame(B = 2)) 60 | # user-defined names 61 | compare_df_cols(dfA = data.frame(A = 1), dfB = data.frame(B = 2)) 62 | # a combination of list and data.frame input 63 | compare_df_cols(listA = list(dfA = data.frame(A = 1), dfB = data.frame(B = 2)), data.frame(A = 3)) 64 | } 65 | \seealso{ 66 | Other data frame type comparison: 67 | \code{\link{compare_df_cols_same}()}, 68 | \code{\link{describe_class}()} 69 | } 70 | \concept{data frame type comparison} 71 | -------------------------------------------------------------------------------- /man/compare_df_cols_same.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_df_cols.R 3 | \name{compare_df_cols_same} 4 | \alias{compare_df_cols_same} 5 | \title{Do the data.frames have the same columns & types?} 6 | \usage{ 7 | compare_df_cols_same( 8 | ..., 9 | bind_method = c("bind_rows", "rbind"), 10 | verbose = TRUE 11 | ) 12 | } 13 | \arguments{ 14 | \item{...}{A combination of data.frames, tibbles, and lists of 15 | data.frames/tibbles. The values may optionally be named arguments; if 16 | named, the output column will be the name; if not named, the output column 17 | will be the data.frame name (see examples section).} 18 | 19 | \item{bind_method}{What method of binding should be used to determine 20 | matches? With "bind_rows", columns missing from a data.frame would be 21 | considered a match (as in \code{dplyr::bind_rows()}); with "rbind", columns 22 | missing from a data.frame would be considered a mismatch (as in 23 | \code{base::rbind()}).} 24 | 25 | \item{verbose}{Print the mismatching columns if binding will fail.} 26 | } 27 | \value{ 28 | \code{TRUE} if row binding will succeed or \code{FALSE} if it will fail. 29 | } 30 | \description{ 31 | Check whether a set of data.frames are row-bindable. Calls \code{compare_df_cols()} 32 | and returns \code{TRUE} if there are no mis-matching rows. 33 | } 34 | \examples{ 35 | compare_df_cols_same(data.frame(A = 1), data.frame(A = 2)) 36 | compare_df_cols_same(data.frame(A = 1), data.frame(B = 2)) 37 | compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), verbose = FALSE) 38 | compare_df_cols_same(data.frame(A = 1), data.frame(B = 2), bind_method = "rbind") 39 | } 40 | \seealso{ 41 | Other data frame type comparison: 42 | \code{\link{compare_df_cols}()}, 43 | \code{\link{describe_class}()} 44 | } 45 | \concept{data frame type comparison} 46 | -------------------------------------------------------------------------------- /man/convert_to_NA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{convert_to_NA} 4 | \alias{convert_to_NA} 5 | \title{Convert string values to true \code{NA} values.} 6 | \usage{ 7 | convert_to_NA(dat, strings) 8 | } 9 | \arguments{ 10 | \item{dat}{vector or data.frame to operate on.} 11 | 12 | \item{strings}{character vector of strings to convert.} 13 | } 14 | \value{ 15 | Returns a cleaned object.
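A small sketch, not from the package's examples, of the strict_description behavior described for compare_df_cols() above: factors whose level sets differ are only reported as different classes when strict_description = TRUE.
df1 <- data.frame(x = factor("a"))
df2 <- data.frame(x = factor("b"))
compare_df_cols(df1, df2, strict_description = TRUE)  # factor levels are part of the description, so x mismatches
compare_df_cols(df1, df2)                             # default FALSE: both columns are described simply as "factor"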
Can be a vector, data.frame, or \code{tibble::tbl_df} depending on the provided input. 16 | } 17 | \description{ 18 | Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:na_if]{dplyr::na_if()}} instead. 19 | 20 | Converts instances of user-specified strings into \code{NA}. Can operate on either a single vector or an entire data.frame. 21 | } 22 | \seealso{ 23 | janitor_deprecated 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/convert_to_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/convert_to_date.R 3 | \name{convert_to_date} 4 | \alias{convert_to_date} 5 | \alias{convert_to_datetime} 6 | \title{Parse dates from many formats} 7 | \usage{ 8 | convert_to_date( 9 | x, 10 | ..., 11 | character_fun = lubridate::ymd, 12 | string_conversion_failure = c("error", "warning") 13 | ) 14 | 15 | convert_to_datetime( 16 | x, 17 | ..., 18 | tz = "UTC", 19 | character_fun = lubridate::ymd_hms, 20 | string_conversion_failure = c("error", "warning") 21 | ) 22 | } 23 | \arguments{ 24 | \item{x}{The object to convert} 25 | 26 | \item{...}{Passed to further methods. Eventually may be passed to 27 | \code{excel_numeric_to_date()}, \code{base::as.POSIXct()}, or \code{base::as.Date()}.} 28 | 29 | \item{character_fun}{A function to convert non-numeric-looking, non-\code{NA} values 30 | in \code{x} to POSIXct objects.} 31 | 32 | \item{string_conversion_failure}{If a character value fails to parse into the 33 | desired class and instead returns \code{NA}, should the function return the 34 | result with a warning or throw an error?} 35 | 36 | \item{tz}{The timezone for POSIXct output, unless an object is POSIXt 37 | already. Ignored for Date output.} 38 | } 39 | \value{ 40 | POSIXct objects for \code{convert_to_datetime()} or Date objects for 41 | \code{convert_to_date()}. 42 | } 43 | \description{ 44 | Convert many date and date-time (POSIXct) formats as may be received 45 | from Microsoft Excel. 46 | } 47 | \details{ 48 | Character conversion checks if it matches something that looks like a 49 | Microsoft Excel numeric date, converts those to numeric, and then runs 50 | convert_to_datetime_helper() on those numbers. Then, character to Date or 51 | POSIXct conversion occurs via \code{character_fun(x, ...)} or 52 | \code{character_fun(x, tz=tz, ...)}, respectively. 53 | } 54 | \examples{ 55 | convert_to_date("2009-07-06") 56 | convert_to_date(40000) 57 | convert_to_date("40000.1") 58 | # Mixed date source data can be provided. 59 | convert_to_date(c("2020-02-29", "40000.1")) 60 | convert_to_datetime( 61 | c("2009-07-06", "40000.1", "40000", NA), 62 | character_fun = lubridate::ymd_h, truncated = 1, tz = "UTC" 63 | ) 64 | } 65 | \seealso{ 66 | Other date-time cleaning: 67 | \code{\link{excel_numeric_to_date}()}, 68 | \code{\link{excel_time_to_numeric}()}, 69 | \code{\link{sas_numeric_to_date}()} 70 | } 71 | \concept{date-time cleaning} 72 | -------------------------------------------------------------------------------- /man/crosstab.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{crosstab} 4 | \alias{crosstab} 5 | \title{Generate a crosstabulation of two vectors.} 6 | \usage{ 7 | crosstab(...) 
8 | } 9 | \arguments{ 10 | \item{...}{arguments} 11 | } 12 | \description{ 13 | This function is deprecated, use \code{\link[=tabyl]{tabyl(dat, var1, var2)}} instead. 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/describe_class.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/compare_df_cols.R 3 | \name{describe_class} 4 | \alias{describe_class} 5 | \alias{describe_class.factor} 6 | \alias{describe_class.default} 7 | \title{Describe the class(es) of an object} 8 | \usage{ 9 | describe_class(x, strict_description = TRUE) 10 | 11 | \method{describe_class}{factor}(x, strict_description = TRUE) 12 | 13 | \method{describe_class}{default}(x, strict_description = TRUE) 14 | } 15 | \arguments{ 16 | \item{x}{The object to describe} 17 | 18 | \item{strict_description}{Should differing factor levels be treated 19 | as differences for the purposes of identifying mismatches? 20 | \code{strict_description = TRUE} is stricter and factors with different 21 | levels will be treated as different classes. \code{FALSE} is more 22 | lenient: for class comparison purposes, the variable is just a "factor".} 23 | } 24 | \value{ 25 | A character scalar describing the class(es) of an object where if the 26 | scalar will match, columns in a data.frame (or similar object) should bind 27 | together without issue. 28 | } 29 | \description{ 30 | Describe the class(es) of an object 31 | } 32 | \details{ 33 | For package developers, an S3 generic method can be written for 34 | \code{describe_class()} for custom classes that may need more definition 35 | than the default method. This function is called by \code{\link[=compare_df_cols]{compare_df_cols()}}. 36 | } 37 | \section{Methods (by class)}{ 38 | \itemize{ 39 | \item \code{describe_class(factor)}: Describe factors with their levels 40 | and if they are ordered. 41 | 42 | \item \code{describe_class(default)}: List all classes of an object. 43 | 44 | }} 45 | \examples{ 46 | describe_class(1) 47 | describe_class(factor("A")) 48 | describe_class(ordered(c("A", "B"))) 49 | describe_class(ordered(c("A", "B")), strict_description = FALSE) 50 | } 51 | \seealso{ 52 | Other data frame type comparison: 53 | \code{\link{compare_df_cols}()}, 54 | \code{\link{compare_df_cols_same}()} 55 | } 56 | \concept{data frame type comparison} 57 | -------------------------------------------------------------------------------- /man/excel_numeric_to_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/excel_dates.R 3 | \name{excel_numeric_to_date} 4 | \alias{excel_numeric_to_date} 5 | \title{Convert dates encoded as serial numbers to Date class.} 6 | \usage{ 7 | excel_numeric_to_date( 8 | date_num, 9 | date_system = "modern", 10 | include_time = FALSE, 11 | round_seconds = TRUE, 12 | tz = Sys.timezone() 13 | ) 14 | } 15 | \arguments{ 16 | \item{date_num}{numeric vector of serial numbers to convert.} 17 | 18 | \item{date_system}{the date system, either \code{"modern"} or \code{"mac pre-2011"}.} 19 | 20 | \item{include_time}{Include the time (hours, minutes, seconds) in the output? 
21 | (See details)} 22 | 23 | \item{round_seconds}{Round the seconds to an integer (only has an effect when 24 | \code{include_time} is \code{TRUE})?} 25 | 26 | \item{tz}{Time zone, used when \code{include_time = TRUE} (see details for 27 | more information on timezones).} 28 | } 29 | \value{ 30 | Returns a vector of class Date if \code{include_time} is 31 | \code{FALSE}. Returns a vector of class POSIXlt if \code{include_time} is 32 | \code{TRUE}. 33 | } 34 | \description{ 35 | Converts numbers like \code{42370} into date values like \code{2016-01-01}. 36 | 37 | Defaults to the modern Excel date encoding system. However, Excel for Mac 38 | 2008 and earlier Mac versions of Excel used a different date system. To 39 | determine what platform to specify: if the date 2016-01-01 is represented by 40 | the number 42370 in your spreadsheet, it's the modern system. If it's 40908, 41 | it's the old Mac system. More on date encoding systems at 42 | http://support.office.com/en-us/article/Date-calculations-in-Excel-e7fe7167-48a9-4b96-bb53-5612a800b487. 43 | 44 | A list of all timezones is available from \code{base::OlsonNames()}, and the 45 | current timezone is available from \code{base::Sys.timezone()}. 46 | 47 | If your input data has a mix of Excel numeric dates and actual dates, see the 48 | more powerful functions \code{\link[=convert_to_date]{convert_to_date()}} and \code{convert_to_datetime()}. 49 | } 50 | \details{ 51 | When using \code{include_time=TRUE}, days with leap seconds will not 52 | be accurately handled as they do not appear to be accurately handled by 53 | Windows (as described in 54 | https://support.microsoft.com/en-us/help/2722715/support-for-the-leap-second). 55 | } 56 | \examples{ 57 | excel_numeric_to_date(40000) 58 | excel_numeric_to_date(40000.5) # No time is included 59 | excel_numeric_to_date(40000.5, include_time = TRUE) # Time is included 60 | excel_numeric_to_date(40000.521, include_time = TRUE) # Time is included 61 | excel_numeric_to_date(40000.521, 62 | include_time = TRUE, 63 | round_seconds = FALSE 64 | ) # Time with fractional seconds is included 65 | } 66 | \seealso{ 67 | \code{\link[=excel_time_to_numeric]{excel_time_to_numeric()}} 68 | 69 | Other date-time cleaning: 70 | \code{\link{convert_to_date}()}, 71 | \code{\link{excel_time_to_numeric}()}, 72 | \code{\link{sas_numeric_to_date}()} 73 | } 74 | \concept{date-time cleaning} 75 | -------------------------------------------------------------------------------- /man/excel_time_to_numeric.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/excel_time_to_numeric.R 3 | \name{excel_time_to_numeric} 4 | \alias{excel_time_to_numeric} 5 | \title{Convert a time that may be inconsistently or inconveniently formatted from 6 | Microsoft Excel to a numeric number of seconds between 0 and 86400.} 7 | \usage{ 8 | excel_time_to_numeric(time_value, round_seconds = TRUE) 9 | } 10 | \arguments{ 11 | \item{time_value}{A vector of values to convert (see Details)} 12 | 13 | \item{round_seconds}{Should the output number of seconds be rounded to an 14 | integer?} 15 | } 16 | \value{ 17 | A vector of numbers >= 0 and <86400 18 | } 19 | \description{ 20 | Convert a time that may be inconsistently or inconveniently formatted from 21 | Microsoft Excel to a numeric number of seconds between 0 and 86400. 
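excel_time_to_numeric() ships without an examples section; this is a minimal sketch of the input forms documented in its Details, with the seconds-after-midnight results that the documented behavior implies (not verified outputs).
excel_time_to_numeric(0.5)                    # fraction of a day: 43200 (noon)
excel_time_to_numeric("23:30")                # 24-hour clock string: 84600
excel_time_to_numeric("1899-12-31 06:00:00")  # time on the anchor date: 21600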
22 | } 23 | \details{ 24 | \code{time_value} may be one of the following formats: 25 | \itemize{ 26 | \item{numeric}{The input must be a value from 0 to 1 (exclusive of 1); this value is returned as-is.} 27 | \item{POSIXlt or POSIXct}{The input must be on the day 1899-12-31 (any other day will cause an error). The time of day is extracted and converted to a fraction of a day.} 28 | \item{character}{Any of the following (or a mixture of the choices):} 29 | \itemize{ 30 | \item{A character string that is a number between 0 and 1 (exclusive of 1). This value will be converted like a numeric value.} 31 | \item{A character string that looks like a date on 1899-12-31 (specifically, it must start with \code{"1899-12-31 "}), converted like a POSIXct object as described above.} 32 | \item{A character string that looks like a time. Choices are 12-hour time as hour, minute, and optionally second followed by "am" or "pm" (case insensitive) or 24-hour time when hour, minute, optionally second, and no "am" or "pm" is included.} 33 | } 34 | } 35 | } 36 | \seealso{ 37 | \code{\link[=excel_numeric_to_date]{excel_numeric_to_date()}} 38 | 39 | Other date-time cleaning: 40 | \code{\link{convert_to_date}()}, 41 | \code{\link{excel_numeric_to_date}()}, 42 | \code{\link{sas_numeric_to_date}()} 43 | } 44 | \concept{date-time cleaning} 45 | -------------------------------------------------------------------------------- /man/figures/dirty_data.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/man/figures/dirty_data.PNG -------------------------------------------------------------------------------- /man/figures/logo_small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/man/figures/logo_small.png -------------------------------------------------------------------------------- /man/find_header.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/row_to_names.R 3 | \name{find_header} 4 | \alias{find_header} 5 | \title{Find the header row in a data.frame} 6 | \usage{ 7 | find_header(dat, ...) 8 | } 9 | \arguments{ 10 | \item{dat}{The input data.frame} 11 | 12 | \item{...}{See details} 13 | } 14 | \value{ 15 | The row number for the header row 16 | } 17 | \description{ 18 | Find the header row in a data.frame 19 | } 20 | \details{ 21 | If \code{...} is missing, then the first row with no missing values is used. 22 | 23 | When searching for a specified value or value within a column, the first row 24 | with a match will be returned, regardless of the completeness of the rest of 25 | that row. If \code{...} has a single character argument, then the first 26 | column is searched for that value. If \code{...} has a named numeric 27 | argument, then the column whose position number matches the value of that 28 | argument is searched for the name (see the last example below). If more than one 29 | row is found matching a value that is searched for, the number of the first 30 | matching row will be returned (with a warning). 
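A small sketch complementing find_header()'s examples: the single-character-argument form described above searches the first column for that value and returns the first matching row number.
find_header(data.frame(A = c("x", "Title", "B"), B = c("y", "z", "C")), "Title")  # returns 2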
31 | } 32 | \examples{ 33 | # the first row 34 | find_header(data.frame(A = "B")) 35 | # the second row 36 | find_header(data.frame(A = c(NA, "B"))) 37 | # the second row since the first has an empty value 38 | find_header(data.frame(A = c(NA, "B"), B = c("C", "D"))) 39 | # The third row because the second column was searched for the text "E" 40 | find_header(data.frame(A = c(NA, "B", "C", "D"), B = c("C", "D", "E", "F")), "E" = 2) 41 | } 42 | \seealso{ 43 | Other Set names: 44 | \code{\link{clean_names}()}, 45 | \code{\link{mu_to_u}}, 46 | \code{\link{row_to_names}()} 47 | } 48 | \concept{Set names} 49 | -------------------------------------------------------------------------------- /man/fisher.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/statistical_tests.R 3 | \name{fisher.test} 4 | \alias{fisher.test} 5 | \alias{fisher.test.default} 6 | \alias{fisher.test.tabyl} 7 | \title{Apply \code{stats::fisher.test()} to a two-way tabyl} 8 | \usage{ 9 | fisher.test(x, ...) 10 | 11 | \method{fisher.test}{default}(x, y = NULL, ...) 12 | 13 | \method{fisher.test}{tabyl}(x, ...) 14 | } 15 | \arguments{ 16 | \item{x}{A two-way tabyl, a numeric vector or a factor} 17 | 18 | \item{...}{Parameters passed to \code{\link[stats:fisher.test]{stats::fisher.test()}}} 19 | 20 | \item{y}{if x is a vector, must be another vector or factor of the same length} 21 | } 22 | \value{ 23 | The same as the one of \code{stats::fisher.test()}. 24 | } 25 | \description{ 26 | This generic function overrides \code{\link[stats:fisher.test]{stats::fisher.test()}}. If the passed table 27 | is a two-way tabyl, it runs it through \code{janitor::fisher.test.tabyl}, otherwise 28 | it just calls \code{stats::fisher.test()}. 29 | } 30 | \examples{ 31 | tab <- tabyl(mtcars, gear, cyl) 32 | fisher.test(tab) 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/get_dupes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_dupes.R 3 | \name{get_dupes} 4 | \alias{get_dupes} 5 | \title{Get rows of a \code{data.frame} with identical values for the specified variables.} 6 | \usage{ 7 | get_dupes(dat, ...) 8 | } 9 | \arguments{ 10 | \item{dat}{The input \code{data.frame}.} 11 | 12 | \item{...}{Unquoted variable names to search for duplicates. This takes a 13 | tidyselect specification.} 14 | } 15 | \value{ 16 | A data.frame with the full records where the specified 17 | variables have duplicated values, as well as a variable \code{dupe_count} 18 | showing the number of rows sharing that combination of duplicated values. 19 | If the input data.frame was of class \code{tbl_df}, the output is as well. 20 | } 21 | \description{ 22 | For hunting duplicate records during data cleaning. Specify the data.frame 23 | and the variable combination to search for duplicates and get back the 24 | duplicated rows. 
25 | } 26 | \examples{ 27 | get_dupes(mtcars, mpg, hp) 28 | 29 | # or called with the magrittr pipe \%>\% : 30 | mtcars \%>\% get_dupes(wt) 31 | 32 | # You can use tidyselect helpers to specify variables: 33 | mtcars \%>\% get_dupes(-c(wt, qsec)) 34 | mtcars \%>\% get_dupes(starts_with("cy")) 35 | } 36 | -------------------------------------------------------------------------------- /man/get_one_to_one.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_one_to_one.R 3 | \name{get_one_to_one} 4 | \alias{get_one_to_one} 5 | \title{Find the list of columns that have a 1:1 mapping to each other} 6 | \usage{ 7 | get_one_to_one(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{A \code{data.frame} or similar object} 11 | } 12 | \value{ 13 | A list with one element for each group of columns that map 14 | identically to each other. 15 | } 16 | \description{ 17 | Find the list of columns that have a 1:1 mapping to each other 18 | } 19 | \examples{ 20 | foo <- data.frame( 21 | Lab_Test_Long = c("Cholesterol, LDL", "Cholesterol, LDL", "Glucose"), 22 | Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 23 | LOINC = c(12345, 12345, 54321), 24 | Person = c("Sam", "Bill", "Sam"), 25 | stringsAsFactors = FALSE 26 | ) 27 | get_one_to_one(foo) 28 | } 29 | -------------------------------------------------------------------------------- /man/janitor-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor.R 3 | \docType{package} 4 | \name{janitor-package} 5 | \alias{janitor} 6 | \alias{janitor-package} 7 | \title{janitor: Simple Tools for Examining and Cleaning Dirty Data} 8 | \description{ 9 | The main janitor functions can: perfectly format data.frame column names; provide quick counts of variable combinations (i.e., frequency tables and crosstabs); and explore duplicate records. Other janitor functions nicely format the tabulation results. These tabulate-and-report functions approximate popular features of SPSS and Microsoft Excel. This package follows the principles of the "tidyverse" and works well with the pipe function %>%. janitor was built with beginning-to-intermediate R users in mind and is optimized for user-friendliness. 10 | } 11 | \section{Package context}{ 12 | 13 | Advanced users can do most things covered here, but they can do it 14 | faster with janitor and save their thinking for more fun tasks. 
15 | } 16 | 17 | \seealso{ 18 | Useful links: 19 | \itemize{ 20 | \item \url{https://github.com/sfirke/janitor} 21 | \item \url{https://sfirke.github.io/janitor/} 22 | \item Report bugs at \url{https://github.com/sfirke/janitor/issues} 23 | } 24 | 25 | } 26 | \author{ 27 | \strong{Maintainer}: Sam Firke \email{samuel.firke@gmail.com} 28 | 29 | Other contributors: 30 | \itemize{ 31 | \item Bill Denney \email{wdenney@humanpredictions.com} [contributor] 32 | \item Chris Haid \email{chrishaid@gmail.com} [contributor] 33 | \item Ryan Knight \email{ryangknight@gmail.com} [contributor] 34 | \item Malte Grosser \email{malte.grosser@gmail.com} [contributor] 35 | \item Jonathan Zadra \email{jonathan.zadra@sorensonimpact.com} [contributor] 36 | \item Olivier Roy [contributor] 37 | \item Josep Pueyo-Ros \email{josep.pueyo@udg.edu} [contributor] 38 | } 39 | 40 | } 41 | \keyword{internal} 42 | -------------------------------------------------------------------------------- /man/janitor_deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{janitor_deprecated} 4 | \alias{janitor_deprecated} 5 | \title{Deprecated Functions in Package janitor} 6 | \description{ 7 | These functions have already become defunct or may be defunct as soon as the next release. 8 | } 9 | \details{ 10 | \itemize{ 11 | \item \code{\link[=adorn_crosstab]{adorn_crosstab()}} -> \code{adorn_} 12 | \item \code{\link[=crosstab]{crosstab()}} -> \code{\link[=tabyl]{tabyl()}} 13 | \item \code{\link[=use_first_valid_of]{use_first_valid_of()}} -> \code{\link[dplyr:coalesce]{dplyr::coalesce()}} 14 | \item \code{\link[=convert_to_NA]{convert_to_NA()}} -> \code{\link[dplyr:na_if]{dplyr::na_if()}} 15 | \item \code{\link[=add_totals_col]{add_totals_col()}} -> \code{\link[=adorn_totals]{adorn_totals(where = "col")}} 16 | \item \code{\link[=add_totals_row]{add_totals_row()}} -> \code{\link[=adorn_totals]{adorn_totals()}} 17 | \item \code{\link[=remove_empty_rows]{remove_empty_rows()}} -> \code{\link[=remove_empty]{remove_empty("rows")}} 18 | \item \code{\link[=remove_empty_cols]{remove_empty_cols()}} -> \code{\link[=remove_empty]{remove_empty("cols")}} 19 | } 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/mu_to_u.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/clean_names.R 3 | \docType{data} 4 | \name{mu_to_u} 5 | \alias{mu_to_u} 6 | \title{Constant to help map from mu to u} 7 | \format{ 8 | An object of class \code{character} of length 10. 9 | } 10 | \usage{ 11 | mu_to_u 12 | } 13 | \description{ 14 | This is a character vector with names of all known Unicode code points that 15 | look like the Greek mu or the micro symbol and values of "u". This is 16 | intended to simplify mapping from mu or micro in Unicode to the character "u" 17 | with \code{clean_names()} and \code{make_clean_names()}. 18 | } 19 | \details{ 20 | See the help in \code{clean_names()} for how to use this. 
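A hedged sketch of the intended use of mu_to_u described above, assuming the replace argument of make_clean_names()/clean_names() (the interface the clean_names() help refers to) accepts this named vector:
make_clean_names("\u00b5g", replace = mu_to_u)  # the micro sign should become "u", giving "ug"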
21 | } 22 | \seealso{ 23 | Other Set names: 24 | \code{\link{clean_names}()}, 25 | \code{\link{find_header}()}, 26 | \code{\link{row_to_names}()} 27 | } 28 | \concept{Set names} 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /man/paste_skip_na.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/paste_skip_na.R 3 | \name{paste_skip_na} 4 | \alias{paste_skip_na} 5 | \title{Like \code{paste()}, but missing values are omitted} 6 | \usage{ 7 | paste_skip_na(..., sep = " ", collapse = NULL) 8 | } 9 | \arguments{ 10 | \item{..., sep, collapse}{See \code{\link[base:paste]{base::paste()}}} 11 | } 12 | \value{ 13 | A character vector of pasted values. 14 | } 15 | \description{ 16 | Like \code{paste()}, but missing values are omitted 17 | } 18 | \details{ 19 | If all values are missing, the value from the first argument is 20 | preserved. 21 | } 22 | \examples{ 23 | paste_skip_na(NA) # NA_character_ 24 | paste_skip_na("A", NA) # "A" 25 | paste_skip_na("A", NA, c(NA, "B"), sep = ",") # c("A", "A,B") 26 | } 27 | -------------------------------------------------------------------------------- /man/pipe.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils-pipe.R 3 | \name{\%>\%} 4 | \alias{\%>\%} 5 | \title{Pipe operator} 6 | \usage{ 7 | lhs \%>\% rhs 8 | } 9 | \arguments{ 10 | \item{lhs}{A value or the magrittr placeholder.} 11 | 12 | \item{rhs}{A function call using the magrittr semantics.} 13 | } 14 | \value{ 15 | The result of calling \code{rhs(lhs)}. 16 | } 17 | \description{ 18 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details. 19 | } 20 | \examples{ 21 | mtcars \%>\% 22 | tabyl(carb, cyl) \%>\% 23 | adorn_totals() 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/remove_constant.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove_empties.R 3 | \name{remove_constant} 4 | \alias{remove_constant} 5 | \title{Remove constant columns from a data.frame or matrix.} 6 | \usage{ 7 | remove_constant(dat, na.rm = FALSE, quiet = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame or matrix.} 11 | 12 | \item{na.rm}{should \code{NA} values be removed when considering whether a 13 | column is constant? The default value of \code{FALSE} will result in a 14 | column not being removed if it's a mix of a single value and \code{NA}.} 15 | 16 | \item{quiet}{Should messages be suppressed (\code{TRUE}) or printed 17 | (\code{FALSE}) indicating the summary of empty columns or rows removed?} 18 | } 19 | \description{ 20 | Remove constant columns from a data.frame or matrix. 21 | } 22 | \examples{ 23 | remove_constant(data.frame(A = 1, B = 1:3)) 24 | 25 | # To find the columns that are constant 26 | data.frame(A = 1, B = 1:3) \%>\% 27 | dplyr::select(!dplyr::all_of(names(remove_constant(.)))) \%>\% 28 | unique() 29 | } 30 | \seealso{ 31 | \code{\link[=remove_empty]{remove_empty()}} for removing empty 32 | columns or rows. 
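A small sketch of the na.rm behavior documented for remove_constant() above: a column mixing a single value with NA is kept by default and only treated as constant (and removed) when na.rm = TRUE.
df <- data.frame(A = c(1, NA, 1), B = 1:3)
remove_constant(df)                # keeps A and B
remove_constant(df, na.rm = TRUE)  # drops A, keeps only B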
33 | 34 | Other remove functions: 35 | \code{\link{remove_empty}()} 36 | } 37 | \concept{remove functions} 38 | -------------------------------------------------------------------------------- /man/remove_empty.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove_empties.R 3 | \name{remove_empty} 4 | \alias{remove_empty} 5 | \title{Remove empty rows and/or columns from a data.frame or matrix.} 6 | \usage{ 7 | remove_empty(dat, which = c("rows", "cols"), cutoff = 1, quiet = TRUE) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame or matrix.} 11 | 12 | \item{which}{one of "rows", "cols", or \code{c("rows", "cols")}. Where no 13 | value of which is provided, defaults to removing both empty rows and empty 14 | columns, declaring the behavior with a printed message.} 15 | 16 | \item{cutoff}{What fraction (>0 to <=1) of a row or column must be empty for it to 17 | be removed? The default of 1 removes only fully-empty rows/columns; lower values also remove rows/columns that are only partially empty.} 18 | 19 | \item{quiet}{Should messages be suppressed (\code{TRUE}) or printed 20 | (\code{FALSE}) indicating the summary of empty columns or rows removed?} 21 | } 22 | \value{ 23 | Returns the object without its missing rows or columns. 24 | } 25 | \description{ 26 | Removes all rows and/or columns from a data.frame or matrix that 27 | are composed entirely of \code{NA} values. 28 | } 29 | \examples{ 30 | # not run: 31 | # dat \%>\% remove_empty("rows") 32 | # addressing a common untidy-data scenario where we have a mixture of 33 | # blank values in some (character) columns and NAs in others: 34 | library(dplyr) 35 | dd <- tibble( 36 | x = c(LETTERS[1:5], NA, rep("", 2)), 37 | y = c(1:5, rep(NA, 3)) 38 | ) 39 | # remove_empty() drops row 6 (all NA) but not rows 7 and 8 (blanks + NAs) 40 | dd \%>\% remove_empty("rows") 41 | # solution: preprocess to convert whitespace/empty strings to NA, 42 | # _then_ remove empty (all-NA) rows 43 | dd \%>\% 44 | mutate(across(where(is.character), ~ na_if(trimws(.), ""))) \%>\% 45 | remove_empty("rows") 46 | } 47 | \seealso{ 48 | \code{\link[=remove_constant]{remove_constant()}} for removing 49 | constant columns. 50 | 51 | Other remove functions: 52 | \code{\link{remove_constant}()} 53 | } 54 | \concept{remove functions} 55 | -------------------------------------------------------------------------------- /man/remove_empty_cols.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{remove_empty_cols} 4 | \alias{remove_empty_cols} 5 | \title{Removes empty columns from a data.frame.} 6 | \usage{ 7 | remove_empty_cols(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame.} 11 | } 12 | \value{ 13 | Returns the data.frame with no empty columns. 14 | } 15 | \description{ 16 | This function is deprecated, use \code{\link[=remove_empty]{remove_empty("cols")}} instead.
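A minimal sketch of the remove_empty("cols") call that replaces the deprecated function above, with quiet = FALSE so the summary message described in the quiet argument is printed.
data.frame(A = NA, B = 1:3) %>%
  remove_empty("cols", quiet = FALSE)  # drops the all-NA column A and reports it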
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/remove_empty_rows.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{remove_empty_rows} 4 | \alias{remove_empty_rows} 5 | \title{Removes empty rows from a data.frame.} 6 | \usage{ 7 | remove_empty_rows(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{the input data.frame.} 11 | } 12 | \value{ 13 | Returns the data.frame with no empty rows. 14 | } 15 | \description{ 16 | This function is deprecated, use \code{\link[=remove_empty]{remove_empty("rows")}} instead. 17 | } 18 | \examples{ 19 | # not run: 20 | # dat \%>\% remove_empty_rows 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /man/round_half_up.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/round_half_up.R 3 | \name{round_half_up} 4 | \alias{round_half_up} 5 | \title{Round a numeric vector; halves will be rounded up, ala Microsoft Excel.} 6 | \usage{ 7 | round_half_up(x, digits = 0) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric vector to round.} 11 | 12 | \item{digits}{how many digits should be displayed after the decimal point?} 13 | } 14 | \value{ 15 | A vector with the same length as \code{x} 16 | } 17 | \description{ 18 | In base R \code{round()}, halves are rounded to even, e.g., 12.5 and 19 | 11.5 are both rounded to 12. This function rounds 12.5 to 13 (assuming 20 | \code{digits = 0}). Negative halves are rounded away from zero, e.g., -0.5 is 21 | rounded to -1. 22 | 23 | This may skew subsequent statistical analysis of the data, but may be 24 | desirable in certain contexts. This function is implemented exactly from 25 | \url{https://stackoverflow.com/a/12688836}; see that question and comments for 26 | discussion of this issue. 27 | } 28 | \examples{ 29 | round_half_up(12.5) 30 | round_half_up(1.125, 2) 31 | round_half_up(1.125, 1) 32 | round_half_up(-0.5, 0) # negatives get rounded away from zero 33 | 34 | } 35 | -------------------------------------------------------------------------------- /man/round_to_fraction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/round_to_fraction.R 3 | \name{round_to_fraction} 4 | \alias{round_to_fraction} 5 | \title{Round to the nearest fraction of a specified denominator.} 6 | \usage{ 7 | round_to_fraction(x, denominator, digits = Inf) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric vector} 11 | 12 | \item{denominator}{The denominator of the fraction for rounding (a scalar or 13 | vector positive integer).} 14 | 15 | \item{digits}{Integer indicating the number of decimal places to be used 16 | after rounding to the fraction. This is passed to \code{base::round()}). 17 | Negative values are allowed (see Details). (\code{Inf} indicates no 18 | subsequent rounding)} 19 | } 20 | \value{ 21 | the input x rounded to a decimal value that has an integer numerator relative 22 | to \code{denominator} (possibly subsequently rounded to a number of decimal 23 | digits). 24 | } 25 | \description{ 26 | Round a decimal to the precise decimal value of a specified 27 | fractional denominator. 
Common use cases include addressing floating point 28 | imprecision and enforcing that data values fall into a certain set. 29 | 30 | E.g., if a decimal represents hours and values should be logged to the nearest 31 | minute, \code{round_to_fraction(x, 60)} would enforce that distribution and 0.57 32 | would be rounded to 0.566667, the equivalent of 34/60. 0.56 would also be rounded 33 | to 34/60. 34 | 35 | Set \code{denominator = 1} to round to whole numbers. 36 | 37 | The \code{digits} argument allows for rounding of the subsequent result. 38 | } 39 | \details{ 40 | If \code{digits} is \code{Inf}, \code{x} is rounded to the fraction 41 | and then kept at full precision. If \code{digits} is \code{"auto"}, the 42 | number of digits is automatically selected as 43 | \code{ceiling(log10(denominator)) + 1}. 44 | } 45 | \examples{ 46 | round_to_fraction(1.6, denominator = 2) 47 | round_to_fraction(pi, denominator = 7) # 22/7 48 | round_to_fraction(c(8.1, 9.2), denominator = c(7, 8)) 49 | round_to_fraction(c(8.1, 9.2), denominator = c(7, 8), digits = 3) 50 | round_to_fraction(c(8.1, 9.2, 10.3), denominator = c(7, 8, 1001), digits = "auto") 51 | } 52 | -------------------------------------------------------------------------------- /man/row_to_names.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/row_to_names.R 3 | \name{row_to_names} 4 | \alias{row_to_names} 5 | \title{Elevate a row to be the column names of a data.frame.} 6 | \usage{ 7 | row_to_names( 8 | dat, 9 | row_number, 10 | ..., 11 | remove_row = TRUE, 12 | remove_rows_above = TRUE, 13 | sep = "_" 14 | ) 15 | } 16 | \arguments{ 17 | \item{dat}{The input data.frame} 18 | 19 | \item{row_number}{The row(s) of \code{dat} containing the variable names or the 20 | string \code{"find_header"} to use \code{find_header(dat=dat, ...)} to find 21 | the row_number. Allows for multiple rows input as a numeric vector. NA's are 22 | ignored, and if a column contains only \code{NA} value it will be named \code{"NA"}.} 23 | 24 | \item{...}{Sent to \code{find_header()}, if 25 | \code{row_number = "find_header"}. Otherwise, ignored.} 26 | 27 | \item{remove_row}{Should the row \code{row_number} be removed from the 28 | resulting data.frame?} 29 | 30 | \item{remove_rows_above}{If \code{row_number != 1}, should the rows above 31 | \code{row_number} - that is, between \code{1:(row_number-1)} - be removed 32 | from the resulting data.frame?} 33 | 34 | \item{sep}{A character string to separate the values in the case of multiple 35 | rows input to \code{row_number}.} 36 | } 37 | \value{ 38 | A data.frame with new names (and some rows removed, if specified) 39 | } 40 | \description{ 41 | Elevate a row to be the column names of a data.frame. 
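A small sketch, not from the package's examples, of the multi-row form documented for row_to_names() above: two header rows are pasted together with sep to build the new names, and with the default remove_row = TRUE both header rows are dropped.
y <- data.frame(
  X_1 = c("Region", "North", "1", "2"),
  X_2 = c("Region", "South", "3", "4")
)
row_to_names(y, row_number = 1:2, sep = "_")  # names become "Region_North" and "Region_South"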
42 | } 43 | \examples{ 44 | x <- data.frame( 45 | X_1 = c(NA, "Title", 1:3), 46 | X_2 = c(NA, "Title2", 4:6) 47 | ) 48 | x \%>\% 49 | row_to_names(row_number = 2) 50 | 51 | x \%>\% 52 | row_to_names(row_number = "find_header") 53 | } 54 | \seealso{ 55 | Other Set names: 56 | \code{\link{clean_names}()}, 57 | \code{\link{find_header}()}, 58 | \code{\link{mu_to_u}} 59 | } 60 | \concept{Set names} 61 | -------------------------------------------------------------------------------- /man/sas_numeric_to_date.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/sas_dates.R 3 | \name{sas_numeric_to_date} 4 | \alias{sas_numeric_to_date} 5 | \title{Convert a SAS date, time or date/time to an R object} 6 | \usage{ 7 | sas_numeric_to_date(date_num, datetime_num, time_num, tz = "UTC") 8 | } 9 | \arguments{ 10 | \item{date_num}{numeric vector of serial numbers to convert.} 11 | 12 | \item{datetime_num}{numeric vector of date/time numbers (seconds since 13 | midnight 1960-01-01) to convert} 14 | 15 | \item{time_num}{numeric vector of time numbers (seconds since midnight on the 16 | current day) to convert} 17 | 18 | \item{tz}{Time zone, used when \code{include_time = TRUE} (see details for 19 | more information on timezones).} 20 | } 21 | \value{ 22 | If a date and time or datetime are provided, a POSIXct object. If a 23 | date is provided, a Date object. If a time is provided, an hms::hms object 24 | } 25 | \description{ 26 | Convert a SAS date, time or date/time to an R object 27 | } 28 | \examples{ 29 | sas_numeric_to_date(date_num = 15639) # 2002-10-26 30 | sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC") # 1998-07-26T14:45:32Z 31 | sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC") # 2002-10-26T01:00:00Z 32 | sas_numeric_to_date(time_num = 3600) # 01:00:00 33 | } 34 | \references{ 35 | SAS Date, Time, and Datetime Values reference (retrieved on 36 | 2022-03-08): https://v8doc.sas.com/sashtml/lrcon/zenid-63.htm 37 | } 38 | \seealso{ 39 | Other date-time cleaning: 40 | \code{\link{convert_to_date}()}, 41 | \code{\link{excel_numeric_to_date}()}, 42 | \code{\link{excel_time_to_numeric}()} 43 | } 44 | \concept{date-time cleaning} 45 | -------------------------------------------------------------------------------- /man/signif_half_up.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/round_half_up.R 3 | \name{signif_half_up} 4 | \alias{signif_half_up} 5 | \title{Round a numeric vector to the specified number of significant digits; halves will be rounded up.} 6 | \usage{ 7 | signif_half_up(x, digits = 6) 8 | } 9 | \arguments{ 10 | \item{x}{a numeric vector to round.} 11 | 12 | \item{digits}{integer indicating the number of significant digits to be used.} 13 | } 14 | \description{ 15 | In base R \code{signif()}, halves are rounded to even, e.g., 16 | \code{signif(11.5, 2)} and \code{signif(12.5, 2)} are both rounded to 12. 17 | This function rounds 12.5 to 13 (assuming \code{digits = 2}). Negative halves 18 | are rounded away from zero, e.g., \code{signif(-2.5, 1)} is rounded to -3. 19 | 20 | This may skew subsequent statistical analysis of the data, but may be 21 | desirable in certain contexts. This function is implemented from 22 | \url{https://stackoverflow.com/a/1581007/}; see that question and 23 | comments for discussion of this issue. 
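A two-line contrast of the behavior described above, using the value from signif_half_up()'s description:
signif(12.5, 2)          # 12: base R rounds the half to even
signif_half_up(12.5, 2)  # 13: halves are rounded up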
24 | } 25 | \examples{ 26 | signif_half_up(12.5, 2) 27 | signif_half_up(1.125, 3) 28 | signif_half_up(-2.5, 1) # negatives get rounded away from zero 29 | 30 | } 31 | -------------------------------------------------------------------------------- /man/single_value.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/single_value.R 3 | \name{single_value} 4 | \alias{single_value} 5 | \title{Ensure that a vector has only a single value throughout.} 6 | \usage{ 7 | single_value(x, missing = NA, warn_if_all_missing = FALSE, info = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{The vector which should have a single value} 11 | 12 | \item{missing}{The vector of values to consider missing in \code{x}} 13 | 14 | \item{warn_if_all_missing}{Generate a warning if all values are missing?} 15 | 16 | \item{info}{If more than one value is found, append this to the warning or 17 | error to assist with determining the location of the issue.} 18 | } 19 | \value{ 20 | \code{x} as the scalar single value found throughout (or an error if 21 | more than one value is found). 22 | } 23 | \description{ 24 | Missing values are replaced with the single value, and if all values are 25 | missing, the first value in \code{missing} is used throughout. 26 | } 27 | \examples{ 28 | # A simple use case with vectors of input 29 | 30 | single_value(c(NA, 1)) 31 | # Multiple, different values of missing can be given 32 | single_value(c(NA, "a"), missing = c(NA, "a")) 33 | 34 | # A typical use case with a grouped data.frame used for input and the output 35 | # (`B` is guaranteed to have a single value and only one row, in this case) 36 | data.frame( 37 | A = rep(1:3, each = 2), 38 | B = c(rep(4:6, each = 2)) 39 | ) \%>\% 40 | dplyr::group_by(A) \%>\% 41 | dplyr::summarize( 42 | B = single_value(B) 43 | ) 44 | 45 | try( 46 | # info is useful to give when multiple values may be found to see what 47 | # grouping variable or what calculation is causing the error 48 | data.frame( 49 | A = rep(1:3, each = 2), 50 | B = c(rep(1:2, each = 2), 1:2) 51 | ) \%>\% 52 | dplyr::group_by(A) \%>\% 53 | dplyr::mutate( 54 | C = single_value(B, info = paste("Calculating C for group A=", A)) 55 | ) 56 | ) 57 | } 58 | -------------------------------------------------------------------------------- /man/tabyl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tabyl.R 3 | \name{tabyl} 4 | \alias{tabyl} 5 | \alias{tabyl.default} 6 | \alias{tabyl.data.frame} 7 | \title{Generate a frequency table (1-, 2-, or 3-way).} 8 | \usage{ 9 | tabyl(dat, ...) 10 | 11 | \method{tabyl}{default}(dat, show_na = TRUE, show_missing_levels = TRUE, ...) 12 | 13 | \method{tabyl}{data.frame}(dat, var1, var2, var3, show_na = TRUE, show_missing_levels = TRUE, ...) 14 | } 15 | \arguments{ 16 | \item{dat}{A \code{data.frame} containing the variables you wish to count. 17 | Or, a vector you want to tabulate.} 18 | 19 | \item{...}{Additional arguments passed to methods.} 20 | 21 | \item{show_na}{Should counts of \code{NA} values be displayed? In a one-way tabyl, 22 | the presence of \code{NA} values triggers an additional column showing valid percentages 23 | (calculated excluding \code{NA} values).} 24 | 25 | \item{show_missing_levels}{Should counts of missing levels of factors be displayed? 26 | These will be rows and/or columns of zeroes. 
Useful for keeping consistent 27 | output dimensions even when certain factor levels may not be present in the data.} 28 | 29 | \item{var1}{The column name of the first variable.} 30 | 31 | \item{var2}{(optional) the column name of the second variable 32 | (its values become the column names in a 2-way tabulation).} 33 | 34 | \item{var3}{(optional) the column name of the third variable 35 | (a 3-way tabulation is split into a list on its values).} 36 | } 37 | \value{ 38 | A \code{data.frame} with frequencies and percentages of the tabulated variable(s). 39 | A 3-way tabulation returns a list of data frames. 40 | } 41 | \description{ 42 | A fully-featured alternative to \code{table()}. Results are data.frames and can be 43 | formatted and enhanced with janitor's family of \code{adorn_} functions. 44 | 45 | Specify a \code{data.frame} and the one, two, or three unquoted column names you 46 | want to tabulate. Three variables generates a list of 2-way tabyls, 47 | split by the third variable. 48 | 49 | Alternatively, you can tabulate a single variable that isn't in a \code{data.frame} 50 | by calling \code{tabyl()} on a vector, e.g., \code{tabyl(mtcars$gear)}. 51 | } 52 | \examples{ 53 | 54 | tabyl(mtcars, cyl) 55 | tabyl(mtcars, cyl, gear) 56 | tabyl(mtcars, cyl, gear, am) 57 | 58 | # or using the \%>\% pipe 59 | mtcars \%>\% 60 | tabyl(cyl, gear) 61 | 62 | # illustrating show_na functionality: 63 | my_cars <- rbind(mtcars, rep(NA, 11)) 64 | my_cars \%>\% tabyl(cyl) 65 | my_cars \%>\% tabyl(cyl, show_na = FALSE) 66 | 67 | # Calling on a single vector not in a data.frame: 68 | val <- c("hi", "med", "med", "lo") 69 | tabyl(val) 70 | } 71 | -------------------------------------------------------------------------------- /man/top_levels.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/top_levels.R 3 | \name{top_levels} 4 | \alias{top_levels} 5 | \title{Generate a frequency table of a factor grouped into top-n, bottom-n, and all 6 | other levels.} 7 | \usage{ 8 | top_levels(input_vec, n = 2, show_na = FALSE) 9 | } 10 | \arguments{ 11 | \item{input_vec}{The factor variable to tabulate.} 12 | 13 | \item{n}{Number of levels to include in top and bottom groups} 14 | 15 | \item{show_na}{Should cases where the variable is \code{NA} be shown?} 16 | } 17 | \value{ 18 | A \code{data.frame} (actually a \code{tbl_df}) with the frequencies of the 19 | grouped, tabulated variable. Includes counts and percentages, and valid 20 | percentages (calculated omitting \code{NA} values, if present in the vector and 21 | \code{show_na = TRUE}.) 22 | } 23 | \description{ 24 | Get a frequency table of a factor variable, grouped into categories by level. 25 | } 26 | \examples{ 27 | top_levels(as.factor(mtcars$hp), 2) 28 | } 29 | -------------------------------------------------------------------------------- /man/untabyl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/as_and_untabyl.R 3 | \name{untabyl} 4 | \alias{untabyl} 5 | \title{Remove \code{tabyl} attributes from a data.frame.} 6 | \usage{ 7 | untabyl(dat) 8 | } 9 | \arguments{ 10 | \item{dat}{a \code{data.frame} of class \code{tabyl}.} 11 | } 12 | \value{ 13 | the same \code{data.frame}, but without the \code{tabyl} class and attributes. 
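A small sketch of tabyl()'s show_missing_levels argument documented above: the unused factor level "z" appears as a row of zeroes by default and is dropped when the argument is FALSE.
fct_df <- data.frame(
  grp  = factor(c("x", "y"), levels = c("x", "y", "z")),
  flag = c(0, 1)
)
tabyl(fct_df, grp, flag)                               # includes a zero-count row for "z"
tabyl(fct_df, grp, flag, show_missing_levels = FALSE)  # drops the "z" row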
14 | } 15 | \description{ 16 | Strips away all \code{tabyl}-related attributes from a data.frame. 17 | } 18 | \examples{ 19 | 20 | mtcars \%>\% 21 | tabyl(am) \%>\% 22 | untabyl() \%>\% 23 | attributes() # tabyl-specific attributes are gone 24 | } 25 | -------------------------------------------------------------------------------- /man/use_first_valid_of.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/janitor_deprecated.R 3 | \name{use_first_valid_of} 4 | \alias{use_first_valid_of} 5 | \title{Returns first non-\code{NA} value from a set of vectors.} 6 | \usage{ 7 | use_first_valid_of(..., if_all_NA = NA) 8 | } 9 | \arguments{ 10 | \item{...}{the input vectors. Order matters: these are searched and prioritized in the order they are supplied.} 11 | 12 | \item{if_all_NA}{what value should be used when all of the vectors return \code{NA} for a certain index? Default is \code{NA}.} 13 | } 14 | \value{ 15 | Returns a single vector with the selected values. 16 | } 17 | \description{ 18 | Warning: Deprecated, do not use in new code. Use \code{\link[dplyr:coalesce]{dplyr::coalesce()}} instead. 19 | 20 | At each position of the input vectors, iterates through in order and returns the first non-NA value. 21 | This is a robust replacement of the common \code{ifelse(!is.na(x), x, ifelse(!is.na(y), y, z))}. 22 | It's more readable and handles problems like \code{\link[=ifelse]{ifelse()}}'s inability to work with dates in this way. 23 | } 24 | \seealso{ 25 | janitor_deprecated 26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /revdep/.gitignore: -------------------------------------------------------------------------------- 1 | checks 2 | library 3 | checks.noindex 4 | library.noindex 5 | data.sqlite 6 | *.html 7 | -------------------------------------------------------------------------------- /revdep/README.md: -------------------------------------------------------------------------------- 1 | # Platform 2 | 3 | |field |value | 4 | |:--------|:----------------------------| 5 | |version |R version 3.4.4 (2018-03-15) | 6 | |os |Ubuntu 18.04.2 LTS | 7 | |system |x86_64, linux-gnu | 8 | |ui |RStudio | 9 | |language |(EN) | 10 | |collate |en_US.UTF-8 | 11 | |tz |America/New_York | 12 | |date |2019-03-13 | 13 | 14 | # Dependencies 15 | 16 | |package |old |new |Δ | 17 | |:----------|:--------|:----------|:--| 18 | |janitor |1.1.1 |1.1.1.9000 |* | 19 | |assertthat |0.2.0 |0.2.0 | | 20 | |BH |1.69.0-1 |1.69.0-1 | | 21 | |cli |1.0.1 |1.0.1 | | 22 | |crayon |1.3.4 |1.3.4 | | 23 | |dplyr |0.8.0.1 |0.8.0.1 | | 24 | |fansi |0.4.0 |0.4.0 | | 25 | |glue |1.3.1 |1.3.1 | | 26 | |magrittr |1.5 |1.5 | | 27 | |pillar |1.3.1 |1.3.1 | | 28 | |pkgconfig |2.0.2 |2.0.2 | | 29 | |plogr |0.2.0 |0.2.0 | | 30 | |purrr |0.3.1 |0.3.1 | | 31 | |R6 |2.4.0 |2.4.0 | | 32 | |Rcpp |1.0.0 |1.0.0 | | 33 | |rlang |0.3.1 |0.3.1 | | 34 | |snakecase |0.9.2 |0.9.2 | | 35 | |stringi |1.4.3 |1.4.3 | | 36 | |stringr |1.4.0 |1.4.0 | | 37 | |tibble |2.0.1 |2.0.1 | | 38 | |tidyr |0.8.3 |0.8.3 | | 39 | |tidyselect |0.2.5 |0.2.5 | | 40 | |utf8 |1.1.4 |1.1.4 | | 41 | 42 | # Revdeps 43 | 44 | ## Couldn't check (2) 45 | 46 | |package |version |error |warning |note | 47 | |:---------------|:-------|:-----|:-------|:----| 48 | |bomrang |? | | | | 49 | |fivethirtyeight |? | | | | 50 | 51 | ## All (8) 52 | 53 | |package |version |error |warning |note | 54 | |:------------------------------------|:-------|:-----|:-------|:----| 55 | |[ballr](problems.md#ballr) |0.2.3 |1 |1 |1 | 56 | |bomrang |? | | | | 57 | |[congressbr](problems.md#congressbr) |0.2.0 |1 |1 |2 | 58 | |[driftR](problems.md#driftr) |1.1.0 |1 |1 | | 59 | |fivethirtyeight |? 
| | | | 60 | |[moderndive](problems.md#moderndive) |0.2.0 |1 |1 | | 61 | |[postal](problems.md#postal) |0.1.1 |1 |1 | | 62 | |[questionr](problems.md#questionr) |0.7.0 |1 |1 |2 | 63 | 64 | -------------------------------------------------------------------------------- /revdep/checks.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/revdep/checks.rds -------------------------------------------------------------------------------- /revdep/email.yml: -------------------------------------------------------------------------------- 1 | release_date: ??? 2 | rel_release_date: ??? 3 | my_news_url: ??? 4 | release_version: ??? 5 | -------------------------------------------------------------------------------- /revdep/problems.md: -------------------------------------------------------------------------------- 1 | # ballr 2 | 3 | Version: 0.2.3 4 | 5 | ## In both 6 | 7 | * checking PDF version of manual without hyperrefs or index ... ERROR 8 | ``` 9 | Re-running with no redirection of stdout/stderr. 10 | Hmm ... looks like a package 11 | You may want to clean up by 'rm -rf /tmp/RtmpUWMLZE/Rd2pdf4edb13f7208d' 12 | ``` 13 | 14 | * checking PDF version of manual ... WARNING 15 | ``` 16 | LaTeX errors when creating PDF version. 17 | This typically indicates Rd problems. 18 | ``` 19 | 20 | * checking package dependencies ... NOTE 21 | ``` 22 | Package suggested but not available for checking: ‘devtools’ 23 | ``` 24 | 25 | # congressbr 26 | 27 | Version: 0.2.0 28 | 29 | ## In both 30 | 31 | * checking PDF version of manual without hyperrefs or index ... ERROR 32 | ``` 33 | Re-running with no redirection of stdout/stderr. 34 | Hmm ... looks like a package 35 | You may want to clean up by 'rm -rf /tmp/RtmpUMM3at/Rd2pdf9f95c96e9c0' 36 | ``` 37 | 38 | * checking PDF version of manual ... WARNING 39 | ``` 40 | LaTeX errors when creating PDF version. 41 | This typically indicates Rd problems. 42 | ``` 43 | 44 | * checking package dependencies ... NOTE 45 | ``` 46 | Package suggested but not available for checking: ‘devtools’ 47 | ``` 48 | 49 | * checking data for non-ASCII characters ... NOTE 50 | ``` 51 | Note: found 1 marked UTF-8 string 52 | ``` 53 | 54 | # driftR 55 | 56 | Version: 1.1.0 57 | 58 | ## In both 59 | 60 | * checking PDF version of manual without hyperrefs or index ... ERROR 61 | ``` 62 | Re-running with no redirection of stdout/stderr. 63 | Hmm ... looks like a package 64 | You may want to clean up by 'rm -rf /tmp/Rtmph3nrYw/Rd2pdf175d6a37b8fb' 65 | ``` 66 | 67 | * checking PDF version of manual ... WARNING 68 | ``` 69 | LaTeX errors when creating PDF version. 70 | This typically indicates Rd problems. 71 | ``` 72 | 73 | # moderndive 74 | 75 | Version: 0.2.0 76 | 77 | ## In both 78 | 79 | * checking PDF version of manual without hyperrefs or index ... ERROR 80 | ``` 81 | Re-running with no redirection of stdout/stderr. 82 | Hmm ... looks like a package 83 | You may want to clean up by 'rm -rf /tmp/RtmpG2Xo0M/Rd2pdf51a252e4d198' 84 | ``` 85 | 86 | * checking PDF version of manual ... WARNING 87 | ``` 88 | LaTeX errors when creating PDF version. 89 | This typically indicates Rd problems. 90 | ``` 91 | 92 | # postal 93 | 94 | Version: 0.1.1 95 | 96 | ## In both 97 | 98 | * checking PDF version of manual without hyperrefs or index ... ERROR 99 | ``` 100 | Re-running with no redirection of stdout/stderr. 101 | Hmm ... 
looks like a package 102 | You may want to clean up by 'rm -rf /tmp/RtmpgzvDpk/Rd2pdf5fc260d75179' 103 | ``` 104 | 105 | * checking PDF version of manual ... WARNING 106 | ``` 107 | LaTeX errors when creating PDF version. 108 | This typically indicates Rd problems. 109 | ``` 110 | 111 | # questionr 112 | 113 | Version: 0.7.0 114 | 115 | ## In both 116 | 117 | * checking PDF version of manual without hyperrefs or index ... ERROR 118 | ``` 119 | Re-running with no redirection of stdout/stderr. 120 | Hmm ... looks like a package 121 | You may want to clean up by 'rm -rf /tmp/RtmpHnFYMV/Rd2pdf289a7f920965' 122 | ``` 123 | 124 | * checking PDF version of manual ... WARNING 125 | ``` 126 | LaTeX errors when creating PDF version. 127 | This typically indicates Rd problems. 128 | ``` 129 | 130 | * checking Rd cross-references ... NOTE 131 | ``` 132 | Package unavailable to check Rd xrefs: ‘Hmisc’ 133 | ``` 134 | 135 | * checking data for non-ASCII characters ... NOTE 136 | ``` 137 | Note: found 4145 marked UTF-8 strings 138 | ``` 139 | 140 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/tests.html 7 | # * https://testthat.r-lib.org/reference/test_package.html#special-files 8 | 9 | library(testthat) 10 | library(janitor) 11 | 12 | test_check("janitor") 13 | -------------------------------------------------------------------------------- /tests/testthat/test-adorn-pct-formatting.R: -------------------------------------------------------------------------------- 1 | source1 <- mtcars %>% 2 | tabyl(cyl, am) %>% 3 | adorn_percentages() 4 | 5 | test_that("calculations are accurate", { 6 | expect_equal( 7 | untabyl(adorn_pct_formatting(source1)), # default parameter is denom = "row" 8 | data.frame( 9 | cyl = c(4, 6, 8), 10 | `0` = c("27.3%", "57.1%", "85.7%"), 11 | `1` = c("72.7%", "42.9%", "14.3%"), 12 | check.names = FALSE, 13 | stringsAsFactors = FALSE 14 | ) 15 | ) 16 | }) 17 | 18 | test_that("data.frames with no numeric columns beyond the first cause failure", { 19 | expect_error( 20 | adorn_pct_formatting(data.frame(a = 1:2, b = c("hi", "lo"))), 21 | "at least one targeted column must be of class numeric", 22 | fixed = TRUE 23 | ) 24 | }) 25 | 26 | dat <- data.frame(Operation = c("Login", "Posted", "Deleted"), `Total Count` = c(5, 25, 40), check.names = FALSE) 27 | 28 | test_that("works with a single numeric column per #89", { 29 | expect_equal( 30 | dat %>% adorn_percentages("col") %>% untabyl(), 31 | data.frame( 32 | Operation = c("Login", "Posted", "Deleted"), 33 | `Total Count` = c(5 / 70, 25 / 70, 40 / 70), 34 | check.names = FALSE 35 | ) 36 | ) 37 | }) 38 | 39 | test_that("works with totals row", { 40 | expect_equal( 41 | dat %>% adorn_totals("row") %>% adorn_percentages("col") %>% untabyl(), 42 | data.frame( 43 | Operation = c("Login", "Posted", "Deleted", "Total"), 44 | `Total Count` = c(5 / 70, 25 / 70, 40 / 70, 1), 45 | check.names = FALSE, stringsAsFactors = FALSE 46 | ) 47 | ) 48 | }) 49 | 50 | test_that("works with one-way tabyl", { 51 | expect_equal( 52 | mtcars %>% 53 | tabyl(carb) %>% 54 | adorn_pct_formatting(digits = 0) %>% 55 | untabyl(), 56 | data.frame( 57 | carb = c(1:4, 6, 8), 58 | n = c(7, 
10, 3, 10, 1, 1), 59 | percent = c("22%", "31%", "9%", "31%", "3%", "3%"), 60 | stringsAsFactors = FALSE 61 | ) 62 | ) 63 | }) 64 | 65 | test_that("NAs are replaced with dashes when percentage signs are affixed", { 66 | # NaNs from adorn_percentages, the more common case (still uncommon) 67 | has_nans <- mtcars %>% 68 | tabyl(carb, cyl) %>% 69 | .[5:6, ] %>% 70 | adorn_percentages("col") %>% 71 | adorn_pct_formatting() %>% 72 | untabyl() 73 | row.names(has_nans) <- NULL 74 | expect_equal( 75 | has_nans, 76 | data.frame( 77 | carb = c(6, 8), 78 | `4` = c("-", "-"), 79 | `6` = c("100.0%", "0.0%"), 80 | `8` = c("0.0%", "100.0%"), 81 | check.names = FALSE, 82 | stringsAsFactors = FALSE 83 | ) 84 | ) 85 | 86 | # NAs convert to - 87 | has_nas <- data.frame(a = c("big", "little"), x = c(0.1, 0.123), y = c(0.98, NA), stringsAsFactors = FALSE) 88 | expect_equal( 89 | adorn_pct_formatting(has_nas), 90 | data.frame(a = c("big", "little"), x = c("10.0%", "12.3%"), y = c("98.0%", "-"), stringsAsFactors = FALSE) 91 | ) 92 | }) 93 | 94 | test_that("NAs are replaced with dashes - no percentage signs affixed", { 95 | # NaNs from adorn_percentages, the more common case (still uncommon) 96 | has_nans <- mtcars %>% 97 | tabyl(carb, cyl) %>% 98 | .[5:6, ] %>% 99 | adorn_percentages("col") %>% 100 | adorn_pct_formatting(affix_sign = FALSE) %>% 101 | untabyl() 102 | row.names(has_nans) <- NULL 103 | expect_equal( 104 | has_nans, 105 | data.frame( 106 | carb = c(6, 8), 107 | `4` = c("-", "-"), 108 | `6` = c("100.0", "0.0"), 109 | `8` = c("0.0", "100.0"), 110 | check.names = FALSE, 111 | stringsAsFactors = FALSE 112 | ) 113 | ) 114 | 115 | # NAs convert to - symbol 116 | has_nas <- data.frame(a = c("big", "little"), x = c(0.1, 0.123), y = c(0.98, NA), stringsAsFactors = FALSE) 117 | expect_equal( 118 | adorn_pct_formatting(has_nas, affix_sign = FALSE), 119 | data.frame(a = c("big", "little"), x = c("10.0", "12.3"), y = c("98.0", "-"), stringsAsFactors = FALSE) 120 | ) 121 | }) 122 | 123 | 124 | test_that("bad rounding argument caught", { 125 | expect_error( 126 | dat %>% 127 | adorn_percentages() %>% 128 | adorn_pct_formatting(rounding = "blargh"), 129 | "`rounding` must be one of \"half to even\" or \"half up\", not \"blargh\".", 130 | fixed = TRUE 131 | ) 132 | }) 133 | 134 | test_that("automatically invokes purrr::map when called on a 3-way tabyl", { 135 | three <- tabyl(mtcars, cyl, am, gear) 136 | expect_equal( 137 | adorn_pct_formatting(three), # vanilla call 138 | purrr::map(three, adorn_pct_formatting) 139 | ) 140 | 141 | # with arguments passing through 142 | expect_equal( 143 | adorn_pct_formatting(three, 2, "half up", affix_sign = FALSE), 144 | purrr::map(three, adorn_pct_formatting, 2, "half up", FALSE) 145 | ) 146 | }) 147 | 148 | test_that("non-data.frame inputs are handled", { 149 | expect_error(adorn_pct_formatting(1:5), "adorn_pct_formatting() must be called on a data.frame or list of data.frames", fixed = TRUE) 150 | }) 151 | 152 | test_that("tidyselecting works", { 153 | target <- data.frame( 154 | color = c("green", "blue", "red"), 155 | first_wave = c(1:3), 156 | second_wave = c(4:6), 157 | third_wave = c(3, 3, 3), 158 | size = c("small", "medium", "large"), 159 | stringsAsFactors = FALSE 160 | ) %>% 161 | adorn_percentages() 162 | 163 | two_cols <- target %>% 164 | adorn_pct_formatting(, , , first_wave:second_wave) 165 | expect_equal(two_cols$first_wave, c("12.5%", "20.0%", "25.0%")) 166 | expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12)) 167 | 168 | expect_message( 169 | target %>% 
170 | adorn_pct_formatting(, , , third_wave:size), 171 | "At least one non-numeric column was specified and will not be modified." 172 | ) 173 | # correct behavior occurs when text columns are skipped 174 | expect_message( 175 | text_skipped <- target %>% 176 | adorn_pct_formatting(., , , , c(first_wave, size)), 177 | "At least one non-numeric column was specified and will not be modified." 178 | ) 179 | 180 | expect_equal(text_skipped$first_wave, c("12.5%", "20.0%", "25.0%")) 181 | expect_equal( 182 | text_skipped %>% dplyr::select(-first_wave), 183 | target %>% dplyr::select(-first_wave), 184 | ignore_attr = TRUE 185 | ) 186 | }) 187 | 188 | test_that("decimal.mark works", { 189 | locale_decimal_sep <- getOption("OutDec") # not sure if it's necessary to save and restore this, 190 | # but seems safe for locale-independent testing processes 191 | options(OutDec = ",") 192 | expect_true( 193 | all(grepl(",", unlist(adorn_pct_formatting(source1)[2]))) 194 | ) 195 | options(OutDec = locale_decimal_sep) 196 | }) 197 | -------------------------------------------------------------------------------- /tests/testthat/test-adorn-rounding.R: -------------------------------------------------------------------------------- 1 | x <- data.frame( 2 | a = c(rep("x", 55), rep("y", 45)), 3 | b = c(rep("x", 50), rep("y", 50)), 4 | stringsAsFactors = FALSE 5 | ) 6 | 7 | # Crosstab with decimal values ending in .5 8 | y <- x %>% 9 | tabyl(a, b) %>% 10 | adorn_percentages("all") 11 | 12 | test_that("rounding parameter works", { 13 | expect_equal( 14 | y %>% 15 | adorn_rounding(digits = 1, rounding = "half up") %>% 16 | untabyl(), 17 | data.frame( 18 | a = c("x", "y"), 19 | x = c(0.5, 0.0), 20 | y = c(0.1, 0.5), 21 | stringsAsFactors = FALSE 22 | ) 23 | ) 24 | # Test failing on CRAN and only there 25 | skip_on_cran() 26 | expect_equal( 27 | y %>% 28 | adorn_rounding(digits = 1) %>% # default rounding: "half to even" 29 | untabyl(), 30 | data.frame( 31 | a = c("x", "y"), 32 | x = c(0.5, 0.0), 33 | y = c(0.0, 0.4), 34 | stringsAsFactors = FALSE 35 | ) 36 | ) 37 | }) 38 | 39 | test_that("digit control succeeds", { 40 | expect_equal( 41 | y %>% 42 | adorn_rounding(digits = 0, rounding = "half up") %>% 43 | untabyl(), 44 | data.frame( 45 | a = c("x", "y"), 46 | x = c(1, 0), 47 | y = c(0, 0), 48 | stringsAsFactors = FALSE 49 | ) 50 | ) 51 | expect_equal( 52 | y %>% 53 | adorn_rounding(digits = 2, rounding = "half up"), # shouldn't do anything given the input only having 2 decimal places 54 | y 55 | ) 56 | }) 57 | 58 | test_that("bad rounding argument caught", { 59 | expect_error( 60 | y %>% 61 | adorn_rounding(rounding = "blargh"), 62 | "'rounding' must be one of 'half to even' or 'half up'", 63 | fixed = TRUE 64 | ) 65 | }) 66 | 67 | test_that("works when called on a 3-way tabyl", { 68 | triple <- mtcars %>% 69 | tabyl(am, cyl, vs) %>% 70 | adorn_percentages("row") 71 | 72 | triple_rounded_manual <- triple 73 | triple_rounded_manual[[1]] <- adorn_rounding(triple[[1]]) 74 | triple_rounded_manual[[2]] <- adorn_rounding(triple[[2]]) 75 | 76 | expect_equal( 77 | triple %>% 78 | adorn_rounding(), 79 | triple_rounded_manual 80 | ) 81 | }) 82 | 83 | 84 | test_that("tidyselecting works", { 85 | target <- data.frame( 86 | color = c("green", "blue", "red"), 87 | first_wave = c(1:3), 88 | second_wave = c(4:6), 89 | third_wave = c(3, 3, 3), 90 | size = c("small", "medium", "large"), 91 | stringsAsFactors = FALSE 92 | ) %>% 93 | adorn_percentages() 94 | 95 | two_cols <- target %>% 96 | adorn_rounding(, "half up", 
first_wave:second_wave) 97 | expect_equal(two_cols$first_wave, c(.1, .2, .3)) 98 | expect_equal(two_cols$third_wave, c(3 / 8, 3 / 10, 3 / 12)) 99 | 100 | expect_message( 101 | target %>% 102 | adorn_rounding(, , third_wave:size), 103 | "At least one non-numeric column was specified and will not be modified." 104 | ) 105 | expect_message( 106 | text_skipped <- target %>% 107 | adorn_rounding(, , c(first_wave, size)), 108 | "At least one non-numeric column was specified and will not be modified." 109 | ) 110 | expect_equal(text_skipped$first_wave, c(.1, .2, .2)) 111 | expect_equal( 112 | text_skipped %>% dplyr::select(-first_wave), 113 | target %>% dplyr::select(-first_wave), 114 | ignore_attr = TRUE 115 | ) 116 | }) 117 | 118 | test_that("non-data.frame inputs are handled", { 119 | expect_error(adorn_rounding(1:5), "adorn_rounding() must be called on a data.frame or list of data.frames", fixed = TRUE) 120 | }) 121 | -------------------------------------------------------------------------------- /tests/testthat/test-adorn-title.R: -------------------------------------------------------------------------------- 1 | source1 <- mtcars %>% 2 | tabyl(gear, cyl) 3 | 4 | test_that("placement is correct", { 5 | # Top 6 | expect_equal( 7 | source1 %>% 8 | adorn_title() %>% 9 | names(), 10 | c("", "cyl", rep("", 2)) 11 | ) 12 | expect_equal( 13 | source1 %>% 14 | adorn_title() %>% 15 | .[1, ] %>% 16 | unlist() %>% 17 | unname(), 18 | c("gear", "4", "6", "8") 19 | ) 20 | # Combined 21 | expect_equal( 22 | source1 %>% 23 | adorn_title("combined") %>% 24 | names(), 25 | c("gear/cyl", "4", "6", "8") 26 | ) 27 | }) 28 | 29 | test_that("name overrides work", { 30 | expect_equal( 31 | source1 %>% 32 | adorn_title(row_name = "R", col_name = "C") %>% 33 | names(), 34 | c("", "C", rep("", 2)) 35 | ) 36 | }) 37 | 38 | test_that("non-tabyls are treated correctly", { 39 | non_tab <- mtcars %>% 40 | dplyr::count(gear, cyl) %>% 41 | tidyr::pivot_wider(names_from = gear, values_from = n) 42 | expect_error(adorn_title(non_tab), "When input is not a data.frame of class tabyl, a value must be specified for the col_name argument") 43 | 44 | expect_equal( 45 | non_tab %>% adorn_title(col_name = "col") %>% names(), 46 | c("", "col", rep("", 2)) 47 | ) 48 | 49 | expect_equal( 50 | non_tab %>% adorn_title(placement = "combined", col_name = "col") %>% names(), 51 | c("cyl/col", 3, 4, 5) 52 | ) 53 | 54 | expect_equal( 55 | non_tab %>% adorn_title(placement = "combined", row_name = "row!", col_name = "col") %>% names(), 56 | c("row!/col", 3, 4, 5) 57 | ) 58 | }) 59 | test_that("bad inputs are caught", { 60 | expect_error(adorn_title(1:2), 61 | "\"dat\" must be a data.frame", 62 | fixed = TRUE 63 | ) 64 | expect_error( 65 | adorn_title(source1, 66 | placement = "blargh" 67 | ), 68 | "`placement` must be one of \"top\" or \"combined\"", 69 | fixed = TRUE 70 | ) 71 | expect_error( 72 | adorn_title(source1, 73 | row_name = 1:4 74 | ), 75 | "row_name must be a string" 76 | ) 77 | expect_error( 78 | adorn_title(source1, 79 | col_name = mtcars 80 | ), 81 | "col_name must be a string" 82 | ) 83 | 84 | # Doesn't make sense with a one-way tabyl 85 | expect_warning( 86 | mtcars %>% tabyl(cyl) %>% adorn_title(), 87 | "adorn_title is meant for two-way tabyls, calling it on a one-way tabyl may not yield a meaningful result" 88 | ) 89 | }) 90 | 91 | test_that("works with non-count inputs", { 92 | source2_base <- data.frame(sector = c("North", "South"), units = 1:2, group = c("a", "b")) 93 | source2_tibble <- dplyr::as_tibble(source2_base) 94 
| expect_equal( 95 | adorn_title(source2_base, col_name = "Characteristics") %>% names(), 96 | c("", "Characteristics", "") 97 | ) 98 | expect_equal( 99 | adorn_title(source2_base, col_name = "Characteristics"), 100 | adorn_title(source2_tibble, col_name = "Characteristics") 101 | ) 102 | }) 103 | 104 | test_that("for printing purposes: tabyl class stays tabyl, data.frame stays data.frame, tibble is downgraded to data.frame", { 105 | # right output classes with tabyl inputs 106 | expect_equal(class(mtcars %>% tabyl(cyl, am) %>% adorn_title()), c("tabyl", "data.frame")) 107 | expect_equal(class(mtcars %>% tabyl(gear, carb) %>% adorn_title(., "combined")), c("tabyl", "data.frame")) 108 | 109 | # Create tibble input: 110 | mpg_by_cyl_and_am <- 111 | mtcars %>% 112 | dplyr::group_by(cyl, am) %>% 113 | dplyr::summarise(mean_mpg = mean(mpg)) %>% 114 | tidyr::pivot_wider(names_from = am, values_from = mean_mpg) 115 | 116 | # handles tibble input 117 | expect_s3_class( 118 | mpg_by_cyl_and_am %>% adorn_title("top", "Cylinders", "Automatic?"), 119 | "data.frame" 120 | ) 121 | 122 | # Convert columns 2:n to strings 123 | expect_s3_class( 124 | mpg_by_cyl_and_am %>% adorn_pct_formatting() %>% # nonsense command here, just want to convert cols 2:n into character 125 | adorn_title("top", "Cylinders", "Automatic?"), 126 | "data.frame" 127 | ) 128 | 129 | # handles data.frame non-tabyl input 130 | expect_s3_class( 131 | mtcars %>% adorn_title("top", col_name = "hey look ma I'm a title"), 132 | "data.frame" 133 | ) 134 | }) 135 | 136 | test_that("works with factors in input", { 137 | facts <- data.frame(a = "high", large = "1", stringsAsFactors = TRUE) 138 | # first with "top" then "combined" 139 | expect_equal( 140 | facts %>% adorn_title(col_name = "col"), 141 | data.frame(a = c("a", "high"), col = c("large", "1"), stringsAsFactors = FALSE) %>% 142 | setNames(., c("", "col")) 143 | ) 144 | # with combined the original column types are preserved 145 | expect_equal( 146 | facts %>% adorn_title("combined", col_name = "col"), 147 | data.frame(`a/col` = "high", large = "1", stringsAsFactors = TRUE, check.names = FALSE) 148 | ) 149 | }) 150 | 151 | test_that("automatically invokes purrr::map when called on a 3-way tabyl", { 152 | three <- tabyl(mtcars, cyl, am, gear) %>% 153 | adorn_percentages() %>% 154 | adorn_pct_formatting() 155 | expect_equal( 156 | adorn_title(three), # vanilla call 157 | purrr::map(three, adorn_title) 158 | ) 159 | 160 | # with arguments passing through, incl. 
custom row and col names 161 | expect_equal( 162 | adorn_title(three, "combined", "cyl", "am"), 163 | purrr::map(three, adorn_title, "combined", "cyl", "am") 164 | ) 165 | }) 166 | -------------------------------------------------------------------------------- /tests/testthat/test-convert_to_date.R: -------------------------------------------------------------------------------- 1 | test_that("convert_date works", { 2 | expect_equal( 3 | convert_to_date("2009-07-06"), 4 | as.Date("2009-07-06") 5 | ) 6 | expect_equal( 7 | convert_to_date(40000), 8 | as.Date("2009-07-06") 9 | ) 10 | expect_equal( 11 | convert_to_date(40000.1), 12 | as.Date("2009-07-06") 13 | ) 14 | expect_equal( 15 | convert_to_date("40000"), 16 | as.Date("2009-07-06") 17 | ) 18 | expect_equal( 19 | convert_to_date("40000.1"), 20 | as.Date("2009-07-06") 21 | ) 22 | expect_equal( 23 | convert_to_date(factor("40000.1")), 24 | as.Date("2009-07-06") 25 | ) 26 | expect_equal( 27 | convert_to_date(as.Date("2009-07-06")), 28 | as.Date("2009-07-06") 29 | ) 30 | expect_equal( 31 | convert_to_date(as.POSIXct("2009-07-06", tz = "UTC")), 32 | as.Date("2009-07-06") 33 | ) 34 | expect_equal( 35 | convert_to_date(as.POSIXlt("2009-07-06")), 36 | as.Date("2009-07-06") 37 | ) 38 | expect_equal( 39 | convert_to_date(c("2009-07-06", "40000.1", "40000", NA)), 40 | c(rep(as.Date("2009-07-06"), 3), NA), 41 | info = "Mixed input works, including NA." 42 | ) 43 | }) 44 | 45 | test_that("convert_datetime works", { 46 | expect_equal( 47 | convert_to_datetime("2009-07-06 12:13:14"), 48 | as.POSIXct("2009-07-06 12:13:14", tz = "UTC") 49 | ) 50 | expect_equal( 51 | convert_to_datetime("2009-07-06 12:13:14", tz = "Etc/GMT-5"), 52 | as.POSIXct("2009-07-06 12:13:14", tz = "Etc/GMT-5"), 53 | info = "The tz argument is respected" 54 | ) 55 | expect_equal( 56 | convert_to_datetime(40000), 57 | as.POSIXct("2009-07-06", tz = "UTC") 58 | ) 59 | expect_equal( 60 | convert_to_datetime(40000.1), 61 | as.POSIXct("2009-07-06 02:24", tz = "UTC") 62 | ) 63 | expect_equal( 64 | convert_to_datetime(40000.1, tz = "Etc/GMT-5"), 65 | as.POSIXct("2009-07-06 02:24", tz = "Etc/GMT-5") 66 | ) 67 | expect_equal( 68 | convert_to_datetime("40000"), 69 | as.POSIXct("2009-07-06", tz = "UTC") 70 | ) 71 | expect_equal( 72 | convert_to_datetime("40000.1"), 73 | as.POSIXct("2009-07-06 02:24", tz = "UTC") 74 | ) 75 | expect_equal( 76 | convert_to_datetime("40000.1", tz = "Etc/GMT-5"), 77 | as.POSIXct("2009-07-06 02:24", tz = "Etc/GMT-5") 78 | ) 79 | expect_equal( 80 | convert_to_datetime(factor("40000.1")), 81 | as.POSIXct("2009-07-06 02:24", tz = "UTC") 82 | ) 83 | expect_equal( 84 | convert_to_datetime(as.Date("2009-07-06")), 85 | as.POSIXct("2009-07-06", tz = "UTC") 86 | ) 87 | expect_equal( 88 | convert_to_datetime(as.POSIXct("2009-07-06", tz = "UTC")), 89 | as.POSIXct("2009-07-06", tz = "UTC") 90 | ) 91 | expect_equal( 92 | convert_to_datetime(as.POSIXlt("2009-07-06", tz = "UTC")), 93 | as.POSIXct("2009-07-06", tz = "UTC") 94 | ) 95 | expect_equal( 96 | convert_to_datetime(c("2009-07-06", "40000.1", "40000", NA), character_fun = lubridate::ymd_h, truncated = 1, tz = "UTC"), 97 | as.POSIXct(c("2009-07-06 00:00", "2009-07-06 02:24", "2009-07-06 00:00", NA), tz = "UTC"), 98 | info = "Mixed input works, including NA." 99 | ) 100 | }) 101 | 102 | test_that("convert_date warnings and errors work", { 103 | expect_warning( 104 | expect_error( 105 | convert_to_date("A"), 106 | regexp = "Not all character strings converted to class Date." 107 | ), 108 | regexp = "All formats failed to parse." 
# lubridate warning 109 | ) 110 | expect_warning( 111 | expect_error( 112 | convert_to_date(LETTERS), 113 | regexp = "Not all character strings converted to class Date.*17 other values", 114 | info = "Confirm the 'other values' when there are many values not converted." 115 | ), 116 | regexp = "All formats failed to parse." # lubridate warning 117 | ) 118 | expect_warning( 119 | expect_error( 120 | convert_to_date(LETTERS), 121 | regexp = "Not all character strings converted to class Date." 122 | ), 123 | regexp = "All formats failed to parse." # lubridate warning 124 | ) 125 | expect_warning( 126 | expect_warning( 127 | expect_equal( 128 | convert_to_date("A", string_conversion_failure = "warning"), 129 | as.Date(NA) 130 | ), 131 | regexp = "All formats failed to parse. No formats found." 132 | ), 133 | regexp = "Not all character strings converted to class Date." 134 | ) 135 | expect_error( 136 | convert_to_date("A", character_fun = function(x) 1), 137 | regexp = "must return class Date" 138 | ) 139 | expect_warning( 140 | convert_to_date("40000", include_time = TRUE), 141 | regexp = "`include_time` is ignored in favor of `out_class`" 142 | ) 143 | }) 144 | -------------------------------------------------------------------------------- /tests/testthat/test-get-dupes.R: -------------------------------------------------------------------------------- 1 | test_df <- data.frame(a = c(1, 3, 3, 3, 5), b = c("a", "c", "c", "e", "c"), stringsAsFactors = FALSE) 2 | 3 | test_that("Correct combinations of duplicates are found", { 4 | expect_equal(get_dupes(test_df, a), data.frame(a = test_df[[1]][2:4], dupe_count = rep(3L, 3), b = test_df[[2]][2:4], stringsAsFactors = FALSE)) 5 | expect_equal(get_dupes(test_df, b), data.frame(b = test_df[[2]][c(2:3, 5)], dupe_count = rep(3L, 3), a = test_df[[1]][c(2:3, 5)], stringsAsFactors = FALSE)) 6 | }) 7 | 8 | test_that("calling with no specified variable names uses all variable names", { 9 | expect_message( 10 | expect_equal(get_dupes(test_df), get_dupes(test_df, a, b)), 11 | "No variable names specified - using all columns." 12 | ) 13 | expect_message( 14 | expect_message( 15 | get_dupes(mtcars), 16 | "No variable names specified - using all columns." 17 | ), 18 | "No duplicate combinations found of: mpg, cyl.*and 2 other variables" 19 | ) 20 | }) 21 | 22 | no_dupes <- data.frame(a = 1, stringsAsFactors = FALSE) 23 | 24 | test_that("instances of no dupes throw correct messages, return empty df", { 25 | expect_message(no_dupes %>% get_dupes(a), "No duplicate combinations found of: a") 26 | expect_message( 27 | no_dup_a <- no_dupes %>% get_dupes(a), 28 | "No duplicate combinations found of: a" 29 | ) 30 | expect_equal( 31 | no_dup_a, 32 | data.frame(a = double(0), dupe_count = integer(0)) 33 | ) 34 | expect_message( 35 | expect_message( 36 | mtcars %>% dplyr::select(-1) %>% get_dupes(), 37 | "No variable names specified - using all columns." 38 | ), 39 | "No duplicate combinations found of: cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb" 40 | ) 41 | expect_message( 42 | expect_message( 43 | mtcars %>% get_dupes(), 44 | "No variable names specified - using all columns." 45 | ), 46 | "No duplicate combinations found of: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, ... 
and 2 other variables" 47 | ) 48 | }) 49 | 50 | test_that("incorrect variable names are handled", { 51 | expect_error(get_dupes(mtcars, x)) 52 | }) 53 | 54 | test_that("works on variables with irregular names", { 55 | badname_df <- mtcars %>% dplyr::mutate(`bad name!` = mpg * 1000) 56 | expect_equal( 57 | badname_df %>% get_dupes(`bad name!`, cyl) %>% dim(), 58 | c(10, 13) 59 | ) # does it return the right-sized result? 60 | expect_message( 61 | expect_message( 62 | badname_df_dup <- badname_df %>% get_dupes(), 63 | "No variable names specified - using all columns" 64 | ), 65 | "No duplicate combinations found of: mpg, cyl, disp, hp, drat, wt, qsec, vs, am, ... and 3 other variables" 66 | ) 67 | expect_s3_class(badname_df_dup, "data.frame") # test for success, i.e., produces a data.frame (with 0 rows) 68 | }) 69 | 70 | test_that("tidyselect specification matches exact specification", { 71 | expect_equal(mtcars %>% get_dupes(contains("cy"), mpg), mtcars %>% get_dupes(cyl, mpg)) 72 | expect_equal(mtcars %>% get_dupes(mpg), mtcars %>% get_dupes(-c(cyl, disp, hp, drat, wt, qsec, vs, am, gear, carb))) 73 | expect_equal( 74 | suppressMessages(mtcars %>% dplyr::select(cyl, wt) %>% get_dupes()), 75 | mtcars %>% dplyr::select(cyl, wt) %>% get_dupes(dplyr::everything()) 76 | ) 77 | }) 78 | 79 | test_that("grouped and ungrouped data is handled correctly", { 80 | expect_equal( 81 | mtcars %>% dplyr::group_by(carb, cyl) %>% get_dupes(mpg, carb) %>% dplyr::group_vars(), 82 | mtcars %>% dplyr::group_by(carb, cyl) %>% dplyr::group_vars() 83 | ) 84 | expect_equal( 85 | mtcars %>% dplyr::group_by(carb, cyl) %>% get_dupes(mpg, carb) %>% dplyr::ungroup(), 86 | mtcars %>% tibble::as_tibble() %>% get_dupes(mpg, carb) 87 | ) 88 | }) 89 | 90 | test_that("tibbles stay tibbles, non-tibble stay non-tibbles", { 91 | expect_equal( 92 | class(test_df %>% get_dupes(a)), 93 | class(test_df) 94 | ) 95 | expect_equal( 96 | class(tibble::as_tibble(test_df) %>% get_dupes(a)), 97 | class(tibble::as_tibble(test_df)) 98 | ) 99 | }) 100 | 101 | test_that("result is sorted by dupe_count descending, #493", { 102 | test_sort <- data.frame( 103 | a = c("x", "x", "y", "y", "y", "a", "a"), 104 | b = 1:7 105 | ) 106 | res <- get_dupes(test_sort, a) 107 | expect_equal(unique(res$a), c("y", "a", "x")) # y has 3, then alphabetically a precedes x 108 | }) 109 | -------------------------------------------------------------------------------- /tests/testthat/test-get-level-groups.R: -------------------------------------------------------------------------------- 1 | # Tests the get_level_groups helper function called by top_levels() 2 | 3 | shorts <- factor(c("a", "b", "c", "d", "e", "f"), levels = rev(letters[1:6])) 4 | longs <- factor(c("aaaaaaaaaaaaaaaa", "bbbbbbbbbbbbbbbbb", "cccccccccccccccccccc", "dddddddddddddddd", NA, "hhhhhhhhhhhhhhhh", "bbbbbbbbbbbbbbbbb"), levels = c("dddddddddddddddd", "aaaaaaaaaaaaaaaa", "cccccccccccccccccccc", "bbbbbbbbbbbbbbbbb", "hhhhhhhhhhhhhhhh")) 5 | 6 | short1 <- get_level_groups(shorts, 1, max(as.numeric(shorts), na.rm = TRUE)) 7 | short2 <- get_level_groups(shorts, 2, max(as.numeric(shorts), na.rm = TRUE)) 8 | short3 <- get_level_groups(shorts, 3, max(as.numeric(shorts), na.rm = TRUE)) 9 | 10 | test_that("names are grouped properly and groups are ordered correctly", { 11 | expect_equal(short1, list(top = "f", mid = "e, d, c, b", bot = "a")) 12 | expect_equal(short2, list(top = "f, e", mid = c("d, c"), bot = "b, a")) 13 | expect_equal(short3, list(top = "f, e, d", mid = NA, bot = "c, b, a")) 14 | }) 15 | 
16 | long1 <- get_level_groups(longs, 1, max(as.numeric(longs), na.rm = TRUE)) 17 | long2 <- get_level_groups(longs, 2, max(as.numeric(longs), na.rm = TRUE)) 18 | 19 | test_that("truncation works correctly", { 20 | expect_equal(long1, list(top = "dddddddddddddddd", mid = "<<< Middle Group (3 categories) >>>", bot = "hhhhhhhhhhhhhhhh")) 21 | expect_equal(long2, list(top = "dddddddddddddddd, aaaaaaaaa...", mid = "cccccccccccccccccccc", bot = "bbbbbbbbbbbbbbbbb, hhhhhhhh...")) 22 | expect_equal(nchar(long2$top), 30) 23 | expect_equal(nchar(long2$bot), 30) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-get_one_to_one.R: -------------------------------------------------------------------------------- 1 | test_that("get_one_to_one", { 2 | foo <- data.frame( 3 | Lab_Test_Long = c("Cholesterol, LDL", "Cholesterol, LDL", "Glucose"), 4 | Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 5 | LOINC = c(12345, 12345, 54321), 6 | Person = c("Sam", "Bill", "Sam"), 7 | stringsAsFactors = FALSE 8 | ) 9 | expect_equal( 10 | get_one_to_one(foo), 11 | list( 12 | c("Lab_Test_Long", "Lab_Test_Short", "LOINC") 13 | ) 14 | ) 15 | # NA is respected as though it were any other value 16 | foo <- data.frame( 17 | Lab_Test_Long = c(NA, NA, "Glucose"), 18 | Lab_Test_Short = c("CLDL", "CLDL", "GLUC"), 19 | LOINC = c(12345, 12345, 54321), 20 | Person = c("Sam", "Bill", "Sam"), 21 | stringsAsFactors = FALSE 22 | ) 23 | expect_equal( 24 | get_one_to_one(foo), 25 | list( 26 | c("Lab_Test_Long", "Lab_Test_Short", "LOINC") 27 | ) 28 | ) 29 | }) 30 | 31 | test_that("get_one_to_one: columns are only described once", { 32 | expect_true( 33 | !any(duplicated(unlist( 34 | get_one_to_one(mtcars[1:3, ]) 35 | ))) 36 | ) 37 | expect_equal( 38 | get_one_to_one(mtcars[1:3, ]), 39 | list( 40 | c("mpg", "cyl", "disp", "hp", "drat", "vs", "carb"), 41 | c("wt", "qsec"), 42 | c("am", "gear") 43 | ) 44 | ) 45 | # Ensure that single column outputs are dropped 46 | expect_equal( 47 | get_one_to_one(mtcars[1:5, ]), 48 | list( 49 | c("mpg", "disp", "drat"), 50 | c("cyl", "hp"), 51 | c("am", "gear") 52 | ) 53 | ) 54 | expect_message( 55 | expect_equal( 56 | get_one_to_one(mtcars), 57 | list() 58 | ), 59 | regexp = "No columns in `mtcars` map to each other" 60 | ) 61 | }) 62 | 63 | test_that("nearly duplicated dates (second decimal place differs) to not cause failure (#543)", { 64 | dates <- tibble::tibble( 65 | modification_time = 66 | structure( 67 | c(1684261364.85967, 1684274880.48328, 1684261364.85967, 1684418379.74664, 1685105253.21695, 1684418379.76668, 1684279133.50118, 1684161951.81434, 1684281651.93175, 1678483898.72893, 1685103626.03424), 68 | class = c("POSIXct", "POSIXt") 69 | ), 70 | access_time = 71 | structure( 72 | c(1685040222.34459, 1685041485.59089, 1685105067.68569, 1685040222.51569, 1685105253.21795, 1685105067.73877, 1685105253.66953, 1685106417.48391, 1685105253.66853, 1685041485.59089, 1685103652.82275), 73 | class = c("POSIXct", "POSIXt") 74 | ), 75 | change_time = structure( 76 | c(1684261364.85967, 1684274880.48328, 1684261364.85967, 1684418379.74664, 1685105253.21695, 1684418379.76668, 1684279133.50118, 1684161951.81434, 1684281651.93175, 1678483898.72893, 1685103626.03424), 77 | class = c("POSIXct", "POSIXt") 78 | ) 79 | ) 80 | expect_equal( 81 | janitor::get_one_to_one(dates), 82 | list(c("modification_time", "change_time")) 83 | ) 84 | }) 85 | -------------------------------------------------------------------------------- 
/tests/testthat/test-paste_skip_na.R: -------------------------------------------------------------------------------- 1 | test_that("paste_skip_na", { 2 | # handle no arguments the same as paste() 3 | expect_equal(paste_skip_na(), paste()) 4 | expect_equal(paste_skip_na(NA), NA_character_) 5 | expect_equal(paste_skip_na(NA, NA), NA_character_) 6 | expect_equal(paste_skip_na(NA, NA, sep = ","), NA_character_) 7 | # It does not behave like paste(NA, NA, collapse = ",") nor does it behave like paste(c(), collapse = ",") 8 | expect_equal(paste_skip_na(NA, NA, collapse = ","), NA_character_) 9 | 10 | expect_equal(paste_skip_na("A", NA), "A") 11 | expect_equal(paste_skip_na("A", NA, collapse = ","), "A") 12 | expect_equal(paste_skip_na("A", NA, c(NA, "B"), collapse = ","), "A,A B") 13 | expect_equal(paste_skip_na("A", NA, c(NA, "B"), sep = ","), c("A", "A,B")) 14 | 15 | expect_equal(paste_skip_na(c("A", "B"), NA), c("A", "B")) 16 | expect_equal(paste_skip_na(NA, c("A", "B")), c("A", "B")) 17 | }) 18 | 19 | test_that("paste_skip_na expected errors", { 20 | expect_error( 21 | paste_skip_na(c("A", "B"), c("A", "B", "C")), 22 | regexp = "Arguments must be the same length or one argument must be a scalar.", 23 | fixed = TRUE 24 | ) 25 | }) 26 | -------------------------------------------------------------------------------- /tests/testthat/test-round_to_fraction.R: -------------------------------------------------------------------------------- 1 | test_that("round_to_fraction input requirements (confirm errors)", { 2 | expect_error( 3 | round_to_fraction(x = "A", denominator = 1), 4 | info = "x must be numeric" 5 | ) 6 | expect_error( 7 | round_to_fraction(x = 1, denominator = "A"), 8 | info = "denominator must be numeric" 9 | ) 10 | expect_error( 11 | round_to_fraction(x = 1, denominator = c(1, 2)), 12 | info = "length(denominator) must equal length(x) or be 1" 13 | ) 14 | expect_error( 15 | round_to_fraction(x = 1, denominator = -1), 16 | info = "denominator must be positive" 17 | ) 18 | expect_error( 19 | round_to_fraction(x = 1, denominator = 0), 20 | info = "denominator must be positive, not zero" 21 | ) 22 | expect_error( 23 | round_to_fraction(x = 1, denominator = 2, digits = "A"), 24 | info = "digits must be numeric or 'auto'" 25 | ) 26 | expect_error( 27 | round_to_fraction(x = 1, denominator = 2, digits = c(1, 2)), 28 | info = "digits must be numeric or 'auto'" 29 | ) 30 | expect_error( 31 | round_to_fraction(x = c(1, 2), denominator = 2, digits = c(1, 2, 3)), 32 | info = "digits must be numeric and the same length as x (or scalar)" 33 | ) 34 | }) 35 | 36 | test_that("round_to_fraction results are as expected", { 37 | # scalars 38 | expect_equal( 39 | round_to_fraction(x = 1.1, denominator = 7), 40 | 8 / 7 41 | ) 42 | expect_equal( 43 | round_to_fraction(x = 1.1, denominator = 7, digits = Inf), 44 | round_to_fraction(x = 1.1, denominator = 7) 45 | ) 46 | expect_equal( 47 | round_to_fraction(x = 1.1, denominator = 7, digits = 5), 48 | round(8 / 7, digits = 5) 49 | ) 50 | expect_equal( 51 | round_to_fraction(x = 1.1, denominator = 7, digits = "auto"), 52 | round(8 / 7, digits = 2) 53 | ) 54 | 55 | # vectors 56 | expect_equal( 57 | round_to_fraction(x = c(1.1, 2.05), denominator = 7, digits = Inf), 58 | c(8 / 7, 2) 59 | ) 60 | expect_equal( 61 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 25), digits = Inf), 62 | c(8 / 7, 51 / 25) 63 | ) 64 | expect_equal( 65 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = Inf), 66 | c(8 / 7, 55 / 27) 67 | ) 68 | expect_equal( 
69 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = 3), 70 | round(c(8 / 7, 55 / 27), digits = 3) 71 | ) 72 | expect_equal( 73 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = c(3, 4)), 74 | round(c(8 / 7, 55 / 27), digits = c(3, 4)) 75 | ) 76 | expect_equal( 77 | round_to_fraction(x = c(1.1, 2.05), denominator = c(7, 27), digits = "auto"), 78 | round(c(8 / 7, 55 / 27), digits = c(2, 3)) 79 | ) 80 | }) 81 | -------------------------------------------------------------------------------- /tests/testthat/test-sas_dates.R: -------------------------------------------------------------------------------- 1 | test_that("sas_numeric_to_date", { 2 | expect_equal( 3 | sas_numeric_to_date(date_num = 15639), 4 | as.Date("2002-10-26") 5 | ) 6 | expect_equal( 7 | sas_numeric_to_date(datetime_num = 1217083532, tz = "UTC"), 8 | as.POSIXct("1998-07-26 14:45:32", tz = "UTC") 9 | ) 10 | expect_equal( 11 | sas_numeric_to_date(date_num = 15639, time_num = 3600, tz = "UTC"), 12 | as.POSIXct("2002-10-26 01:00:00", tz = "UTC") 13 | ) 14 | expect_equal( 15 | sas_numeric_to_date(time_num = 3600), 16 | hms::hms(3600) 17 | ) 18 | # NA management 19 | expect_equal( 20 | sas_numeric_to_date(date_num = c(NA, 1), time_num = c(NA, 1), tz = "UTC"), 21 | as.POSIXct(c(NA, "1960-01-02 00:00:01"), tz = "UTC") 22 | ) 23 | expect_equal( 24 | sas_numeric_to_date(date_num = NA, time_num = NA, tz = "UTC"), 25 | as.POSIXct(NA, tz = "UTC") 26 | ) 27 | # Timezone warning (#583) 28 | expect_warning( 29 | sas_numeric_to_date(date_num = 1, time_num = 1, tz = "America/New_York"), 30 | regexp = "SAS may not properly store timezones other than UTC. Consider confirming the accuracy of the resulting data.", 31 | fixed = TRUE 32 | ) 33 | }) 34 | 35 | test_that("sas_numeric_to_date expected errors", { 36 | expect_error( 37 | sas_numeric_to_date(date_num = 15639, datetime_num = 1), 38 | regexp = "Must not give both `date_num` and `datetime_num`" 39 | ) 40 | expect_error( 41 | sas_numeric_to_date(datetime_num = 1, time_num = 1), 42 | regexp = "Must not give both `time_num` and `datetime_num`" 43 | ) 44 | expect_error( 45 | sas_numeric_to_date(time_num = -1), 46 | regexp = "`time_num` must be non-negative" 47 | ) 48 | expect_error( 49 | sas_numeric_to_date(time_num = 86401), 50 | regexp = "`time_num` must be within the number of seconds in a day (<= 86400)", 51 | fixed = TRUE 52 | ) 53 | expect_error( 54 | sas_numeric_to_date(), 55 | regexp = "Must give one of `date_num`, `datetime_num`, `time_num`, or `date_num` and `time_num`" 56 | ) 57 | expect_error( 58 | sas_numeric_to_date(date_num = c(NA, 1), time_num = c(1, NA)), 59 | regexp = "The same values are not NA for both `date_num` and `time_num`" 60 | ) 61 | }) 62 | -------------------------------------------------------------------------------- /tests/testthat/test-signif_half_up.R: -------------------------------------------------------------------------------- 1 | test_that("signif_half_up results are as expected", { 2 | # scalars 3 | expect_equal( 4 | signif_half_up(x = 12.5, digits = 2), 5 | 13 6 | ) 7 | expect_equal( 8 | signif_half_up(x = 0), 9 | 0 10 | ) 11 | expect_equal( 12 | signif_half_up(x = -2.5, digits = 1), 13 | -3 14 | ) 15 | expect_equal( 16 | signif_half_up(x = 123.45, digits = 4), 17 | 123.5 18 | ) 19 | expect_equal( 20 | signif_half_up(x = -123.45, digits = 4), 21 | -123.5 22 | ) 23 | # vectors 24 | expect_equal( 25 | signif_half_up(x = c(12.5, 0, -2.5, 123.45, -123.45), digits = 2), 26 | c(13, 0, -2.5, 120, -120) 27 | ) 28 | 
expect_equal( 29 | signif_half_up(x = c(1, 1.5, 1.49, NA, NaN, -Inf, Inf), digits = 2), 30 | c(1, 1.5, 1.5, NA, NaN, -Inf, Inf) 31 | ) 32 | }) 33 | -------------------------------------------------------------------------------- /tests/testthat/test-single_value.R: -------------------------------------------------------------------------------- 1 | test_that("single_value", { 2 | expect_equal(single_value(1), 1) 3 | expect_equal(single_value(c(1, NA)), 1) 4 | expect_equal(single_value(c(1, NA, 1)), 1) 5 | expect_equal(single_value(NA_real_), NA) 6 | expect_equal(single_value(NA, missing = NA_real_), NA_real_) 7 | 8 | # Order of `missing` affects the output 9 | expect_equal(single_value("", missing = c(NA, "")), NA_character_) 10 | expect_equal(single_value(NA, missing = c("", NA)), "") 11 | 12 | # Check warn_if_all_missing 13 | expect_silent(single_value(NA)) 14 | expect_warning( 15 | single_value(NA, warn_if_all_missing = TRUE), 16 | regexp = "All values are missing" 17 | ) 18 | 19 | expect_error( 20 | single_value(1:2), 21 | regexp = "More than one (2) value found (1, 2)", 22 | fixed = TRUE 23 | ) 24 | expect_error( 25 | single_value(1:2, info = "multiple"), 26 | regexp = "More than one (2) value found (1, 2): multiple", 27 | fixed = TRUE 28 | ) 29 | }) 30 | -------------------------------------------------------------------------------- /tests/testthat/test-statistical-tests.R: -------------------------------------------------------------------------------- 1 | # Tests for two-way statistical tests 2 | 3 | # Duplicate mtcars rows to avoid chisq.test warnings 4 | mtcars3 <- rbind(mtcars, mtcars, mtcars) 5 | tab <- table(mtcars3$am, mtcars3$cyl) 6 | ttab <- tabyl(mtcars3, am, cyl) 7 | ow_tab <- tabyl(mtcars3, am) 8 | 9 | test_that("one-way tabyl is rejected by chisq.test and fisher.test", { 10 | expect_error(chisq.test(ow_tab)) 11 | expect_error(fisher.test(ow_tab)) 12 | }) 13 | 14 | test_that("janitor::chisq.test on a table is correct", { 15 | res <- stats::chisq.test(tab) 16 | jres <- janitor::chisq.test(tab) 17 | expect_equal(jres, res) 18 | }) 19 | 20 | test_that("janitor::chisq.test on a matrix is correct", { 21 | mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3) 22 | res <- stats::chisq.test(mat) 23 | jres <- janitor::chisq.test(mat) 24 | expect_equal(jres, res) 25 | }) 26 | 27 | test_that("janitor::chisq.test on two factors is correct", { 28 | res <- stats::chisq.test(mtcars3$am, mtcars3$cyl) 29 | jres <- janitor::chisq.test(mtcars3$am, mtcars3$cyl) 30 | expect_equal(jres, res) 31 | }) 32 | 33 | test_that("janitor::chisq.test with a numeric vector and p is correct", { 34 | v1 <- round(runif(10, 200, 1000)) 35 | v2 <- round(runif(10, 200, 1000)) 36 | res <- stats::chisq.test(v1, p = v2 / sum(v2)) 37 | jres <- janitor::chisq.test(v1, p = v2 / sum(v2)) 38 | expect_equal(jres, res) 39 | }) 40 | 41 | test_that("janitor::fisher.test on a table is correct", { 42 | res <- stats::fisher.test(tab) 43 | jres <- janitor::fisher.test(tab) 44 | expect_equal(jres, res) 45 | }) 46 | 47 | test_that("janitor::fisher.test on a matrix is correct", { 48 | mat <- matrix(c(151, 434, 345, 221, 145, 167), ncol = 3) 49 | res <- stats::fisher.test(mat) 50 | jres <- janitor::fisher.test(mat) 51 | expect_equal(jres, res) 52 | }) 53 | 54 | test_that("janitor::fisher.test on two vectors is correct", { 55 | res <- stats::fisher.test(mtcars3$am, mtcars3$cyl) 56 | jres <- janitor::fisher.test(mtcars3$am, mtcars3$cyl) 57 | expect_equal(jres, res) 58 | }) 59 | 60 | test_that("janitor::chisq.test on a two-way 
tabyl is identical to stats::chisq.test", { 61 | tab <- tabyl(mtcars3, am, cyl) 62 | tres <- chisq.test(tab, tabyl_results = FALSE) 63 | tab <- table(mtcars3$am, mtcars3$cyl) 64 | res <- chisq.test(tab) 65 | expect_equal(tres, res) 66 | }) 67 | 68 | test_that("janitor::fisher.test on a two-way tabyl is identical to stats::fisher.test", { 69 | tab <- tabyl(mtcars3, am, cyl) 70 | tres <- fisher.test(tab) 71 | tab <- table(mtcars3$am, mtcars3$cyl) 72 | res <- fisher.test(tab) 73 | expect_equal(tres, res) 74 | }) 75 | 76 | test_that("janitor::chisq.test returns tabyl tables", { 77 | tres <- chisq.test(ttab, tabyl_results = TRUE) 78 | expect_s3_class(tres$observed, "tabyl") 79 | expect_s3_class(tres$expected, "tabyl") 80 | expect_s3_class(tres$residuals, "tabyl") 81 | expect_s3_class(tres$stdres, "tabyl") 82 | }) 83 | 84 | test_that("returned tabyls have correct names and attributes", { 85 | tres <- chisq.test(ttab, tabyl_results = TRUE) 86 | expect_named(tres$observed, c("am", "4", "6", "8")) 87 | expect_named(tres$expected, c("am", "4", "6", "8")) 88 | expect_named(tres$residuals, c("am", "4", "6", "8")) 89 | expect_named(tres$stdres, c("am", "4", "6", "8")) 90 | expect_equal(tres$observed[[1]], c("0", "1")) 91 | expect_equal(tres$expected[[1]], c("0", "1")) 92 | expect_equal(tres$residuals[[1]], c("0", "1")) 93 | expect_equal(tres$stdres[[1]], c("0", "1")) 94 | expect_equal(attr(tres$observed, "var_names"), list(row = "am", col = "cyl")) 95 | expect_equal(attr(tres$expected, "var_names"), list(row = "am", col = "cyl")) 96 | expect_equal(attr(tres$residuals, "var_names"), list(row = "am", col = "cyl")) 97 | expect_equal(attr(tres$stdres, "var_names"), list(row = "am", col = "cyl")) 98 | }) 99 | 100 | test_that("totals are excluded from the statistical tests, #385", { 101 | # Chi-Square 102 | cx <- chisq.test(ttab) 103 | cx_totals <- suppressWarnings(chisq.test(adorn_totals(ttab, "both"))) 104 | cx_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names 105 | expect_equal( 106 | cx, 107 | cx_totals 108 | ) 109 | expect_warning( 110 | chisq.test(ttab %>% adorn_totals()), 111 | "detected a totals row" 112 | ) 113 | 114 | # Fisher 115 | fisher <- fisher.test(ttab) 116 | fisher_totals <- suppressWarnings(fisher.test(adorn_totals(ttab, "both"))) 117 | fisher_totals$data.name <- "ttab" # otherwise the test shows a mismatch, as the inputs had different names 118 | expect_equal( 119 | fisher, 120 | fisher_totals 121 | ) 122 | expect_warning( 123 | fisher.test(ttab %>% adorn_totals()), 124 | "detected a totals row" 125 | ) 126 | }) 127 | -------------------------------------------------------------------------------- /tests/testthat/test-tabyl-classifiers.R: -------------------------------------------------------------------------------- 1 | # Tests tabyl class functions 2 | 3 | a <- mtcars %>% 4 | tabyl(cyl, carb) 5 | 6 | b <- mtcars %>% 7 | dplyr::count(cyl, carb) %>% 8 | tidyr::pivot_wider( 9 | names_from = carb, 10 | values_from = n, 11 | values_fill = 0, 12 | names_sort = TRUE 13 | ) %>% 14 | as.data.frame() # for comparison purposes, remove the tbl_df aspect 15 | 16 | 17 | test_that("as_tabyl works on result of a non-janitor count/pivot_wider", { 18 | expect_equal( 19 | as_tabyl(a), 20 | as_tabyl(b, 2, "cyl", "carb") 21 | ) 22 | }) 23 | 24 | test_that("as_tabyl sets attributes correctly", { 25 | d <- as_tabyl(a) 26 | expect_equal(class(d), class(a)) 27 | expect_equal(attr(d, "core"), untabyl(a)) 28 | expect_equal(attr(d, "tabyl_type"), "two_way") 29 | 
}) 30 | 31 | test_that("untabyl puts back to original form", { 32 | expect_equal(mtcars, untabyl(as_tabyl(mtcars))) 33 | }) 34 | 35 | test_that("untabyl warns if called on non-tabyl", { 36 | expect_warning( 37 | untabyl(mtcars), 38 | "untabyl\\(\\) called on a non-tabyl" 39 | ) 40 | }) 41 | 42 | test_that("untabyl automatically invokes purrr::map when called on a 3-way tabyl", { 43 | three <- tabyl(mtcars, cyl, am, gear) 44 | expect_equal( 45 | untabyl(three), # vanilla call 46 | purrr::map(three, untabyl) 47 | ) 48 | }) 49 | 50 | test_that("as_tabyl is okay with non-numeric columns", { 51 | e <- b %>% 52 | dplyr::mutate(extra = "val") 53 | expect_equal(attr(as_tabyl(e), "core"), e) # implied success of as_tabyl 54 | }) 55 | 56 | test_that("as_tabyl fails if no numeric columns in 2:n", { 57 | bad <- data.frame( 58 | a = 1:2, 59 | b = c("x", "y") 60 | ) 61 | expect_error(as_tabyl(bad), "at least one one of columns 2:n must be of class numeric") 62 | }) 63 | 64 | test_that("bad inputs are caught", { 65 | expect_error(as_tabyl(mtcars, 3), 66 | "axes must be either 1 or 2", 67 | fixed = TRUE 68 | ) 69 | 70 | expect_error(as_tabyl(1:10), 71 | "input must be a data.frame", 72 | fixed = TRUE 73 | ) 74 | 75 | # don't pass names to a 1-way tabyl 76 | expect_error( 77 | as_tabyl(mtcars, axes = 1, row_var_name = "foo"), 78 | "variable names are only meaningful for two-way tabyls" 79 | ) 80 | }) 81 | 82 | test_that("adorn_totals and adorn_percentages reset the tabyl's core to reflect sorting, #407", { 83 | unsorted <- mtcars %>% tabyl(am, cyl) 84 | sorted <- dplyr::arrange(unsorted, desc(`4`)) 85 | expect_equal( 86 | sorted %>% 87 | adorn_totals() %>% 88 | attr(., "core"), 89 | sorted %>% 90 | untabyl() 91 | ) 92 | expect_equal( 93 | sorted %>% 94 | adorn_percentages() %>% 95 | attr(., "core"), 96 | sorted %>% 97 | untabyl() 98 | ) 99 | # both: 100 | expect_equal( 101 | sorted %>% 102 | adorn_totals() %>% 103 | adorn_percentages() %>% 104 | attr(., "core"), 105 | sorted %>% 106 | untabyl() 107 | ) 108 | # Ns with "Total" row sorted to top - the Total N should be up there too: 109 | expect_equal( 110 | sorted %>% 111 | adorn_totals() %>% 112 | adorn_percentages("col") %>% 113 | dplyr::arrange(desc(`4`)) %>% 114 | adorn_ns() %>% 115 | dplyr::pull(`4`) %>% 116 | dplyr::first(), 117 | "1.0000000 (11)" 118 | ) 119 | }) 120 | -------------------------------------------------------------------------------- /tests/testthat/test-top-levels.R: -------------------------------------------------------------------------------- 1 | fac <- factor(c("a", "b", "c", "d", "e", "f", "f"), levels = rev(letters[1:6])) 2 | fac_odd_lvls <- factor(fac, levels = rev(letters[1:5])) 3 | 4 | # more tests - group names and ordering - are in test-get-level-groups.R 5 | test_that("top_levels values are correct", { 6 | expect_equal(top_levels(fac)[[3]], c(3 / 7, 2 / 7, 2 / 7)) # default n = 2, num_levels = 6 7 | expect_equal(top_levels(fac)[[2]], c(3, 2, 2)) 8 | expect_equal(top_levels(fac, 3)[[3]], c(4 / 7, 3 / 7)) # n = 3, num_levels = 6 9 | expect_equal(top_levels(fac, 3)[[2]], c(4, 3)) 10 | expect_equal(top_levels(fac_odd_lvls)[[2]], c(2, 1, 2)) # default n = 2, num_levels = 5 11 | expect_equal(top_levels(fac_odd_lvls)[[3]], c(0.4, 0.2, 0.4)) 12 | expect_equal(top_levels(fac_odd_lvls, 1)[[2]], c(1, 3, 1)) # n = 1, num_levels = 5 13 | expect_equal(top_levels(fac_odd_lvls, 1)[[3]], c(0.2, 0.6, 0.2)) 14 | }) 15 | 16 | test_that("top_levels missing levels are represented", { 17 | x <- as.factor(letters[1:5])[1:3] 18 | expect_equal( 19 
| top_levels(x)[[1]], 20 | structure(1:3, .Label = c("a, b", "c", "d, e"), class = "factor") 21 | ) 22 | expect_equal( 23 | top_levels(x)[[2]], 24 | c(2, 1, 0) 25 | ) 26 | }) 27 | 28 | 29 | test_that("top_levels NA results are treated appropriately", { 30 | fac_na <- fac 31 | fac_na[7] <- NA 32 | expect_equal(top_levels(fac_na)[[2]], rep(2, 3)) 33 | expect_equal(top_levels(fac_na, show_na = TRUE)[[2]], c(2, 2, 2, 1)) 34 | expect_equal(top_levels(fac_na, show_na = TRUE)[[3]], c(2 / 7, 2 / 7, 2 / 7, 1 / 7)) 35 | expect_equal(top_levels(fac_na, show_na = TRUE)[[4]], c(1 / 3, 1 / 3, 1 / 3, NA)) 36 | }) 37 | 38 | test_that("top_levels default n parameter works", { 39 | expect_equal(top_levels(fac), top_levels(fac, 2)) 40 | }) 41 | 42 | test_that("top_levels missing levels are treated appropriately", { 43 | fac_missing_lvl <- fac 44 | fac_missing_lvl[2] <- NA 45 | expect_equal(top_levels(fac_missing_lvl)[[2]], c(3, 2, 1)) 46 | }) 47 | 48 | test_that("top_levels bad type inputs are handled", { 49 | expect_error(top_levels(c(0, 1), "factor_vec is not of type 'factor'")) 50 | expect_error(top_levels(c("hi", "lo"), "factor_vec is not of type 'factor'")) 51 | expect_error(top_levels(mtcars, "factor_vec is not of type 'factor'")) 52 | }) 53 | 54 | test_that("top_levels bad n value is handled", { 55 | expect_error(top_levels(fac, 4)) 56 | expect_error(top_levels(fac_odd_lvls, 3)) 57 | expect_error(top_levels(fac, 0)) 58 | expect_error(top_levels(factor(c("a", "b"))), "input factor variable must have at least 3 levels") 59 | }) 60 | 61 | test_that("top_levels correct variable name assigned to first column of result", { 62 | expect_equal(names(top_levels(fac))[1], "fac") 63 | }) 64 | -------------------------------------------------------------------------------- /tests/testthat/test-utilities.R: -------------------------------------------------------------------------------- 1 | test_that("round_half_up works", { 2 | expect_equal(round_half_up(-0.5, 0), -1) 3 | expect_equal(round_half_up(0.5, 0), 1) 4 | expect_equal(round_half_up(1.125, 2), 1.13) 5 | expect_equal(round_half_up(1.135, 2), 1.14) 6 | expect_equal(round_half_up(2436.845, 2), 2436.85) 7 | }) 8 | -------------------------------------------------------------------------------- /tests/testthat/testdata/issue-578-sf.rds: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sfirke/janitor/81702b6ed2b97a143319700a8edf48e8e4cce9cd/tests/testthat/testdata/issue-578-sf.rds --------------------------------------------------------------------------------