├── .Rbuildignore
├── .devcontainer
    └── devcontainer.json
├── .github
    ├── .gitignore
    ├── CONTRIBUTING.md
    ├── dependabot.yml
    └── workflows
    │   ├── R-CMD-check.yaml
    │   └── test-coverage.yaml
├── .gitignore
├── .lintr
├── .pre-commit-config.yaml
├── DESCRIPTION
├── NAMESPACE
├── NEWS.md
├── R
    ├── deprecated.R
    ├── dplyr.R
    ├── get_skimmers.R
    ├── reshape.R
    ├── sfl.R
    ├── skim.R
    ├── skim_obj.R
    ├── skim_print.R
    ├── skim_with.R
    ├── skimr-package.R
    ├── stats.R
    ├── summary.R
    ├── utils.R
    └── vctrs.R
├── README.Rmd
├── README.md
├── codemeta.json
├── inst
    ├── figures
    │   └── skimmer_hex.png
    ├── other_docs
    │   ├── blog.Rmd
    │   ├── blog.html
    │   ├── blog.md
    │   ├── blog_v2.Rmd
    │   ├── blog_v2.html
    │   ├── blog_v2.md
    │   └── skimr_in_jupyter.ipynb
    └── rmarkdown
    │   └── templates
    │       └── fonts-in-skimr
    │           ├── skeleton
    │               └── skeleton.Rmd
    │           └── template.yaml
├── man
    ├── deprecated-v1.Rd
    ├── figures
    │   └── logo.png
    ├── fix_windows_histograms.Rd
    ├── focus.Rd
    ├── get_default_skimmers.Rd
    ├── get_skimmers.Rd
    ├── knit_print.Rd
    ├── mutate.skim_df.Rd
    ├── partition.Rd
    ├── print.Rd
    ├── reexports.Rd
    ├── repr.Rd
    ├── sfl.Rd
    ├── skim-attr.Rd
    ├── skim-obj.Rd
    ├── skim.Rd
    ├── skim_with.Rd
    ├── skimr-package.Rd
    ├── skimr-vctrs.Rd
    ├── stats.Rd
    ├── summary.skim_df.Rd
    └── to_long.Rd
├── revdep
    ├── .gitignore
    ├── README.md
    ├── cran.md
    ├── data.sqlite
    ├── failures.md
    └── problems.md
├── skimr.Rproj
├── tests
    ├── testthat.R
    └── testthat
    │   ├── _snaps
    │       ├── data-table.md
    │       ├── dplyr.md
    │       ├── skim_print.md
    │       ├── skim_tee.md
    │       └── summary.md
    │   ├── helper-expectations.R
    │   ├── test-data-table.R
    │   ├── test-dplyr.R
    │   ├── test-get_skimmers.R
    │   ├── test-reshape.R
    │   ├── test-sfl.R
    │   ├── test-skim.R
    │   ├── test-skim_obj.R
    │   ├── test-skim_print.R
    │   ├── test-skim_tee.R
    │   ├── test-skim_with.R
    │   ├── test-stats.R
    │   ├── test-summary.R
    │   └── test-vctrs.R
└── vignettes
    ├── Skimr_defaults.Rmd
    ├── Using_fonts.Rmd
    ├── extending_skimr.Rmd
    └── skimr.Rmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^\.devcontainer$
 4 | ^\.github$
 5 | ^\.pre-commit-config\.yaml$
 6 | ^\.lintr$
 7 | ^\\inst\\doc\\other_docs\\blog\.html$
 8 | ^\\inst\\doc\\other_docs\\blog\.Rmd$
 9 | ^\\inst\\figures
10 | ^\\inst\\rmarkdown\\other
11 | ^revdep$
12 | ^README\.Rmd
13 | ^README\.html
14 | ^codemeta\.json$
15 | ^.*\.ipynb$
16 | ^doc$
17 | ^Meta$
18 | ^pkgdown$
19 | 


--------------------------------------------------------------------------------
/.devcontainer/devcontainer.json:
--------------------------------------------------------------------------------
 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 2 | // README at: https://github.com/rocker-org/devcontainer-templates/tree/main/src/r-ver
 3 | {
 4 | 	"name": "R (rocker/r-ver base)",
 5 | 	// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
 6 | 	"image": "ghcr.io/rocker-org/devcontainer/r-ver:4",
 7 | 
 8 | 	// Features to add to the dev container. More info: https://containers.dev/features.
 9 | 	// "features": {},
10 | 
11 | 	// Use 'forwardPorts' to make a list of ports inside the container available locally.
12 | 	// "forwardPorts": [],
13 | 
14 | 	// Use 'postCreateCommand' to run commands after the container is created.
15 | 	"postCreateCommand": "R -q -e 'devtools::install_local()'"
16 | 
17 | 	// Configure tool-specific properties.
18 | 	// "customizations": {},
19 | 
20 | 	// Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root.
21 | 	// "remoteUser": "root"
22 | }
23 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.github/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing
 2 | 
 3 | Contributions to `skimr`, whether in the form of bug fixes, issue reports, new
 4 | code or documentation improvements, are welcome. Please use the GitHub issue
 5 | tracker. For any pull request please link to or open a corresponding issue in
 6 | the issue tracker. Please ensure that you have notifications turned on and
 7 | respond to questions, comments or needed changes promptly.
 8 | 
 9 | ## Understanding the scope of skimr
10 | 
11 | `skimr` solves a very specific set of problems focused on the compact, flexible
12 | and useful display of summary data in the console. By itself it is not intended
13 | as a replacement for packages that create publication ready tables. The basic
14 | concept is that of "skimming" a data frame or tibble to get an overview of the
15 | data it contains.
16 | 
17 | One intended group of users is students in a first semester statistics class. As
18 | such, the package is focused on data types that are widely used. One general
19 | guideline is that if a data type is not found in the `datasets` package it will
20 | not be directly supported in `skimr`. Fortunately, `skim()` has a generic
21 | internal function for handling a variety of data types `get_skimmers()`. See the
22 | documentation for that function or the vignette "Supporting additional objects"
23 | for documentation on how to do this.
24 | 
25 | Similarly, `skimr` is deeply tied to the `tidyverse` and `dplyr` in particular.
26 | This comes with a lot of benefits, but some constraints too. Most importantly,
27 | data processed by `skim()` needs to be an object that inherits from a data frame
28 | or in a form that can be coerced to a data frame.
29 | 
30 | ## Tests
31 | 
32 | `skimr` uses `testthat` for testing. Please try to provide 100% test coverage
33 | for any submitted code and always check that existing tests continue to pass. If
34 | you are a beginner and need help with writing a test, mention this in the issue
35 | and we will try to help.
36 | 
37 | ## Pull requests
38 | 
39 | Pull requests should be against the _develop_ branch not the main branch. You
40 | can set this when creating your pull request. Please make a separately named
41 | branch to submit. Keep each branch for a complete specific issue. If you create
42 | a pull request by editing in the GitHub web editor and you end up with multiple
43 | pull requests, note that in your issue comments.
44 | 
45 | ## Code style
46 | 
47 | We follow the [tidyverse style guide](http://style.tidyverse.org/).
48 | 
49 | ## Pre commits
50 | 
51 | To enforce coding style and support development, we rely on [pre-commit.com](https://pre-commit.com),
52 | and the [R precommit package](https://github.com/lorenzwalthert/precommit). This
53 | tool runs a series of additional checks for your code before `git commit`
54 | completes.
55 | 
56 | To install the package and enable precommits, run the following:
57 | 
58 | ```
59 | # once on your system
60 | remotes::install_github("lorenzwalthert/precommit")
61 | precommit::install_precommit()
62 | 
63 | # once in every git repo either
64 | # * after cloning a repo that already uses pre-commit or
65 | # * if you want to introduce pre-commit to this repo
66 | precommit::use_precommit()
67 | ```
68 | 
69 | The checks will run automatically from there.
70 | 
71 | ## Code of Conduct
72 | 
73 | When contributing to `skimr` you must follow the [code of conduct defined by rOpenSci](https://ropensci.org/code-of-conduct/).
74 | 


--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
 1 | # To get started with Dependabot version updates, you'll need to specify which
 2 | # package ecosystems to update and where the package manifests are located.
 3 | # Please see the documentation for more information:
 4 | # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
 5 | # https://containers.dev/guide/dependabot
 6 | 
 7 | version: 2
 8 | updates:
 9 |   - package-ecosystem: "devcontainers"
10 |     directory: "/"
11 |     schedule:
12 |       interval: weekly
13 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, develop]
 6 |   pull_request:
 7 | 
 8 | name: R-CMD-check.yaml
 9 | 
10 | permissions: read-all
11 | 
12 | jobs:
13 |   R-CMD-check:
14 |     runs-on: ${{ matrix.config.os }}
15 | 
16 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
17 | 
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         config:
22 |           - {os: macos-latest,   r: 'release'}
23 |           - {os: windows-latest, r: 'release'}
24 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
25 |           - {os: ubuntu-latest,   r: 'release'}
26 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
27 | 
28 |     env:
29 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
30 |       R_KEEP_PKG_SOURCE: yes
31 | 
32 |     steps:
33 |       - uses: actions/checkout@v4
34 | 
35 |       - uses: r-lib/actions/setup-pandoc@v2
36 | 
37 |       - uses: r-lib/actions/setup-r@v2
38 |         with:
39 |           r-version: ${{ matrix.config.r }}
40 |           http-user-agent: ${{ matrix.config.http-user-agent }}
41 |           use-public-rspm: true
42 | 
43 |       - uses: r-lib/actions/setup-r-dependencies@v2
44 |         with:
45 |           extra-packages: any::rcmdcheck
46 |           needs: check
47 | 
48 |       - uses: r-lib/actions/check-r-package@v2
49 |         with:
50 |           upload-snapshots: true
51 |           build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
52 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, develop]
 6 |   pull_request:
 7 | 
 8 | name: test-coverage.yaml
 9 | 
10 | permissions: read-all
11 | 
12 | jobs:
13 |   test-coverage:
14 |     runs-on: ubuntu-latest
15 |     env:
16 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v4
20 | 
21 |       - uses: r-lib/actions/setup-r@v2
22 |         with:
23 |           use-public-rspm: true
24 | 
25 |       - uses: r-lib/actions/setup-r-dependencies@v2
26 |         with:
27 |           extra-packages: any::covr, any::xml2
28 |           needs: coverage
29 | 
30 |       - name: Test coverage
31 |         run: |
32 |           cov <- covr::package_coverage(
33 |             quiet = FALSE,
34 |             clean = FALSE,
35 |             install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
36 |           )
37 |           covr::to_cobertura(cov)
38 |         shell: Rscript {0}
39 | 
40 |       - uses: codecov/codecov-action@v4
41 |         with:
42 |           # Fail if error if not on PR, or if on PR and token is given
43 |           fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
44 |           file: ./cobertura.xml
45 |           plugin: noop
46 |           disable_search: true
47 |           token: ${{ secrets.CODECOV_TOKEN }}
48 | 
49 |       - name: Show testthat output
50 |         if: always()
51 |         run: |
52 |           ## --------------------------------------------------------------------
53 |           find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
54 |         shell: bash
55 | 
56 |       - name: Upload test results
57 |         if: failure()
58 |         uses: actions/upload-artifact@v4
59 |         with:
60 |           name: coverage-test-failures
61 |           path: ${{ runner.temp }}/package
62 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | .Rproj.user
 2 | .Rhistory
 3 | .RData
 4 | .Ruserdata
 5 | skimr.Rproj
 6 | inst/doc
 7 | .DS_Store
 8 | doc
 9 | Meta
10 | 


--------------------------------------------------------------------------------
/.lintr:
--------------------------------------------------------------------------------
1 | linters: linters_with_defaults(
2 |     object_usage_linter = NULL,
3 |     object_name_linter = NULL
4 |   )
5 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | # All available hooks: https://pre-commit.com/hooks.html
 2 | # R specific hooks: https://github.com/lorenzwalthert/precommit
 3 | repos:
 4 | -   repo: https://github.com/lorenzwalthert/precommit
 5 |     rev: v0.2.0
 6 |     hooks:
 7 |     # any R project
 8 |     -   id: codemeta-description-updated
 9 |     -   id: style-files
10 |         args: [--style_pkg=styler, --style_fun=tidyverse_style]
11 |     -   id: lintr
12 |         args: [--warn_only]
13 |         verbose: True
14 |     -   id: parsable-R
15 |     -   id: no-browser-statement
16 |     -   id: readme-rmd-rendered
17 |     -   id: roxygenize
18 |     -   id: use-tidy-description
19 |     -   id: deps-in-desc
20 | -   repo: https://github.com/pre-commit/pre-commit-hooks
21 |     rev: v2.4.0
22 |     hooks:
23 |     -   id: check-added-large-files
24 |         args: ['--maxkb=200']
25 | 
26 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
  1 | Package: skimr
  2 | Title: Compact and Flexible Summaries of Data
  3 | Version: 2.2.0
  4 | Authors@R: 
  5 |     c(person(given = "Elin",
  6 |              family = "Waring",
  7 |              role = c("cre", "aut"),
  8 |              email = "elin.waring@gmail.com"),
  9 |       person(given = "Michael",
 10 |              family = "Quinn",
 11 |              role = "aut",
 12 |              email = "msquinn@google.com"),
 13 |       person(given = "Amelia",
 14 |              family = "McNamara",
 15 |              role = "aut",
 16 |              email = "amcnamara@smith.edu"),
 17 |       person(given = "Eduardo",
 18 |              family = "Arino de la Rubia",
 19 |              role = "aut",
 20 |              email = "earino@gmail.com"),
 21 |       person(given = "Hao",
 22 |              family = "Zhu",
 23 |              role = "aut",
 24 |              email = "haozhu233@gmail.com"),
 25 |       person(given = "Julia",
 26 |              family = "Lowndes",
 27 |              role = "ctb",
 28 |              email = "lowndes@nceas.ucsb.edu"),
 29 |       person(given = "Shannon",
 30 |              family = "Ellis",
 31 |              role = "aut",
 32 |              email = "sellis18@jhmi.edu"),
 33 |       person(given = "Hope",
 34 |              family = "McLeod",
 35 |              role = "ctb",
 36 |              email = "hmgit2@gmail.com"),
 37 |       person(given = "Hadley",
 38 |              family = "Wickham",
 39 |              role = "ctb",
 40 |              email = "hadley@rstudio.com"),
 41 |       person(given = "Kirill",
 42 |              family = "Müller",
 43 |              role = "ctb",
 44 |              email = "krlmlr+r@mailbox.org"),
 45 |       person(family = "RStudio, Inc.",
 46 |              role = "cph",
 47 |              comment = "Spark functions"),
 48 |       person(given = "Connor",
 49 |              family = "Kirkpatrick",
 50 |              role = "ctb",
 51 |              email = "hello@connorkirkpatrick.com"),
 52 |       person(given = "Scott",
 53 |              family = "Brenstuhl",
 54 |              role = "ctb",
 55 |              email = "brenstsr@miamioh.edu"),
 56 |       person(given = "Patrick",
 57 |              family = "Schratz",
 58 |              role = "ctb",
 59 |              email = "patrick.schratz@gmail.com"),
 60 |       person(given = "lbusett",
 61 |              role = "ctb",
 62 |              email = "lbusett@gmail.com"),
 63 |       person(given = "Mikko",
 64 |              family = "Korpela",
 65 |              role = "ctb",
 66 |              email = "mvkorpel@iki.fi"),
 67 |       person(given = "Jennifer",
 68 |              family = "Thompson",
 69 |              role = "ctb",
 70 |              email = "thompson.jennifer@gmail.com"),
 71 |       person(given = "Harris",
 72 |              family = "McGehee",
 73 |              role = "ctb",
 74 |              email = "mcgehee.harris@gmail.com"),
 75 |       person(given = "Mark",
 76 |              family = "Roepke",
 77 |              role = "ctb",
 78 |              email = "mroepke5@gmail.com"),
 79 |       person(given = "Patrick",
 80 |              family = "Kennedy",
 81 |              role = "ctb",
 82 |              email = "pkqstr@protonmail.com"),
 83 |       person(given = "Daniel",
 84 |              family = "Possenriede",
 85 |              role = "ctb",
 86 |              email = "possenriede@gmail.com"),
 87 |       person(given = "David",
 88 |              family = "Zimmermann",
 89 |              role = "ctb",
 90 |              email = "david_j_zimmermann@hotmail.com"),
 91 |       person(given = "Kyle", 
 92 |              family = "Butts",
 93 |              role ="ctb",
 94 |              email = "buttskyle96@gmail.com"),
 95 |       person(given = "Bastian", 
 96 |              family = "Torges",
 97 |              role ="ctb",
 98 |              email = "bastian.torges@gmail.com"),
 99 |       person(given = "Rick",
100 |              family = "Saporta",
101 |              role = "ctb",
102 |              email = "Rick@TheFarmersDog.com"),
103 |       person(given = "Henry",
104 |              family = "Morgan Stewart",
105 |              role = "ctb",
106 |              email = "henry.morganstewart@gmail.com"),
107 |       person(given = "Olivier",
108 |              family = "Roy",
109 |              role = "ctb")
110 |        )
111 | Description: A simple to use summary function that can be used with pipes
112 |     and displays nicely in the console. The default summary statistics may
113 |     be modified by the user as can the default formatting.  Support for
114 |     data frames and vectors is included, and users can implement their own
115 |     skim methods for specific object types as described in a vignette.
116 |     Default summaries include support for inline spark graphs.
117 |     Instructions for managing these on specific operating systems are
118 |     given in the "Using skimr" vignette and the README.
119 | License: GPL-3
120 | URL: https://docs.ropensci.org/skimr/,
121 |     https://github.com/ropensci/skimr/
122 | BugReports: https://github.com/ropensci/skimr/issues
123 | Depends:
124 |     R (>= 3.1.2)
125 | Imports:
126 |     cli,
127 |     dplyr (>= 1.0.0),
128 |     knitr (>= 1.2),
129 |     magrittr (>= 1.5),
130 |     pillar (>= 1.6.4),
131 |     purrr,
132 |     repr,
133 |     rlang (>= 0.3.1),
134 |     stats,
135 |     stringr (>= 1.1),
136 |     tibble (>= 2.0.0),
137 |     tidyr (>= 1.0),
138 |     tidyselect (>= 1.0.0),
139 |     vctrs (>= 0.5.0)
140 | Suggests:
141 |     data.table,
142 |     dtplyr,
143 |     extrafont,
144 |     haven,
145 |     lubridate,
146 |     rmarkdown,
147 |     testthat (>= 3.0.0),
148 |     withr
149 | VignetteBuilder: 
150 |     knitr
151 | Encoding: UTF-8
152 | Roxygen: list(markdown = TRUE)
153 | RoxygenNote: 7.3.2
154 | Collate:
155 |     'deprecated.R'
156 |     'dplyr.R'
157 |     'stats.R'
158 |     'skim_with.R'
159 |     'get_skimmers.R'
160 |     'reshape.R'
161 |     'sfl.R'
162 |     'skim.R'
163 |     'skim_obj.R'
164 |     'skim_print.R'
165 |     'skimr-package.R'
166 |     'summary.R'
167 |     'utils.R'
168 |     'vctrs.R'
169 | Config/testthat/edition: 3
170 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
  1 | # Generated by roxygen2: do not edit by hand
  2 | 
  3 | S3method(ctl_new_pillar,one_skim_df)
  4 | S3method(format,summary_skim_df)
  5 | S3method(get_skimmers,AsIs)
  6 | S3method(get_skimmers,Date)
  7 | S3method(get_skimmers,POSIXct)
  8 | S3method(get_skimmers,Timespan)
  9 | S3method(get_skimmers,character)
 10 | S3method(get_skimmers,complex)
 11 | S3method(get_skimmers,default)
 12 | S3method(get_skimmers,difftime)
 13 | S3method(get_skimmers,factor)
 14 | S3method(get_skimmers,haven_labelled)
 15 | S3method(get_skimmers,list)
 16 | S3method(get_skimmers,logical)
 17 | S3method(get_skimmers,numeric)
 18 | S3method(get_skimmers,ts)
 19 | S3method(knit_print,one_skim_df)
 20 | S3method(knit_print,skim_df)
 21 | S3method(knit_print,skim_list)
 22 | S3method(knit_print,summary_skim_df)
 23 | S3method(mutate,skim_df)
 24 | S3method(print,skim_df)
 25 | S3method(print,skim_list)
 26 | S3method(print,summary_skim_df)
 27 | S3method(repr_text,one_skim_df)
 28 | S3method(repr_text,skim_df)
 29 | S3method(repr_text,skim_list)
 30 | S3method(skim_by_type,data.frame)
 31 | S3method(skim_by_type,data.table)
 32 | S3method(skim_by_type,grouped_df)
 33 | S3method(summary,skim_df)
 34 | S3method(tbl_format_header,one_skim_df)
 35 | S3method(to_long,default)
 36 | S3method(to_long,skim_df)
 37 | S3method(vec_cast,skim_df.skim_df)
 38 | S3method(vec_cast,skim_df.tbl_df)
 39 | S3method(vec_cast,tbl_df.skim_df)
 40 | S3method(vec_ptype2,skim_df.skim_df)
 41 | S3method(vec_ptype2,skim_df.tbl_df)
 42 | S3method(vec_ptype2,tbl_df.skim_df)
 43 | export("%>%")
 44 | export(assert_is_one_skim_df)
 45 | export(assert_is_skim_df)
 46 | export(assert_is_skim_list)
 47 | export(base_skimmers)
 48 | export(bind)
 49 | export(complete_rate)
 50 | export(contains)
 51 | export(could_be_skim_df)
 52 | export(data_cols)
 53 | export(data_rows)
 54 | export(df_name)
 55 | export(dt_key)
 56 | export(ends_with)
 57 | export(everything)
 58 | export(fix_windows_histograms)
 59 | export(focus)
 60 | export(get_default_skimmer_names)
 61 | export(get_default_skimmers)
 62 | export(get_one_default_skimmer)
 63 | export(get_one_default_skimmer_names)
 64 | export(get_sfl)
 65 | export(get_skimmers)
 66 | export(group_names)
 67 | export(has_skim_type_attribute)
 68 | export(has_skimmers)
 69 | export(has_skimr_attributes)
 70 | export(has_type_column)
 71 | export(has_variable_column)
 72 | export(inline_hist)
 73 | export(inline_linegraph)
 74 | export(is_data_frame)
 75 | export(is_one_skim_df)
 76 | export(is_skim_df)
 77 | export(is_skim_list)
 78 | export(list_lengths_max)
 79 | export(list_lengths_median)
 80 | export(list_lengths_min)
 81 | export(list_max_length)
 82 | export(list_min_length)
 83 | export(matches)
 84 | export(max_char)
 85 | export(min_char)
 86 | export(modify_default_skimmers)
 87 | export(n_complete)
 88 | export(n_empty)
 89 | export(n_missing)
 90 | export(n_unique)
 91 | export(n_whitespace)
 92 | export(num_range)
 93 | export(one_of)
 94 | export(partition)
 95 | export(sfl)
 96 | export(skim)
 97 | export(skim_format)
 98 | export(skim_tee)
 99 | export(skim_to_list)
100 | export(skim_to_wide)
101 | export(skim_with)
102 | export(skim_without_charts)
103 | export(skimmers_used)
104 | export(sorted_count)
105 | export(starts_with)
106 | export(to_long)
107 | export(top_counts)
108 | export(ts_end)
109 | export(ts_start)
110 | export(yank)
111 | importFrom(dplyr,mutate)
112 | importFrom(knitr,knit_print)
113 | importFrom(magrittr,"%>%")
114 | importFrom(pillar,ctl_new_pillar)
115 | importFrom(pillar,tbl_format_header)
116 | importFrom(repr,repr_text)
117 | importFrom(rlang,"%||%")
118 | importFrom(rlang,.data)
119 | importFrom(tidyselect,contains)
120 | importFrom(tidyselect,ends_with)
121 | importFrom(tidyselect,everything)
122 | importFrom(tidyselect,matches)
123 | importFrom(tidyselect,num_range)
124 | importFrom(tidyselect,one_of)
125 | importFrom(tidyselect,starts_with)
126 | importFrom(vctrs,vec_cast)
127 | importFrom(vctrs,vec_ptype2)
128 | 


--------------------------------------------------------------------------------
/R/deprecated.R:
--------------------------------------------------------------------------------
 1 | #' Deprecated functions from skimr v1
 2 | #'
 3 | #' Skimr used to offer functions that combined skimming with a secondary effect,
 4 | #' like reshaping the data, building a list or printing the results. Some of
 5 | #' these behaviors are no longer necessary. [skim()] always returns a wide
 6 | #' data frame. Others have been replaced by functions that do a single thing.
 7 | #' [partition()] creates a list-like object from a skimmed data frame.
 8 | #'
 9 | #' @param .data A tibble, or an object that can be coerced into a tibble.
10 | #' @param ... Columns to select for skimming. When none are provided, the
11 | #'   default is to skim all columns.
12 | #' @return Either a `skim_df` or a `skim_list` object.
13 | #' @name deprecated-v1
14 | NULL
15 | 
16 | #' @describeIn deprecated-v1 Use [skim()]; it always produces a wide data frame.
17 | #' @export
18 | skim_to_wide <- function(.data, ...) {
19 |   .Deprecated(new = "skim()")
20 |   skim(.data, ...)
21 | }
22 | 
23 | #' @describeIn deprecated-v1 Use [partition()] on a skimmed data frame for a list.
24 | #' @export
25 | skim_to_list <- function(.data, ...) {
26 |   .Deprecated(new = "partition(skim())")
27 |   partition(skim(.data, ...))
28 | }
29 | 
30 | #' @describeIn deprecated-v1 Options are set by [print()] and [skim_with()].
31 | #' @export
32 | skim_format <- function(...) {
33 |   # Arguments are accepted but not used; the function only signals deprecation.
34 |   moved_notice <- paste(
35 |     "Formatting options are now in print() or as function",
36 |     "arguments in skim_with()."
37 |   )
38 |   # A custom `msg` is used instead of the default text derived from `new`.
39 |   .Deprecated(new = "print()", msg = moved_notice)
40 | }
41 | 


--------------------------------------------------------------------------------
/R/dplyr.R:
--------------------------------------------------------------------------------
 1 | #' Mutate a skim_df
 2 | #'
 3 | #' [dplyr::mutate()] currently drops attributes, but we need to keep them around
 4 | #' for other skim behaviors. Otherwise the behavior is exactly the same. For
 5 | #' more information, see <https://github.com/tidyverse/dplyr/issues/3429>.
 6 | #'
 7 | #' @param .data A `skim_df`, which behaves like a `tbl`.
 8 | #' @param ... Name-value pairs of expressions, each with length 1 or the same
 9 | #'   length as the number of rows in the group, if using [dplyr::group_by()], or
10 | #'   in the entire input (if not using groups). The name of each argument will
11 | #'   be the name of a new variable, and the value will be its corresponding
12 | #'   value. Use `NULL` value in [dplyr::mutate()] to drop a variable. New
13 | #'   variables overwrite existing variables of the same name.
14 | #'
15 | #'   The arguments in `...` are automatically quoted with [rlang::quo()] and
16 | #'   evaluated with [rlang::eval_tidy()] in the context of the data frame. They
17 | #'   support unquoting [rlang::quasiquotation] and splicing. See
18 | #'   `vignette("programming", package = "dplyr")` for an introduction to these
19 | #'   concepts.
20 | #' @return A `skim_df` object, which also inherits the class(es) of the input
21 | #'   data. In many ways, the object behaves like a [tibble::tibble()].
22 | #' @seealso [dplyr::mutate()] for the function's expected behavior.
23 | #' @importFrom dplyr mutate
24 | #' @export
25 | mutate.skim_df <- function(.data, ...) {
26 |   result <- NextMethod("mutate")
27 |   if (!could_be_skim_df(result)) {
28 |     return(strip_skim_attrs(result))
29 |   }
30 |   # The result could still be a skim_df: copy the attributes from the input.
31 |   reassign_skim_attrs(result, .data)
32 | }
33 | 


--------------------------------------------------------------------------------
/R/reshape.R:
--------------------------------------------------------------------------------
  1 | #' Separate a big `skim_df` into smaller data frames, by type.
  2 | #'
  3 | #' The data frames produced by [skim()] are wide and sparse, filled with
  4 | #' columns that are mostly `NA`. For that reason, it can be convenient to
  5 | #' work with "by type" subsets of the original data frame. These smaller
  6 | #' subsets have their `NA` columns removed.
  7 | #'
  8 | #' `partition()` splits a `skim_df` into a list of smaller `skim_df` data
  9 | #' frames, each entry covering one data type from the original. `bind()` is
 10 | #' the inverse: it takes that list and produces the original `skim_df`. Where
 11 | #' `partition()` keeps all of the subtables as list entries, `yank()` gives
 12 | #' you the single subtable for one data type.
 13 | #'
 14 | #' @param data A `skim_df`.
 15 | #' @param skim_type A character scalar. The subtable to extract from a
 16 | #'   `skim_df`.
 17 | #' @return A `skim_list` of `skim_df`'s, by type.
 18 | #' @examples
 19 | #' # Create a wide skimmed data frame (a skim_df)
 20 | #' skimmed <- skim(iris)
 21 | #'
 22 | #' # Separate into a list of subtables by type
 23 | #' separate <- partition(skimmed)
 24 | #'
 25 | #' # Put back together
 26 | #' identical(bind(separate), skimmed)
 27 | #' # > TRUE
 28 | #'
 29 | #' # Alternatively, get the subtable of a particular type
 30 | #' yank(skimmed, "factor")
 31 | #' @export
 32 | partition <- function(data) {
 33 |   assert_is_skim_df(data)
 34 |   by_type <- split(data, data$skim_type)
 35 |   group_cols <- group_names(data)
 36 |   base_cols <- base_skimmers(data)
 37 |   used <- reconcile_skimmers(data, group_cols, base_cols)
 38 | 
 39 |   # Keep a subtable when its type still has at least one known skimmer (the
 40 |   # set can shrink after focus()) or when any base column is present in the
 41 |   # data. Indexing by names(by_type) keeps the flags in the same order as
 42 |   # the subtables.
 43 |   type_has_skimmers <- lengths(used)[names(by_type)] > 0
 44 |   any_base_present <- any(base_cols %in% names(data))
 45 |   kept <- by_type[type_has_skimmers | any_base_present]
 46 |   subtables <- purrr::imap(
 47 |     kept,
 48 |     simplify_skimdf,
 49 |     skimmers = used,
 50 |     groups = group_cols,
 51 |     base = base_cols
 52 |   )
 53 |   reassign_skim_attrs(
 54 |     subtables,
 55 |     data,
 56 |     class = c("skim_list", "list"),
 57 |     skimmers_used = used
 58 |   )
 59 | }
 60 | 
 61 | #' Align the skimmers_used attribute with the current columns in the data
 62 | #'
 63 | #' Catches the case where users partition (or more likely print) a data frame
 64 | #' that gained columns after skimming. Returns a list, by skim type, of names.
 65 | #' @noRd
 66 | reconcile_skimmers <- function(data, groups, base) {
 67 |   all_columns <- colnames(data)
 68 |   used <- skimmers_used(data)
 69 |   if (length(used) == 0) {
 70 |     return(used)
 71 |   }
 72 |   matched_by_type <- skimmers_from_names(all_columns, used)
 73 |   with_base_columns <- c(
 74 |     "skim_variable",
 75 |     "skim_type",
 76 |     groups,
 77 |     base,
 78 |     collapse_skimmers(used)
 79 |   )
 80 |   # Anything left over is a column that was added after skimming.
 81 |   extra_cols <- dplyr::setdiff(all_columns, with_base_columns)
 82 |   if (length(extra_cols) > 0) {
 83 |     grouped <- dplyr::group_by(data, .data$skim_type)
 84 |     complete_by_type <- dplyr::summarise(
 85 |       grouped,
 86 |       dplyr::across(tidyselect::all_of(extra_cols), ~ !all(is.na(.x)))
 87 |     )
 88 |     complete_cols <- purrr::pmap(
 89 |       complete_by_type,
 90 |       get_complete_columns,
 91 |       names = extra_cols
 92 |     )
 93 |     new_cols_by_type <- rlang::set_names(
 94 |       complete_cols,
 95 |       complete_by_type$skim_type
 96 |     )
 97 |     matched_by_type <- purrr::list_merge(
 98 |       matched_by_type,
 99 |       !!!new_cols_by_type
100 |     )
101 |   }
102 | 
103 |   matched_by_type
104 | }
105 | 
106 | skimmers_from_names <- function(names, skimmers) {
107 |   by_type <- purrr::imap(skimmers, match_skimmers, values_to_match = names)
108 |   purrr::set_names(by_type, nm = names(skimmers))
109 | }
110 | 
111 | match_skimmers <- function(fun_names, type, values_to_match) {
112 |   # Drop the leading "<type>." namespace from every candidate name, then
113 |   # keep the candidates that are known skimmers (`fun_names`) for this
114 |   # type. The pattern is anchored, so only a true prefix is removed.
115 |   type_prefix <- paste0("^", type, "\\.")
116 |   dplyr::intersect(stringr::str_remove(values_to_match, type_prefix), fun_names)
117 | }
118 | 
119 | collapse_skimmers <- function(skimmers_used) {
120 |   # Build the flat vector of namespaced column names, "<type>.<skimmer>".
121 |   purrr::flatten_chr(purrr::imap(skimmers_used, ~ paste(.y, .x, sep = ".")))
122 | }
123 | 
124 | get_complete_columns <- function(skim_type, ..., names) {
125 |   names[c(...)] # `...` holds one logical flag per entry of `names`
126 | }
127 | 
128 | #' For each type subtable, only select columns generated by this type's skimmers
129 | #' Columns that the user removed from the skim_df are skipped silently.
130 | #' @noRd
131 | simplify_skimdf <- function(data, skim_type, skimmers, groups, base) {
132 |   stopifnot(has_variable_column(data))
133 |   # The "<type>." namespace is a prefix: anchor the pattern, as match_skimmers()
134 |   # does, so names that only contain "<type>." further in are left intact.
135 |   prefix <- paste0("^", skim_type, "\\.")
136 |   names(data) <- stringr::str_remove(names(data), prefix)
137 |   keep <- c("skim_variable", groups, base, skimmers[[skim_type]])
138 |   structure(
139 |     dplyr::select(data, tidyselect::any_of(keep)),
140 |     class = c("one_skim_df", "tbl_df", "tbl", "data.frame"),
141 |     skim_type = skim_type
142 |   )
143 | }
144 | 
145 | #' @describeIn partition The inverse of `partition()`. Rebuilds the original
146 | #'   `skim_df` from a `skim_list`.
147 | #' @export
148 | bind <- function(data) {
149 |   assert_is_skim_list(data)
150 |   namespaced <- purrr::imap(data, add_namespaces) # restore "<type>." prefixes
151 |   stacked <- dplyr::bind_rows(!!!namespaced, .id = "skim_type")
152 |   reassign_skim_attrs(stacked, data)
153 | }
154 | 
155 | add_namespaces <- function(data, skim_type) {
156 |   # Prefix every non-meta column with "<skim_type>.". Columns are renamed by
157 |   # membership, not by position: meta columns may be absent (e.g. base
158 |   # skimmers dropped by focus()) or not lead, which broke positional naming.
159 |   # TODO(michaelquinn32): Drop this when vctrs interface works correctly.
160 |   meta <- c("skim_variable", dplyr::group_vars(data), base_skimmers(data))
161 |   is_meta <- names(data) %in% meta
162 |   names(data)[!is_meta] <- paste(skim_type, names(data)[!is_meta], sep = ".")
163 |   attr(data, "skim_type") <- NULL
164 |   tibble::as_tibble(data)
165 | }
166 | 
167 | #' @describeIn partition Extract the subtable for one particular type from a
168 | #'   `skim_df`, that is, the `skim_type` element of `partition(data)`.
169 | #' @export
170 | yank <- function(data, skim_type) {
171 |   partition(data)[[skim_type]]
172 | }
173 | 
174 | #' Only show a subset of summary statistics after skimming
175 | #'
176 | #' This function is a variant of [dplyr::select()] designed to work with
177 | #' `skim_df` objects. When using `focus()`, `skimr` metadata columns are kept,
178 | #' and `skimr` print methods are still utilized. Otherwise, the signature and
179 | #' behavior is identical to [dplyr::select()].
180 | #'
181 | #' @param .data A `skim_df` object.
182 | #' @param ...  One or more unquoted expressions separated by commas. Variable
183 | #'   names can be used as if they were positions in the data frame, so
184 | #'   expressions like x:y can be used to select a range of variables.
185 | #' @examples
186 | #' # Compare
187 | #' iris %>%
188 | #'   skim() %>%
189 | #'   dplyr::select(n_missing)
190 | #'
191 | #' iris %>%
192 | #'   skim() %>%
193 | #'   focus(n_missing)
194 | #'
195 | #' # This is equivalent to
196 | #' iris %>%
197 | #'   skim() %>%
198 | #'   dplyr::select(skim_variable, skim_type, n_missing)
199 | #' @export
200 | focus <- function(.data, ...) {
201 |   assert_is_skim_df(.data)
202 |   # The two metadata columns are always selected ahead of the user's choices.
203 |   reduced <- dplyr::select(.data, "skim_type", "skim_variable", ...)
204 |   if (!could_be_skim_df(reduced)) {
205 |     # For example when `...` explicitly removed one of the metadata columns.
206 |     stop("Cannot drop 'skim_variable' or 'skim_type' columns")
207 |   }
208 |   # Re-derive the per-type skimmers so that they match the remaining
209 |   # columns. Groups and base skimmers are read from the original object,
210 |   # whose other skim attributes are then copied onto the reduced one.
211 |   kept <- reconcile_skimmers(reduced, group_names(.data), base_skimmers(.data))
212 |   reassign_skim_attrs(reduced, .data, skimmers_used = kept)
213 | }
214 | 
215 | #' Create "long" skim output
216 | #'
217 | #' Skim results returned as a tidy long data frame with four columns:
218 | #' variable, type, stat and formatted.
219 | #'
220 | #' @param .data A data frame or an object that can be coerced into a data frame.
221 | #' @param ...  Columns to select for skimming. When none are provided, the
222 | #'   default is to skim all columns.
223 | #' @param skim_fun The skim function used.
224 | #' @return A tibble
225 | #' @examples
226 | #' to_long(iris)
227 | #' to_long(skim(iris))
228 | #' @export
229 | to_long <- function(.data, ..., skim_fun = skim) {
230 |   UseMethod("to_long") # S3 generic: the method is chosen from `.data`
231 | }
232 | 
233 | #' @describeIn to_long Skim a data frame and convert the results to a
234 | #'   long data frame.
235 | #' @export
236 | to_long.default <- function(.data, ..., skim_fun = skim) {
237 |   skimmed <- skim_fun(.data, ...) # `...` (column selection) is consumed here
238 |   to_long(skimmed, skim_fun = skim_fun)
239 | }
240 | 
241 | #' @describeIn to_long Transform a `skim_df` to a long data frame.
242 | #' @export
243 | to_long.skim_df <- function(.data, ..., skim_fun = skim) {
244 |   # Cast every column to character before pivoting, so that all statistics
245 |   # land in the single `formatted` column. `...` and `skim_fun` are unused.
246 |   chr <- dplyr::mutate(.data, dplyr::across(dplyr::everything(), as.character))
247 |   tidyr::pivot_longer(
248 |     chr,
249 |     cols = c(-"skim_type", -"skim_variable"),
250 |     names_to = "stat", values_to = "formatted", values_drop_na = TRUE
251 |   )
252 | }
253 | 


--------------------------------------------------------------------------------
/R/sfl.R:
--------------------------------------------------------------------------------
 1 | #' Create a skimr function list
 2 | #'
 3 | #' This constructor is used to create a named list of functions. It also lets
 4 | #' you pass `NULL` to identify a skimming function that you wish to remove.
 5 | #' Only functions that return a single value, working with [dplyr::summarize()],
 6 | #' can be used within `sfl`.
 7 | #'
 8 | #' `sfl()` will automatically generate callables and names for a variety of
 9 | #' inputs, including functions, formulas and strings. Nonetheless, we recommend
10 | #' providing names when reasonable to get better [skim()] output.
11 | #'
12 | #' @param ... Inherited from dplyr::data_masking() for dplyr version 1 or later
13 | #'  or dplyr::funs() for older versions of dplyr.
14 | #'  A list of functions
15 | #'   specified by:
16 | #'
17 | #'  * Their name, `"mean"`
18 | #'  * The function itself, `mean`
19 | #'  * A call to the function with `.` as a dummy argument,
20 | #'    `mean(., na.rm = TRUE)`
21 | #'  * An anonymous function in \pkg{purrr} notation, `~mean(., na.rm = TRUE)`
22 | #'
23 | #' @param skim_type A character scalar. This is used to match locally-provided
24 | #'   skimmers with defaults. See [get_skimmers()] for more detail.
25 | #' @return A `skimr_function_list`: a list with `funs`, the named skimming
26 | #'   functions (`NULL` marks one to drop), and the `skim_type` they apply to.
27 | #' @seealso [dplyr::funs()], [skim_with()] and [get_skimmers()].
28 | #' @examples
29 | #' # sfl's can take a variety of input formats and will generate names
30 | #' # if not provided.
31 | #' sfl(mad, "var", ~ length(.)^2)
32 | #'
33 | #' # But these can generate unpredictable names in your output.
34 | #' # Better to set your own names.
35 | #' sfl(mad = mad, variance = "var", length_sq = ~ length(.)^2)
36 | #'
37 | #' # sfl's can remove individual skimmers from defaults by passing NULL.
38 | #' sfl(hist = NULL)
39 | #'
40 | #' # When working interactively, you don't need to set a type.
41 | #' # But you should when defining new defaults with `get_skimmers()`.
42 | #' get_skimmers.my_new_class <- function(column) {
43 | #'   sfl(n_missing, skim_type = "my_new_class")
44 | #' }
45 | #' @export
46 | sfl <- function(..., skim_type = "") {
47 |   # Validate `skim_type` first, then pair the named functions with it. The
48 |   # inputs in `...` are named by build_sfl_names().
49 |   stopifnot(length(skim_type) == 1, is.character(skim_type))
50 |   out <- list(funs = build_sfl_names(...), skim_type = skim_type)
51 |   class(out) <- "skimr_function_list"
52 |   out
53 | }
54 | 
55 | build_sfl_names <- function(...) {
56 |   funs <- rlang::list2(...) # evaluated inputs; names are assigned below
57 |   rlang::set_names(funs, names(rlang::quos_auto_name(rlang::enquos(...))))
58 | }
59 | 


--------------------------------------------------------------------------------
/R/skim.R:
--------------------------------------------------------------------------------
  1 | #' Skim a data frame, getting useful summary statistics
  2 | #'
  3 | #' `skim()` is an alternative to [summary()], quickly providing a broad
  4 | #' overview of a data frame. It handles data of all types, dispatching a
  5 | #' different set of summary functions based on the types of columns in the data
  6 | #' frame.
  7 | #'
  8 | #' Each call produces a `skim_df`, which is fundamentally a tibble with a
  9 | #' special print method. One unusual feature of this data frame is pseudo-
 10 | #' namespace for columns. `skim()` computes statistics by data type, and it
 11 | #' stores them in the data frame as `<type>.<statistic>`. These types are
 12 | #' stripped when printing the results. The "base" skimmers (`n_missing` and
 13 | #' `complete_rate`) are the only columns that don't follow this behavior.
 14 | #' See [skim_with()] for more details on customizing `skim()` and
 15 | #' [get_default_skimmers()] for a list of default functions.
 16 | #'
 17 | #' If you just want to see the printed output, call `skim_tee()` instead.
 18 | #' This function returns the original data. `skim_tee()` uses the default
 19 | #' `skim()`, but you can replace it with the `skim_fun` argument.
 20 | #'
 21 | #' The data frame produced by `skim` is wide and sparse. To avoid type coercion
 22 | #' `skimr` uses a type namespace for all summary statistics. Columns for numeric
 23 | #' summary statistics all begin `numeric`; for factor summary statistics
 24 | #' begin `factor`; and so on.
 25 | #'
 26 | #' See [partition()] and [yank()] for methods for transforming this wide data
 27 | #' frame. The first function splits it into a list, with each entry
 28 | #' corresponding to a data type. The latter pulls a single subtable for a
 29 | #' particular type from the `skim_df`.
 30 | #'
 31 | #' `skim()` is designed to operate in pipes and to generally play nicely with
 32 | #' other `tidyverse` functions. This means that you can use `tidyselect` helpers
 33 | #' within `skim` to select or drop specific columns for summary. You can also
 34 | #' further work with a `skim_df` using `dplyr` functions in a pipeline.
 35 | #'
 36 | #' @section Customizing skim:
 37 | #' `skim()` is an intentionally simple function, with minimal arguments like
 38 | #' [summary()]. Nonetheless, this package provides two broad approaches to
 39 | #' how you can customize `skim()`'s behavior. You can customize the functions
 40 | #' that are called to produce summary statistics with [skim_with()].
 41 | #'
 42 | #' @section Unicode rendering:
 43 | #' If the rendered examples show unencoded values such as `<U+2587>` you will
 44 | #' need to change your locale to allow proper rendering. Please review the
 45 | #' *Using Skimr* vignette for more information
 46 | #' (`vignette("Using_skimr", package = "skimr")`).
 47 | #'
 48 | #' Otherwise, we export `skim_without_charts()` to produce summaries without the
 49 | #' spark graphs. These are the source of the unicode dependency.
 50 | #'
 51 | #' @param data A tibble, or an object that can be coerced into a tibble.
 52 | #' @param ...  Columns to select for skimming. When none are provided, the
 53 | #'   default is to skim all columns.
 54 | #' @param .data_name The name to use for the data. Defaults to the same as data.
 55 | #' @param skim  The skimming function to use in `skim_tee()`.
 56 | #' @return A `skim_df` object, which also inherits the class(es) of the input
 57 | #'   data. In many ways, the object behaves like a [tibble::tibble()].
 58 | #' @examples
 59 | #' skim(iris)
 60 | #'
 61 | #' # Use tidyselect
 62 | #' skim(iris, Species)
 63 | #' skim(iris, starts_with("Sepal"))
 64 | #' skim(iris, where(is.numeric))
 65 | #'
 66 | #' # Skim also works groupwise
 67 | #' iris %>%
 68 | #'   dplyr::group_by(Species) %>%
 69 | #'   skim()
 70 | #'
 71 | #' # Which five numeric columns have the greatest mean value?
 72 | #' # Look in the `numeric.mean` column.
 73 | #' iris %>%
 74 | #'   skim() %>%
 75 | #'   dplyr::select(numeric.mean) %>%
 76 | #'   dplyr::slice_head(n = 5)
 77 | #'
 78 | #' # Which of my columns have missing values? Use the base skimmer n_missing.
 79 | #' iris %>%
 80 | #'   skim() %>%
 81 | #'   dplyr::filter(n_missing > 0)
 82 | #'
 83 | #' # Use skim_tee to view the skim results and
 84 | #' # continue using the original data.
 85 | #' chickwts %>%
 86 | #'   skim_tee() %>%
 87 | #'   dplyr::filter(feed == "sunflower")
 88 | #'
 89 | #' # Produce a summary without spark graphs
 90 | #' iris %>%
 91 | #'   skim_without_charts()
 92 | #' @export
 93 | skim <- skim_with() # the default skimmer: `skim_with()` with no customization
 94 | 
 95 | #' @rdname skim
 96 | #' @param skim_fun The skim function used by `skim_tee()`. Defaults to `skim()`.
 97 | #' @export
 98 | skim_tee <- function(data, ..., skim_fun = skim) {
 99 |   # Side effect only: show the skim, then pass the input through unchanged.
100 |   print(skim_fun(data, ...))
101 |   invisible(data)
102 | }
103 | 
104 | #' @rdname skim
105 | #' @export
106 | skim_without_charts <- skim_with(
107 |   numeric = sfl(hist = NULL), # remove the `hist` skimmer for numeric columns
108 |   ts = sfl(line_graph = NULL) # remove the `line_graph` skimmer for ts columns
109 | )
110 | 


--------------------------------------------------------------------------------
/R/skim_obj.R:
--------------------------------------------------------------------------------
  1 | #' Functions for accessing skim_df attributes
  2 | #'
  3 | #' These functions simplify access to attributes contained within a `skim_df`.
  4 | #' While all attributes are read-only, being able to extract this information
  5 | #' is useful for different analyses. These functions should always be preferred
  6 | #' over calling base R's attribute functions.
  7 | #'
  8 | #' @param object A `skim_df` or `skim_list`.
  9 | #' @return Data contained within the requested `skimr` attribute.
 10 | #' @name skim-attr
 11 | NULL
 12 | 
 13 | #' @describeIn skim-attr Number of rows of the data frame that was skimmed.
 14 | #' @export
 15 | data_rows <- function(object) {
 16 |   attr(object, which = "data_rows")
 17 | }
 18 | 
 19 | #' @describeIn skim-attr Number of columns of the data frame that was skimmed.
 20 | #' @export
 21 | data_cols <- function(object) {
 22 |   attr(object, which = "data_cols")
 23 | }
 24 | 
 25 | #' @describeIn skim-attr Name of the data frame that was skimmed. It is only
 26 | #'   available in contexts where the name can be looked up, which is often
 27 | #'   not the case within a pipeline.
 28 | #' @export
 29 | df_name <- function(object) {
 30 |   attr(object, which = "df_name")
 31 | }
 32 | 
 33 | #' @describeIn skim-attr Key of the skimmed data.table. It is only available
 34 | #'   in contexts where `data` is of class `data.table`.
 35 | #' @export
 36 | dt_key <- function(object) {
 37 |   attr(object, which = "dt_key")
 38 | }
 39 | 
 40 | #' @describeIn skim-attr Names of the groups in the original data frame. Only
 41 | #'   available if the data was grouped. Otherwise, `NULL`.
 42 | #' @export
 43 | group_names <- function(object) {
 44 |   attr(object, which = "groups")
 45 | }
 46 | 
 47 | #' @describeIn skim-attr Names of the base skimming functions that were used.
 48 | #' @export
 49 | base_skimmers <- function(object) {
 50 |   attr(object, which = "base_skimmers")
 51 | }
 52 | 
 53 | #' @describeIn skim-attr Names of the skimming functions that were used,
 54 | #'   separated by data type.
 55 | #' @export
 56 | skimmers_used <- function(object) {
 57 |   attr(object, which = "skimmers_used")
 58 | }
 59 | 
 60 | 
 61 | #' Test if an object is compatible with `skimr`
 62 | #'
 63 | #' Objects within `skimr` are identified by a class, but they require additional
 64 | #' attributes and data columns for all operations to succeed. These checks help
 65 | #' ensure this. While they have some application externally, they are mostly
 66 | #' used internally.
 67 | #'
 68 | #' Most notably, a `skim_df` has columns `skim_type` and `skim_variable`. And
 69 | #' has the following special attributes
 70 | #'
 71 | #' * `data_rows`: n rows in the original data
 72 | #' * `data_cols`: original number of columns
 73 | #' * `df_name`: name of the original data frame
 74 | #' * `dt_key`: name of the key if original is a data.table
 75 | #' * `groups`: if there were group variables
 76 | #' * `base_skimmers`: names of functions applied to all skim types
 77 | #' * `skimmers_used`: names of functions used to skim each type
 78 | #'
 79 | #' The functions in these checks work like [all.equal()]. They return `TRUE` if
 80 | #' the check passes, or otherwise notifies why the check failed. This makes them
 81 | #' more useful when throwing errors.
 82 | #'
 83 | #' @param object Any `R` object.
 84 | #' @name skim-obj
 85 | NULL
 86 | 
 87 | #' @describeIn skim-obj Does the object have a column named `skim_type`?
 88 | #' @export
 89 | has_type_column <- function(object) {
 90 |   make_issue("skim_type" %in% names(object), "missing column `skim_type`")
 91 | }
 92 | 
 93 | #' @describeIn skim-obj Does the object have the `skim_variable` column?
 94 | #' @export
 95 | has_variable_column <- function(object) {
 96 |   # TRUE when the column exists; otherwise FALSE plus an explanatory message.
 97 |   found <- "skim_variable" %in% names(object)
 98 |   make_issue(found, "missing column `skim_variable`")
 99 | }
100 | 
101 | #' @describeIn skim-obj Does the object have the appropriate `skimr` attributes?
102 | #' @export
103 | has_skimr_attributes <- function(object) {
104 |   required <- c(
105 |     "data_rows", "data_cols", "df_name", "dt_key",
106 |     "base_skimmers", "skimmers_used"
107 |   )
108 |   absent <- !is.element(required, names(attributes(object)))
109 |   make_issue(all(!absent), show_missing_attributes(required, absent))
110 | }
111 | 
112 | show_missing_attributes <- function(attributes, missing) {
113 |   # `attributes`: candidate names; `missing`: logical flags of the same
114 |   # length. Collapsing an empty selection already yields "", which is what
115 |   # the message needs when nothing is missing, so no branch is required.
116 |   absent <- attributes[missing]
117 |   listed <- paste0(absent, collapse = ", ")
118 |   paste("missing attributes:", listed)
119 | }
120 | 
121 | #' @describeIn skim-obj Does the object have a `skim_type` attribute? This makes
122 | #'   it a `one_skim_df`.
123 | #' @export
124 | has_skim_type_attribute <- function(object) {
125 |   # Looks at the attribute names only; the attribute's value is not checked.
126 |   attr_names <- names(attributes(object))
127 |   has_attr <- "skim_type" %in% attr_names
128 |   make_issue(has_attr, "missing attribute: `skim_type`")
129 | }
130 | 
131 | #' @describeIn skim-obj Does the object have skimmers?
132 | #' @export
133 | has_skimmers <- function(object) {
134 |   # The check passes when any skim type still has a skimmer after
135 |   # reconciling with the current columns, or when any base skimmer column
136 |   # is present in the object.
137 |   base <- base_skimmers(object)
138 |   by_type <- reconcile_skimmers(object, group_names(object), base)
139 |   typed <- lengths(by_type) > 0
140 |   from_base <- base %in% names(object)
141 |   make_issue(
142 |     any(typed, from_base), "does not have defined skimmers."
143 |   )
144 | }
145 | 
146 | #' @describeIn skim-obj Does the object inherit from `data.frame`?
147 | #' @export
148 | is_data_frame <- function(object) {
149 |   make_issue(inherits(object, "data.frame"), "not a data.frame")
150 | }
151 | 
152 | make_issue <- function(check, message) {
153 |   structure(check, message = if (check) character() else message)
154 | }
155 | 
156 | #' @describeIn skim-obj Is the object a `skim_df`?
157 | #' @export
158 | is_skim_df <- function(object) {
159 |   # All checks are evaluated; the messages of the failing ones are combined
160 |   # into the "message" attribute of the result.
161 |   check_issues(
162 |     condition = "Object is not a `skim_df`",
163 |     is_data_frame(object), has_type_column(object),
164 |     has_variable_column(object), has_skimr_attributes(object),
165 |     has_skimmers(object)
166 |   )
167 | }
168 | 
169 | #' @describeIn skim-obj Is the object a `one_skim_df`? This is similar to a
170 | #'   `skim_df`, but it has no `skim_type` column: the type is stored in the
171 | #'   `skim_type` attribute instead.
172 | #' @export
173 | is_one_skim_df <- function(object) {
174 |   # Compared with is_skim_df(): the `skim_type` attribute replaces the
175 |   # column of that name, and no check for defined skimmers is made.
176 |   check_issues(
177 |     condition = "Object is not a `one_skim_df`",
178 |     is_data_frame(object), has_skim_type_attribute(object),
179 |     has_variable_column(object), has_skimr_attributes(object)
180 |   )
181 | }
182 | 
183 | #' @describeIn skim-obj Is the object a `skim_list`?
184 | #' @export
185 | is_skim_list <- function(object) {
186 |   # Each element must pass is_one_skim_df(); results are spliced in as issues.
187 |   elements <- purrr::map(object, is_one_skim_df)
188 |   check_issues(
189 |     "Object is not a `skim_list`", has_skimr_attributes(object), !!!elements
190 |   )
191 | }
192 | 
193 | #' @describeIn skim-obj Is this a data frame with `skim_variable` and
194 | #'  `skim_type` columns?
195 | #' @export
196 | could_be_skim_df <- function(object) {
197 |   # A weaker test than is_skim_df(): attributes and skimmers are not checked.
198 |   check_issues(
199 |     condition = "Object cannot behave like a `skim_df`",
200 |     is_data_frame(object), has_variable_column(object),
201 |     has_type_column(object)
202 |   )
203 | }
204 | 
205 | check_issues <- function(condition, ...) {
206 |   # Combine several check results: TRUE only if all of them passed, else
207 |   # FALSE with the message "<condition>: <message 1>; <message 2>; ...".
208 |   issues <- rlang::list2(...)
209 |   passed <- all(purrr::flatten_lgl(issues))
210 |   if (passed) {
211 |     return(structure(passed, message = character()))
212 |   }
213 |   reasons <- paste0(unlist(purrr::map(issues, attr, "message")), collapse = "; ")
214 |   structure(passed, message = paste0(condition, ": ", reasons))
215 | }
216 | 
217 | #' @describeIn skim-obj Stop, giving the reasons, if not a `skim_df`.
218 | #' @export
219 | assert_is_skim_df <- function(object) {
220 |   assert(is_skim_df(object))
221 | }
222 | 
223 | #' @describeIn skim-obj Stop, giving the reasons, if not a `skim_list`.
224 | #' @export
225 | assert_is_skim_list <- function(object) {
226 |   assert(is_skim_list(object))
227 | }
228 | 
229 | #' @describeIn skim-obj Stop, giving the reasons, if not a `one_skim_df`.
230 | #' @export
231 | assert_is_one_skim_df <- function(object) {
232 |   assert(is_one_skim_df(object))
233 | }
234 | 
235 | assert <- function(check) {
236 |   # On failure, raise the "message" attribute of `check` as the error.
237 |   if (!check) {
238 |     stop(attr(check, "message"))
239 |   }
240 |   invisible(NULL)
241 | }
242 | 
243 | #' Remove `skimr` class if not needed.
244 | #' @noRd
245 | strip_skim_attrs <- function(object) {
246 |   # The first class is dropped (assumed to be the skimr one) together with
247 |   # the skimr metadata attributes; `groups` is not in that list and stays.
248 |   attributes(object) <- purrr::list_modify(
249 |     attributes(object),
250 |     class = class(object)[-1],
251 |     data_rows = NULL,
252 |     data_cols = NULL,
253 |     df_name = NULL,
254 |     dt_key = NULL,
255 |     base_skimmers = NULL,
256 |     skimmers_used = NULL
257 |   )
258 |   object
259 | }
260 | 
261 | #' Pass attributes from a `skimr` object to a new object.
262 | #' @noRd
263 | reassign_skim_attrs <- function(object, skim_df, ...) {
264 |   # Defaults: the `skim_df` class vector plus the skim attributes read from
265 |   # `skim_df`. Anything named in `...` (e.g. `class`, `skimmers_used`)
266 |   # overrides the corresponding default before it is set on `object`.
267 |   from_source <- list(
268 |     class = c("skim_df", "tbl_df", "tbl", "data.frame"),
269 |     data_rows = data_rows(skim_df), data_cols = data_cols(skim_df),
270 |     df_name = df_name(skim_df), dt_key = dt_key(skim_df),
271 |     groups = group_names(skim_df), base_skimmers = base_skimmers(skim_df),
272 |     skimmers_used = skimmers_used(skim_df)
273 |   )
274 |   updated <- purrr::list_modify(from_source, ...)
275 |   assign_new_attributes(object, !!!updated)
276 | }
277 | 
278 | assign_new_attributes <- function(object, ...) {
279 |   # Overlay the attributes named in `...` on the existing ones.
280 |   attributes(object) <- purrr::list_modify(attributes(object), ...)
281 |   object
282 | }
283 | 


--------------------------------------------------------------------------------
/R/skim_print.R:
--------------------------------------------------------------------------------
  1 | #' Print `skim` objects
  2 | #'
  3 | #' `skimr` has custom print methods for all supported objects. Default printing
  4 | #' methods for `knitr`/`rmarkdown` documents are also provided.
  5 | #'
  6 | #' @section Printing options:
  7 | #'
  8 | #' For better or for worse, `skimr` often produces more output than can fit in
  9 | #' the standard R console. Fortunately, most modern environments like RStudio
 10 | #' and Jupyter support more than 80 character outputs. Call
 11 | #' `options(width = 90)` to get a better experience with `skimr`.
 12 | #'
 13 | #' The print methods in `skimr` wrap those in the [tibble][tibble::formatting]
 14 | #' package. You can control printing behavior using the same global options.
 15 | #'
 16 | #' @section Behavior in `dplyr` pipelines:
 17 | #'
 18 | #' Printing a `skim_df` requires specific columns that might be dropped when
 19 | #' using [dplyr::select()] or [dplyr::summarize()] on a `skim_df`. In those
 20 | #' cases, this method falls back to [tibble::print.tbl()].
 21 | #'
 22 | #' @section Options for controlling print behavior:
 23 | #'
 24 | #' You can control the width rule line for the printed subtables with an option:
 25 | #' `skimr_table_header_width`.
 26 | #'
 27 | #' @inheritParams tibble::print.tbl
 28 | #' @seealso [tibble::trunc_mat()] For a list of global options for customizing
 29 | #'   print formatting. [cli::num_ansi_colors()] for the variety of issues that
 30 | #'   affect tibble's color support.
 31 | #' @param include_summary Whether a summary of the data frame should be printed
 32 | #' @param summary_rule_width Width of Data Summary cli rule, defaults to 40.
 33 | #' @name print
 34 | NULL
 35 | 
 36 | #' @describeIn print Print a skimmed data frame (`skim_df` from [skim()]).
 37 | #' @export
 38 | print.skim_df <- function(x,
 39 |                           include_summary = TRUE,
 40 |                           n = Inf,
 41 |                           width = Inf,
 42 |                           summary_rule_width = getOption(
 43 |                             "skimr_summary_rule_width",
 44 |                             default = 40
 45 |                           ),
 46 |                           ...) {
 47 |   # When `x` no longer passes is_skim_df(), or has no rows, defer to the
 48 |   # next print method instead of the skimr-specific output.
 49 |   if (!(is_skim_df(x) && nrow(x) > 0)) {
 50 |     return(NextMethod("print"))
 51 |   }
 52 |   if (include_summary) {
 53 |     print(summary(x), .summary_rule_width = summary_rule_width, ...)
 54 |   }
 55 |   # One sub-table per skim type. Each is printed with the requested number
 56 |   # of rows and width; the printed values themselves are not needed.
 57 |   purrr::walk(
 58 |     partition(x),
 59 |     print,
 60 |     width = width, n = n, ...
 61 |   )
 62 |   invisible(x)
 63 | }
 64 | 
 65 | # Methods for correctly formatting a `one_skim_df`. We leverage the
 66 | # customization options in `pillar` for this. It divides the results into: a
 67 | # header, which we customize; a body, where we strip some values; and a footer,
 68 | # which we drop. For more details, see
 69 | # https://pillar.r-lib.org/articles/extending.html
 70 | 
 71 | #' @importFrom pillar tbl_format_header
 72 | #' @export
 73 | tbl_format_header.one_skim_df <- function(x, setup, ...) {
 74 |   # The header is a blank line followed by a rule labelled with the skim
 75 |   # type. The rule width comes from the `skimr_table_header_width` option,
 76 |   # falling back to the `width` option and finally to 80.
 77 |   rule_width <- getOption("skimr_table_header_width", getOption("width", 80))
 78 |   label <- paste("Variable type:", attr(x, "skim_type"))
 79 |   c(
 80 |     "", cli::rule(line = 1, left = label, width = rule_width)
 81 |   )
 82 | }
 83 | 
 84 | #' @importFrom pillar ctl_new_pillar
 85 | #' @export
 86 | ctl_new_pillar.one_skim_df <- function(controller,
 87 |                                        x,
 88 |                                        width,
 89 |                                        ...,
 90 |                                        title = NULL) {
 91 |   out <- NextMethod() # the pillar as built by the next method
 92 |   out$type <- NULL # remove its `type` component (presumably the type row)
 93 |   out
 94 | }
 95 | 
 96 | #' @describeIn print Print a `skim_list`, a list of `skim_df` objects.
 97 | #' @export
 98 | print.skim_list <- function(x, n = Inf, width = Inf, ...) {
 99 |   # Print as a plain named list: every attribute (class and skimr metadata
100 |   # included) is removed and only the names are put back. `n`, `width` and
101 |   # `...` are accepted but not forwarded to the element printers.
102 |   element_names <- names(x)
103 |   attributes(x) <- NULL
104 |   print(rlang::set_names(x, element_names))
105 | }
106 | 
107 | 
108 | #' @describeIn print Print method for a `summary_skim_df` object.
109 | #' @param .summary_rule_width the width for the main rule above the summary.
110 | #' @export
111 | print.summary_skim_df <- function(x, .summary_rule_width = 40, ...) {
112 |   # A titled rule first, then the formatted summary, one element per line.
113 |   title <- cli::rule(
114 |     line = 1, left = "Data Summary", width = .summary_rule_width
115 |   )
116 |   writeLines(c(title, format(x)))
117 | }
118 | 
119 | #' Provide a default printing method for knitr.
120 | #'
121 | #' Instead of standard R output, `knitr` and `RMarkdown` documents will have
122 | #' formatted [knitr::kable()] output on return. You can disable this by setting
123 | #' the chunk option `render = normal_print`.
124 | #'
125 | #' The summary statistics for the original data frame can be disabled by setting
126 | #' the `knitr` chunk option `skimr_include_summary = FALSE`. See
127 | #' [knitr::opts_chunk] for more information. You can change the number of digits
128 | #' shown in the printed table with the `skimr_digits` chunk option.
129 | #'
130 | #' Alternatively, you can call [yank()] to get the particular
131 | #' `skim_df` objects and format them however you like. One warning though.
132 | #' Because histograms contain unicode characters, they can have unexpected
133 | #' print results, as R has varying levels of unicode support. This affects
134 | #' Windows users most commonly. Call `vignette("Using_fonts")` for more details.
135 | #'
136 | #' @seealso [knitr::kable()]
137 | #' @inheritParams knitr::knit_print
138 | #' @param options Options passed into the print function.
139 | #' @return A `knit_asis` object. Which is used by `knitr` when rendered.
140 | #' @importFrom knitr knit_print
141 | #' @name knit_print
142 | NULL
143 | 
144 | #' @describeIn knit_print Default `knitr` print for `skim_df` objects.
145 | #' @export
146 | knit_print.skim_df <- function(x, options = NULL, ...) {
147 |   # Anything failing is_skim_df(), or without rows, goes to the next method.
148 |   if (!(is_skim_df(x) && nrow(x) > 0)) {
149 |     return(NextMethod("knit_print"))
150 |   }
151 |   # The summary is included unless the `skimr_include_summary` chunk option
152 |   # says otherwise; `NULL` stands for "no summary".
153 |   show_summary <- options$skimr_include_summary %||% TRUE
154 |   kabled <- if (show_summary) {
155 |     knit_print(summary(x))
156 |   } else {
157 |     c()
158 |   }
159 |   knit_print_by_type(partition(x), options, kabled)
160 | }
161 | 
162 | knit_print_by_type <- function(x, options, summary) {
163 |   # Optional summary first, then every type's table, joined by newlines.
164 |   tables <- unlist(purrr::imap(x, knit_print_one, options))
165 |   knitr::asis_output(paste(c("", summary, "", "", tables), collapse = "\n"))
166 | }
167 | 
168 | knit_print_one <- function(by_type, type, options) {
169 |   # A bold caption line, a blank line, the kable() output, two blank lines.
170 |   # Digits come from the `skimr_digits` chunk option and default to 2.
171 |   digits <- options$skimr_digits %||% 2
172 |   table_lines <- knitr::kable(by_type, digits = digits)
173 |   heading <- sprintf("**Variable type: %s**", type)
174 |   c(heading, "", table_lines, "", "")
175 | }
176 | 
177 | #' @describeIn knit_print Default `knitr` print for a `skim_list`.
178 | #' @export
179 | knit_print.skim_list <- function(x, options = NULL, ...) {
180 |   knit_print_by_type(x, options, summary = NULL) # no summary section
181 | }
182 | 
183 | #' @describeIn knit_print Default `knitr` print within a partitioned `skim_df`.
184 | #' @export
185 | knit_print.one_skim_df <- function(x, options = NULL, ...) {
186 |   # The table's caption uses the type stored in the `skim_type` attribute.
187 |   lines <- c("", "", knit_print_one(x, attr(x, "skim_type"), options), "")
188 |   knitr::asis_output(paste(lines, collapse = "\n"))
189 | }
190 | 
191 | #' @describeIn knit_print Default `knitr` print for `skim_df` summaries.
192 | #' @export
knit_print.summary_skim_df <- function(x, options = NULL, ...) {
  # Two columns without column names: summary labels and their values.
  label_value_pairs <- cbind(
    get_summary_dnames(x),
    get_summary_values(x),
    deparse.level = 0
  )

  table_lines <- knitr::kable(
    label_value_pairs,
    caption = "Data summary",
    table.attr = "style='width: auto;'
      class='table table-condensed'"
  )

  rendered <- paste(table_lines, collapse = "\n")
  knitr::asis_output(rendered)
}
208 | 
209 | 
210 | #' Skimr printing within Jupyter notebooks
211 | #'
212 | #' This reproduces printed results in the console. By default Jupyter kernels
213 | #' render the final object in the cell. We want the version printed by
214 | #' `skimr` instead of the data that it contains.
215 | #'
216 | #' @param obj The object to \link{print} and then return the output.
217 | #' @param ... ignored.
218 | #' @return None. `invisible(NULL)`.
219 | #' @importFrom repr repr_text
220 | #' @name repr
221 | 
222 | #' @rdname repr
223 | #' @export
repr_text.skim_df <- function(obj, ...) {
  # Delegate to print() so the cell shows the printed skim output; whatever
  # print() returns for this object is the return value here.
  print(obj)
}
227 | 
228 | #' @rdname repr
229 | #' @export
repr_text.skim_list <- repr_text.skim_df # same print-based implementation
231 | 
232 | #' @rdname repr
233 | #' @export
repr_text.one_skim_df <- repr_text.skim_df # same print-based implementation
235 | 


--------------------------------------------------------------------------------
/R/skimr-package.R:
--------------------------------------------------------------------------------
 1 | #' Skim a data frame
 2 | #'
 3 | #' This package provides an alternative to the default summary functions
 4 | #' within R. The package's API is tidy, functions take data frames, return
 5 | #' data frames and can work as part of a pipeline. The returned `skimr`
 6 | #' object is subsettable and offers a human readable output.
 7 | #'
 8 | #' `skimr` is opinionated, providing a strong set of summary statistics
#' that are generated for a variety of different data types. It also
#' provides an API for customization. Users can change both the functions
11 | #' dispatched and the way the results are formatted.
12 | #'
13 | "_PACKAGE"
14 | 
15 | 
16 | # Imports -----------------------------------------------------------------
17 | 
18 | #' @importFrom rlang %||% .data
19 | 
20 | #' @importFrom magrittr %>%
21 | #' @export
22 | magrittr::`%>%`
23 | 
24 | #' @importFrom tidyselect contains
25 | #' @export
26 | tidyselect::contains
27 | 
28 | #' @importFrom tidyselect ends_with
29 | #' @export
30 | tidyselect::ends_with
31 | 
32 | #' @importFrom tidyselect everything
33 | #' @export
34 | tidyselect::everything
35 | 
36 | #' @importFrom tidyselect matches
37 | #' @export
38 | tidyselect::matches
39 | 
40 | #' @importFrom tidyselect num_range
41 | #' @export
42 | tidyselect::num_range
43 | 
44 | #' @importFrom tidyselect one_of
45 | #' @export
46 | tidyselect::one_of
47 | 
48 | #' @importFrom tidyselect starts_with
49 | #' @export
50 | tidyselect::starts_with
51 | 


--------------------------------------------------------------------------------
/R/stats.R:
--------------------------------------------------------------------------------
  1 | #' Summary statistic functions
  2 | #'
  3 | #' `skimr` provides extensions to a variety of functions with R's stats package
  4 | #' to simplify creating summaries of data. All functions are vectorized over the
  5 | #' first argument. Additional arguments should be set in the [sfl()] that sets
  6 | #' the appropriate skimmers for a data type. You can use these, along with other
  7 | #' vectorized R functions, for creating custom sets of summary functions for
  8 | #' a given data type.
  9 | #'
 10 | #' @seealso [get_skimmers()] for customizing the functions called by [skim()].
 11 | #' @param x A vector
 12 | #' @param n_bins In `inline_hist`, the number of histogram bars.
 13 | #' @param length.out In `inline_linegraph`, the length of the character time
 14 | #'   series.
#' @param max_char In `top_counts`, the maximum number of characters kept from
#'   each level name.
 16 | #' @name stats
 17 | NULL
 18 | 
 19 | #' @describeIn stats Calculate the sum of `NA` and `NULL` (i.e. missing) values.
 20 | #' @export
 21 | n_missing <- function(x) {
 22 |   sum(is.na(x) | is.null(x))
 23 | }
 24 | 
#' @describeIn stats Calculate the number of values that are neither `NA` nor
#'   `NULL` (i.e. not missing).
 27 | #' @export
 28 | n_complete <- function(x) {
 29 |   sum(!is.na(x) & !is.null(x))
 30 | }
 31 | 
#' @describeIn stats Calculate the proportion of complete (non-missing) values.
 33 | #' @export
 34 | complete_rate <- function(x) {
 35 |   1 - n_missing(x) / length(x)
 36 | }
 37 | 
#' @describeIn stats Calculate the number of values that consist only of
#'   whitespace (one or more whitespace characters and nothing else).
 40 | #' @export
 41 | n_whitespace <- function(x) {
 42 |   whitespace <- grepl("^\\s+$", x)
 43 |   sum(whitespace)
 44 | }
 45 | 
#' @describeIn stats Create a contingency table and arrange its levels in
#'   descending order. In case of ties, the ordering of results is alphabetical
#'   and depends upon the locale. `NA` values are excluded from the table.
 50 | #' @export
 51 | sorted_count <- function(x) {
 52 |   tab <- table(x, useNA = "no")
 53 |   names_tab <- names(tab)
 54 |   if (is.element("", names_tab)) {
 55 |     names_tab[names_tab == ""] <- "empty"
 56 |     warning(
 57 |       "Variable contains value(s) of \"\" that have been ",
 58 |       "converted to \"empty\"."
 59 |     )
 60 |   }
 61 |   out <- rlang::set_names(as.integer(tab), names_tab)
 62 |   sort(out, decreasing = TRUE)
 63 | }
 64 | 
 65 | #' @describeIn stats Compute and collapse a contingency table into a single
 66 | #'   character scalar. Wraps [sorted_count()].
 67 | #' @param max_levels The maximum number of levels to be displayed.
 68 | #' @export
 69 | top_counts <- function(x, max_char = 3, max_levels = 4) {
 70 |   counts <- sorted_count(x)
 71 |   if (length(counts) > max_levels) {
 72 |     top <- counts[seq_len(max_levels)]
 73 |   } else {
 74 |     top <- counts
 75 |   }
 76 |   top_names <- substr(names(top), 1, max_char)
 77 |   paste0(top_names, ": ", top, collapse = ", ")
 78 | }
 79 | 
 80 | #' @describeIn stats Generate inline histogram for numeric variables. The
 81 | #'   character length of the histogram is controlled by the formatting options
 82 | #'   for character vectors.
 83 | #' @export
 84 | inline_hist <- function(x, n_bins = 8) {
 85 |   # For the purposes of the histogram, treat infinite as NA
 86 |   # (before the test for all NA)
 87 |   if (any(is.infinite(x))) {
 88 |     x[is.infinite(x)] <- NA
 89 |     warning(
 90 |       "Variable contains Inf or -Inf value(s) that were converted to NA."
 91 |     )
 92 |   }
 93 | 
 94 |   # Handle empty and NA vectors (is.na is TRUE for NaN)
 95 |   if (length(x) < 1 || all(is.na(x))) {
 96 |     return(" ")
 97 |   }
 98 | 
 99 |   # Addresses a known bug in cut()
100 |   if (all(x == 0, na.rm = TRUE)) x <- x + 1
101 |   hist_dt <- table(cut(x, n_bins))
102 |   hist_dt <- hist_dt / max(hist_dt)
103 |   spark_bar(hist_dt)
104 | }
105 | 
106 | #' Draw a sparkline bar graph with unicode block characters
107 | #'
108 | #' Rendered using
109 | #' [block elements](https://en.wikipedia.org/wiki/Block_Elements).
110 | #' In most common fixed width fonts these are rendered wider than regular
111 | #' characters which means they are not suitable if you need precise alignment.
112 | #' Based on the function in the pillar package.
113 | #'
114 | #' @param x A numeric vector between 0 and 1
115 | #' @param safe Nominally there are 8 block elements from 1/8 height to full
116 | #'   height (8/8). However, the half-height and full-height blocks appear
117 | #'   to be rendered inconsistently (possibly due to font substitution).
118 | #' @examples
119 | #' \dontrun{
120 | #' x <- seq(0, 1, length = 6)
121 | #' spark_bar(x)
122 | #' spark_bar(sample(x))
123 | #'
124 | #' # This might work if you're lucky
125 | #' spark_bar(seq(0, 1, length = 8), safe = FALSE)
126 | #'
127 | #' spark_bar(c(0, NA, 0.5, NA, 1))
128 | #' }
129 | #' @noRd
spark_bar <- function(x, safe = TRUE) {
  stopifnot(is.numeric(x))

  # The eight block-element code points U+2581 through U+2588.
  blocks <- intToUtf8(0x2581:0x2588, multiple = TRUE)
  if (safe) {
    # Drop the 4th and 8th blocks (the half- and full-height ones).
    blocks <- blocks[-c(4, 8)]
  }
  n_blocks <- length(blocks)

  # Split [0, 1] into one equal-width interval per block and label each value
  # with the block of the interval it falls in.
  binned <- cut(
    x,
    breaks = seq(0, 1, length.out = n_blocks + 1),
    labels = blocks,
    include.lowest = TRUE
  )
  glyphs <- as.character(binned)

  # Values that could not be binned (NA or outside [0, 1]) use the last block.
  glyphs[is.na(glyphs)] <- blocks[n_blocks]
  paste0(glyphs, collapse = "")
}
148 | 
149 | #' @describeIn stats Calculate the number of blank values in a character vector.
150 | #'   A "blank" is equal to "".
151 | #' @export
n_empty <- function(x) {
  # Count elements equal to the empty string; NA never matches.
  sum(x %in% "")
}
156 | 
157 | #' @describeIn stats Calculate the minimum number of characters within a
158 | #'   character vector.
159 | #' @export
min_char <- function(x) {
  # With nothing but NA (or no values at all) there is no length to report.
  if (all(is.na(x))) {
    return(NA)
  }
  # NA lengths are dropped before taking the minimum.
  min(nchar(x, allowNA = TRUE), na.rm = TRUE)
}
167 | 
168 | #' @describeIn stats Calculate the maximum number of characters within a
169 | #'   character vector.
170 | #' @export
max_char <- function(x) {
  # With nothing but NA (or no values at all) there is no length to report.
  if (all(is.na(x))) {
    return(NA)
  }
  # NA lengths are dropped before taking the maximum.
  max(nchar(x, allowNA = TRUE), na.rm = TRUE)
}
178 | 
179 | #' @describeIn stats Calculate the number of unique elements but remove `NA`.
180 | #' @export
n_unique <- function(x) {
  # Drop NA first so it is not counted as a distinct value.
  observed <- x[!is.na(x)]
  length(unique(observed))
}
186 | 
187 | #' @describeIn stats Get the start for a time series without the frequency.
188 | #' @export
ts_start <- function(x) {
  # Keep only the first element of what stats::start() returns.
  start_time <- stats::start(x)
  start_time[1]
}
192 | 
193 | #' @describeIn stats Get the finish for a time series without the frequency.
194 | #' @export
ts_end <- function(x) {
  # Keep only the first element of what stats::end() returns.
  end_time <- stats::end(x)
  end_time[1]
}
198 | 
199 | #' @describeIn stats Generate inline line graph for time series variables. The
200 | #'   character length of the line graph is controlled by the formatting options
201 | #'   for character vectors.
202 | #'   Based on the function in the pillar package.
203 | #' @export
inline_linegraph <- function(x, length.out = 16) {
  # Work on the non-missing observations only.
  observed <- x[!is.na(x)]

  # Pick `length.out` evenly spaced positions across the series, rounded down
  # to whole indices.
  positions <- floor(seq(1, length(observed), length.out = length.out))

  # Rescale the sampled values to [0, 1] and draw them.
  spark_line(normalize01(observed[positions]))
}
210 | 
211 | # Rescale data to be between 0 and 1
normalize01 <- function(x) {
  # Shift so the minimum is 0, then divide by the range of the values.
  shifted <- x - min(x)
  shifted / (max(x) - min(x))
}
215 | 
216 | #' Draw a sparkline line graph with Braille characters.
217 | #'
218 | #' @inheritParams spark_bar
219 | #' @examples
220 | #' \dontrun{
221 | #' x <- seq(0, 1, length = 10)
222 | #' spark_line(x)
223 | #' }
224 | #' @noRd
spark_line <- function(x) {
  stopifnot(is.numeric(x))

  # Assign every value to one of four equal-width bands of [0, 1];
  # all.inside keeps the result within 1..4.
  band_breaks <- seq(0, 1, length.out = 5)
  heights <- findInterval(x, band_breaks, all.inside = TRUE)

  # Consecutive values are paired row by row; the second value of each pair
  # is offset by 4 so braille() can tell the two columns apart.
  pairs <- matrix(heights, ncol = 2, byrow = TRUE)
  pairs[, 2] <- pairs[, 2] + 4

  # One character per pair, joined into a single string.
  glyphs <- apply(pairs, 1, braille)
  paste0(glyphs, collapse = "")
}
236 | 
237 | # https://en.wikipedia.org/wiki/Braille_Patterns
braille <- function(x) {
  # Translate the incoming indices into the dot numbering used by the braille
  # code points.
  dot_order <- c(7L, 3L, 2L, 1L, 8L, 6L, 5L, 4L)
  dots <- dot_order[x]

  # Dot k contributes 2^(k - 1) on top of the block's base value 0x2800.
  raised <- which(1:8 %in% dots)
  code_point <- 0x2800 + sum(2^(raised - 1))

  intToUtf8(code_point)
}
249 | 
250 | #' @describeIn stats Get the length of the shortest list in a vector of lists.
251 | #' @export
list_lengths_min <- function(x) {
  # Lengths of the elements that is.na() does not flag.
  sizes <- lengths(x[!is.na(x)])
  if (length(sizes) == 0) {
    return(NA)
  }
  min(sizes)
}
261 | 
262 | #' @describeIn stats Get the median length of the lists.
263 | #' @export
list_lengths_median <- function(x) {
  # Lengths of the elements that is.na() does not flag.
  sizes <- lengths(x[!is.na(x)])
  if (length(sizes) == 0) {
    return(NA)
  }
  stats::median(sizes)
}
273 | 
274 | #' @describeIn stats Get the maximum length of the lists.
275 | #' @export
list_lengths_max <- function(x) {
  # Lengths of the elements that is.na() does not flag.
  sizes <- lengths(x[!is.na(x)])
  if (length(sizes) == 0) {
    return(NA)
  }
  max(sizes)
}
285 | 
286 | #' @describeIn stats Get the length of the shortest list in a vector of lists.
287 | #' @export
list_min_length <- function(x) {
  # Smallest element length; no NA filtering is applied here.
  min(lengths(x))
}
292 | 
293 | #' @describeIn stats Get the length of the longest list in a vector of lists.
294 | #' @export
list_max_length <- function(x) {
  # Largest element length; no NA filtering is applied here.
  max(lengths(x))
}
299 | 


--------------------------------------------------------------------------------
/R/summary.R:
--------------------------------------------------------------------------------
 1 | #' Summary function for skim_df
 2 | #'
 3 | #' This is a method of the generic function [summary()].
 4 | #'
 5 | #' @param object a skim dataframe.
 6 | #' @param ... Additional arguments affecting the summary produced. Not used.
 7 | #' @return A summary of the skim data frame.
 8 | #' @examples
 9 | #' a <- skim(mtcars)
10 | #' summary(a)
11 | #' @export
summary.skim_df <- function(object, ...) {
  if (is.null(object)) {
    stop("dataframe is null.")
  }

  # Count each variable once, using the first row in which it appears.
  first_occurrence <- !duplicated(object$skim_variable)
  counts <- table(type = object$skim_type[first_occurrence])

  # Comma-separated group names, or "None" when there are no groups.
  group_vars <- group_names(object)
  possible_groups <- if (length(group_vars) > 0) {
    paste(group_vars, collapse = ", ")
  } else {
    "None"
  }

  summary_fields <- list(
    data_name = process_data_name(object),
    counts = counts,
    types = dimnames(counts)[[1]],
    possible_groups = possible_groups,
    dt_key = dt_key(object),
    data_rows = data_rows(object),
    data_cols = data_cols(object)
  )
  structure(summary_fields, class = "summary_skim_df")
}
40 | 
process_data_name <- function(object) {
  # Strip backticks from the stored data name.
  name <- gsub("`", "", df_name(object))

  # "." and ".data" are reported as piped input.
  if (name %in% c(".", ".data")) {
    return("Piped data")
  }

  # Names longer than 25 characters are cut and marked with an ellipsis.
  if (nchar(name) > 25) {
    return(paste0(substring(name, 1, 25), "..."))
  }

  name
}
52 | 
53 | #' @export
format.summary_skim_df <- function(x, ...) {
  # Header row first: an empty label paired with the "Values" heading.
  labels <- format(c("", get_summary_dnames(x)))
  values <- format(c("Values", get_summary_values(x)))

  # One "label value" string per summary row.
  paste(labels, values)
}
62 | 
get_summary_dnames <- function(summary_object) {
  # The "Key" label appears only when a key is recorded; otherwise this is
  # NULL and disappears from the combined vector.
  key_label <- if (!is.na(summary_object$dt_key)) "Key"

  # Each column type gets an indented label.
  type_labels <- paste0("  ", summary_object$types)

  c(
    "Name",
    "Number of rows ",
    "Number of columns ",
    key_label,
    "_______________________ ",
    "Column type frequency: ",
    type_labels,
    "________________________  ",
    "Group variables"
  )
}
76 | 
get_summary_values <- function(summary_object) {
  # The key value appears only when a key is recorded; otherwise this is NULL
  # and disappears from the combined vector.
  key_value <- if (!is.na(summary_object$dt_key)) summary_object$dt_key

  # Per-type counts without their names.
  type_counts <- unname(summary_object$counts)

  # Single-space strings fill the rows that carry no value.
  c(
    summary_object$data_name,
    summary_object$data_rows,
    summary_object$data_cols,
    key_value,
    " ",
    " ",
    type_counts,
    " ",
    summary_object$possible_groups
  )
}
90 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
 1 | #' Fix unicode histograms on Windows
 2 | #'
#' This function changes your session's locale to address issues with printing
#' histograms on Windows on versions of R below 4.2.1.
 5 | #'
 6 | #' There are known issues with printing the spark-histogram characters when
 7 | #' printing a data frame, appearing like this: "<U+2582><U+2585><U+2587>".
 8 | #' This longstanding problem originates in the low-level code for printing
 9 | #' dataframes. This was addressed in R version 4.2.1.
10 | #'
11 | #' @seealso [skim_without_charts()]
12 | #' @export
fix_windows_histograms <- function() {
  message(
    "This function will change your system locale. It may have other ",
    "unintended effects."
  )

  # Any answer other than "n" (case-insensitive) counts as consent, including
  # an empty reply.
  answer <- tolower(readline("Continue? (Y/n)"))
  if (answer == "n") {
    message("Locale was not changed.")
  } else {
    Sys.setlocale("LC_CTYPE", "Chinese")
  }

  invisible(NULL)
}
26 | 


--------------------------------------------------------------------------------
/R/vctrs.R:
--------------------------------------------------------------------------------
  1 | #' Functions for working with the vctrs package
  2 | #'
  3 | #' These make it clear that we need to use the tibble behavior when joining,
  4 | #' concatenating or casting `skim_df` objects. For a better discussion, on
  5 | #' why this is important and how these functions work, see:
  6 | #' <https://vctrs.r-lib.org/reference/howto-faq-coercion-data-frame.html>.
  7 | #'
  8 | #' `vec_ptype2.*` handles finding common prototypes between `skim_df` and
  9 | #' similar objects. `vec_cast.*` handles casting between objects. Note that
#' as of `dplyr 1.0.2`, [dplyr::bind_rows()] does not fully support combining
#' attributes and [vctrs::vec_rbind()] is preferred instead.
 12 | #'
 13 | #' @importFrom vctrs vec_ptype2 vec_cast
 14 | #' @name skimr-vctrs
 15 | #' @keywords internal
 16 | NULL
 17 | 
 18 | #' @rdname skimr-vctrs
 19 | #' @export
vec_ptype2.skim_df.skim_df <- function(x, y, ...) {
  # Delegate to the shared helper, which rejects incompatible skimmer sets
  # and builds the common prototype.
  combine_compatible_objects(x, y, ...)
}
 23 | 
 24 | #' @rdname skimr-vctrs
 25 | #' @export
vec_ptype2.skim_df.tbl_df <- function(x, y, ...) {
  # Same helper as the skim_df/skim_df case; it decides whether the result
  # keeps or loses its skim attributes.
  combine_compatible_objects(x, y, ...)
}
 29 | 
 30 | #' @rdname skimr-vctrs
 31 | #' @export
vec_ptype2.tbl_df.skim_df <- function(x, y, ...) {
  # Mirror of the skim_df/tbl_df method; arguments are forwarded in the same
  # x, y order.
  combine_compatible_objects(x, y, ...)
}
 35 | 
 36 | #' @rdname skimr-vctrs
 37 | #' @export
vec_cast.skim_df.skim_df <- function(x, to, ...) {
  # Delegate to the shared helper, which casts and then either reassigns or
  # strips the skim attributes.
  cast_compatible_object(x, to, ...)
}
 41 | 
 42 | #' @rdname skimr-vctrs
 43 | #' @export
vec_cast.skim_df.tbl_df <- function(x, to, ...) {
  # Plain tibble cast from vctrs; no skim attributes are reassigned here.
  vctrs::tib_cast(x, to, ...)
}
 47 | 
 48 | #' @rdname skimr-vctrs
 49 | #' @export
vec_cast.tbl_df.skim_df <- function(x, to, ...) {
  # Plain tibble cast from vctrs; no skim attributes are reassigned here.
  vctrs::tib_cast(x, to, ...)
}
 53 | 
 54 | #' We only combine skim_df's that were built with the same set of skimmers.
 55 | #' @noRd
 56 | has_compatible_skimmers <- function(x, y) {
 57 |   has_identical_base(x, y) && has_identical_skimmers(x, y)
 58 | }
 59 | 
 60 | has_identical_base <- function(x, y) {
 61 |   base_x <- attr(x, "base_skimmers") %||% attr(y, "base_skimmers")
 62 |   base_y <- attr(y, "base_skimmers") %||% attr(x, "base_skimmers")
 63 |   identical(base_x, base_y)
 64 | }
 65 | 
 66 | has_identical_skimmers <- function(x, y) {
 67 |   skim_list_x <- attr(x, "skimmers_used") %||% attr(y, "skimmers_used")
 68 |   skim_list_y <- attr(y, "skimmers_used") %||% attr(x, "skimmers_used")
 69 |   x_names <- names(skim_list_x)
 70 |   y_names <- names(skim_list_y)
 71 |   all_names <- union(x_names, y_names)
 72 |   all(purrr::map_lgl(
 73 |     all_names,
 74 |     check_identical_skimmers,
 75 |     x_names,
 76 |     y_names,
 77 |     skim_list_x,
 78 |     skim_list_y
 79 |   ))
 80 | }
 81 | 
 82 | check_identical_skimmers <- function(name,
 83 |                                      x_names,
 84 |                                      y_names,
 85 |                                      skim_list_x,
 86 |                                      skim_list_y) {
 87 |   if ((name %in% x_names) && (name %in% y_names)) {
 88 |     identical(skim_list_x[name], skim_list_y[name])
 89 |   } else {
 90 |     TRUE
 91 |   }
 92 | }
 93 | 
 94 | combine_compatible_objects <- function(x, y, ..., x_arg = "", y_arg = "") {
 95 |   if (!has_compatible_skimmers(x, y)) {
 96 |     vctrs::stop_incompatible_type(
 97 |       x,
 98 |       y,
 99 |       x_arg = x_arg,
100 |       y_arg = y_arg,
101 |       details = "Can't combine different sets of skim functions."
102 |     )
103 |   }
104 | 
105 |   if (could_be_skim_df(x) && could_be_skim_df(y)) {
106 |     reassign_skim_attrs(
107 |       vctrs::tib_ptype2(x, y, ...),
108 |       x,
109 |       data_rows = data_rows(x) + data_rows(y),
110 |       data_cols = data_cols(x) + data_cols(y),
111 |       df_name = paste0(df_name(x), "+", df_name(y)),
112 |       groups = c(group_names(x), group_names(y)),
113 |       skimmers_used = union(skimmers_used(x), skimmers_used(y))
114 |     )
115 |   } else {
116 |     strip_skim_attrs(vctrs::tib_ptype2(x, y, ...))
117 |   }
118 | }
119 | 
cast_compatible_object <- function(x, to, ..., x_arg = "", to_arg = "") {
  # Perform the tibble cast first, then decide what to do with attributes.
  casted <- vctrs::tib_cast(x, to, ..., x_arg = x_arg, to_arg = to_arg)

  # A result that can no longer be a skim_df loses its skim attributes.
  if (!could_be_skim_df(casted)) {
    return(strip_skim_attrs(casted))
  }

  # Otherwise it takes its skim attributes from the cast target.
  reassign_skim_attrs(casted, to)
}
128 | 


--------------------------------------------------------------------------------
/inst/figures/skimmer_hex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ropensci/skimr/c0018a7eae3a8858a873d85eeda55120489fb03f/inst/figures/skimmer_hex.png


--------------------------------------------------------------------------------
/inst/other_docs/blog.md:
--------------------------------------------------------------------------------
  1 | Like every R user who uses summary statistics (so, everyone), our team has to rely on some combination of summary functions beyond `summary()` and `str()`. But we found them all lacking in some way because they can be generic, they don't always provide easy-to-operate-on data structures, and they are not pipeable. What we wanted was a frictionless approach for quickly skimming useful and tidy summary statistics as part of a pipeline. And so at [rOpenSci \#unconf17](http://unconf17.ropensci.org/), we developed [`skimr`](https://github.com/ropenscilabs/skimr#skimr).
  2 | 
  3 | In a nutshell, `skimr` will create a `skim_df` object that can be further operated upon or that provides a human-readable printout in the console. It presents reasonable default summary statistics for numerics, factors, etc, and lists counts, and missing and unique values. And the momentum is still going, thanks to our awesome team (see below)!
  4 | 
  5 | Backstory
  6 | ---------
  7 | 
The idea for skimr as a project for the \#unconf17 [was proposed by Amelia McNamara](https://github.com/ropensci/unconf17/issues/50) following [discussions on Twitter](https://twitter.com/AmeliaMN/status/774348524653834241) and an [initial package by Hadley Wickham](https://github.com/hadley/precis).
  9 | 
 10 | Once we were together in Los Angeles, we formed a solid team, set up a Google Doc, a Slack channel, the `ropensci/skimr` repo, and grabbed a whiteboard.
 11 | 
 12 | We started off by brainstorming what we liked about existing summary packages and what other features we wanted. We started looking at example data, `mtcars`.
 13 | 
 14 | Here's what we liked and disliked, in Amelia's words:
 15 | 
 16 | ``` r
 17 | ### "I like what we get here because mpg is numeric so these stats make sense:" 
 18 | summary(mtcars$mpg) 
 19 | #   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 20 | #  10.40   15.42   19.20   20.09   22.80   33.90 
 21 | 
 22 | 
 23 | ### "But I don’t like this because cyl should really be a factor and shouldn't have these stats:"
 24 | summary(mtcars$cyl)
 25 | #   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 26 | #  4.000   4.000   6.000   6.188   8.000   8.000 
 27 | 
 28 | 
 29 | ### "This is OK, but not descriptive enough. It could be clearer what I'm looking at."
 30 | mosaic::tally(~cyl, data=mtcars) # install.packages('mosaic')
 31 | #cyl
 32 | # 4  6  8 
 33 | #11  7 14 
 34 | 
 35 | 
 36 | ### "But this output isn't labeled, not ideal." 
 37 | table(mtcars$cyl, mtcars$vs)
 38 | #   
 39 | #     0  1
 40 | #  4  1 10
 41 | #  6  3  4
 42 | #  8 14  0
 43 | 
 44 | 
 45 | ### "I like this because it returns 'sd', 'n' and 'missing'":
 46 | mosaic::favstats(~mpg, data=mtcars) 
 47 | #  min     Q1 median   Q3  max     mean       sd  n missing
 48 | # 10.4 15.425   19.2 22.8 33.9 20.09062 6.026948 32       0
 49 | ```
 50 | 
Once we had an idea of what we thought would be useful, we did a bit of market research (i.e. we made a short [presentation](https://docs.google.com/presentation/d/13Ky3-Y70STzufLJtCm6GXN8SMj2Y11riDef8b9cBgAQ/edit#slide=id.p) and ran it by other unconfers at lunch).
 52 | 
 53 | Introducing `skimr`
 54 | -------------------
 55 | 
 56 | So what does `skimr` actually do? It allows you to skim useful summary statistics in the console, or use those statistics in a pipeable workflow.
 57 | 
 58 | Some features of output in the console:
 59 | 
 60 | -   reports missing, complete, n, sd, and quantiles
 61 | -   reports numeric/int/double separately from factor/chr, and identifies class
 62 | -   handles dates, logicals
-   uses [Hadley's pillars](https://github.com/hadley/pillar), specifically `pillar::spark_bar()`
 64 | 
 65 | Here are examples of `skimr` in action:
 66 | 
 67 | ### Quick skim in the console:
 68 | 
 69 | **Nicely separates numeric and factor variables:**
 70 | 
 71 | ![](https://github.com/ropenscilabs/skimr/blob/24c733d7e4752c37e46e4c36693da107f42f3f55/man/figures/skim_iris.png) <br>
 72 | 
 73 | **Clearly displays many numeric variables:**
 74 | 
 75 | ![](https://github.com/ropenscilabs/skimr/blob/ecb90e22047d4a1b228bcf471650eb79b733e52e/man/figures/skim_mtcars.png) <br>
 76 | 
 77 | **Also works with strings:**
 78 | 
 79 | ![](https://github.com/ropenscilabs/skimr/blob/ecb90e22047d4a1b228bcf471650eb79b733e52e/man/figures/skim_babynames.png) <br>
 80 | 
 81 | ### Exploring a skim\_df object
 82 | 
 83 | By default `skim` prints beautifully in the console, but it also produces a long, tidy-format `skim_df` object that can be computed on.
 84 | 
 85 | ``` r
 86 | a <-  skim(chickwts)
 87 | dim(a)
 88 | # [1] 22  5
 89 | View(a)
 90 | ```
 91 | 
 92 | <img src="https://github.com/ropenscilabs/skimr/blob/ecb90e22047d4a1b228bcf471650eb79b733e52e/man/figures/skim_chickwts_df.png" width="450px">
 93 | 
 94 | ### Computing with the skim\_df object
 95 | 
 96 | Maybe you just want to skim a specific portion of your data frame. Use skimr with a pipe!
 97 | 
 98 | ``` r
 99 | > skim(mtcars) %>% filter(stat=="hist")
100 | # A tibble: 11 × 5
101 |      var    type  stat      level value
102 |    <chr>   <chr> <chr>      <chr> <dbl>
103 | 1    mpg numeric  hist ▂▅▇▇▇▃▁▁▂▂     0
104 | 2    cyl numeric  hist ▆▁▁▁▃▁▁▁▁▇     0
105 | 3   disp numeric  hist ▇▇▅▁▁▇▃▂▁▃     0
106 | 4     hp numeric  hist ▆▆▇▂▇▂▃▁▁▁     0
107 | 5   drat numeric  hist ▃▇▂▂▃▆▅▁▁▁     0
108 | 6     wt numeric  hist ▂▂▂▂▇▆▁▁▁▂     0
109 | 7   qsec numeric  hist ▂▃▇▇▇▅▅▁▁▁     0
110 | 8     vs numeric  hist ▇▁▁▁▁▁▁▁▁▆     0
111 | 9     am numeric  hist ▇▁▁▁▁▁▁▁▁▆     0
112 | 10  gear numeric  hist ▇▁▁▁▆▁▁▁▁▂     0
113 | 11  carb numeric  hist ▆▇▂▁▇▁▁▁▁▁     0
114 | ```
115 | 
116 | ### Specifying your own statistics
117 | 
118 | Another possibility is specifying your own statistics to display with `skimr`:
119 | 
120 | ``` r
121 |  funs <- list(iqr = IQR,
122 |     quantile = purrr::partial(quantile, probs = .99))
123 |   skim_with(numeric = funs, append = FALSE)
124 |   skim_v(iris$Sepal.Length)
125 |   
126 | #  A tibble: 2 × 4
127 | #      type     stat level value
128 | #     <chr>    <chr> <chr> <dbl>
129 | # 1 numeric      iqr  .all   1.3
130 | # 2 numeric quantile   99%   7.7
131 | ```
132 | 
133 | Our awesome team
134 | ----------------
135 | 
136 | We had a really fantastic team with diverse backgrounds, and it was really cool how organically everyone found a role for themselves during the development of `skimr`. Between brainstorming sessions, experienced coders began to iteratively develop the code while others worked on documentation and tests, and got more involved. Everyone asked questions and brainstormed together; it was a really welcoming environment. We knew that by the end of the second day of the unconf, we would present our work using only the repo's [README](https://github.com/ropenscilabs/skimr#skimr) file. So we focused on communication throughout the entire development process.
137 | 
138 | A lot of the heavy lifting at the unconf was done by Michael, Elin, and Eduardo, and Elin has continued leading development in the month since!
139 | 
This was the original team in alphabetical order. We have also had many virtual contributors: see the full list of contributors [here](https://github.com/ropenscilabs/skimr/graphs/contributors).
141 | 
142 | **Eduardo Arino de la Rubia**
143 | Job Title: Chief Data Scientist at Domino Data Lab
144 | Project Contributions: Coder
145 | 
146 | **Shannon Ellis**
147 | Job Title: Postdoctoral fellow in the Biostatistics Department at the Johns Hopkins Bloomberg School of Public Health
148 | Project Contributions: Test Scripts
149 | 
150 | **Julia Stewart Lowndes**
151 | Job Title: Marine Data Scientist at the National Center for Ecological Analysis and Synthesis
152 | Project Contributions: Documentation and test scripts
153 | 
154 | **Hope McLeod**
155 | Job Title: Data Engineer at Kobalt Music
156 | Project Contributions: Documentation
157 | 
158 | **Amelia McNamara**
159 | Job Title: Visiting Assistant Professor of Statistical & Data Sciences at Smith College
160 | Project Contributions: Coder
161 | 
162 | **Michael Quinn**
163 | Job Title: Quantitative Analyst at Google
164 | Project Contributions: Coder
165 | 
166 | **Elin Waring**
167 | Job Title: Professor at Lehman College Sociology Department, City University of New York
168 | Project Contributions: Coder
169 | 
170 | **Hao Zhu**
171 | Job Title: Programmer Analyst at the Institute for Aging Research
172 | Project Contributions: Coder
173 | 
174 | In summary (ha...)
175 | ------------------
176 | 
177 | The work we did together was only possible because of rOpenSci's incredible community and culture. For us to be able to dream up something we wanted to build and have the time and space to actually do it together was really exciting. So thank you rOpenSci and everyone in the greater community!
178 | 
179 | There is more work to be done on `skimr`, so please check out the [`skimr`](https://github.com/ropenscilabs/skimr) repo for the latest features and improvements!
180 | 


--------------------------------------------------------------------------------
/inst/other_docs/skimr_in_jupyter.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 3,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "library(skimr)\n",
 10 |     "options(width = 90)"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 4,
 16 |    "metadata": {
 17 |     "scrolled": false
 18 |    },
 19 |    "outputs": [
 20 |     {
 21 |      "name": "stdout",
 22 |      "output_type": "stream",
 23 |      "text": [
 24 |       "── Data Summary ────────────────────────\n",
 25 |       "                        Value\n",
 26 |       "Name                     iris\n",
 27 |       "Number of rows            150\n",
 28 |       "Number of columns           5\n",
 29 |       "                             \n",
 30 |       "Column type frequency:       \n",
 31 |       "  factor                    1\n",
 32 |       "  numeric                   4\n",
 33 |       "                             \n",
 34 |       "Group variables          None\n",
 35 |       "\n",
 36 |       "── Variable type: factor ─────────────────────────────────────────────────────────────────\n",
 37 |       "  skim_variable missing complete     n ordered n_unique top_counts               \n",
 38 |       "1 Species             0      150   150 FALSE          3 set: 50, ver: 50, vir: 50\n",
 39 |       "\n",
 40 |       "── Variable type: numeric ────────────────────────────────────────────────────────────────\n",
 41 |       "  skim_variable missing complete     n  mean    sd    p0   p25   p50   p75  p100 hist \n",
 42 |       "1 Sepal.Length        0      150   150  5.84 0.828   4.3   5.1  5.8    6.4   7.9 ▆▇▇▅▂\n",
 43 |       "2 Sepal.Width         0      150   150  3.06 0.436   2     2.8  3      3.3   4.4 ▁▆▇▂▁\n",
 44 |       "3 Petal.Length        0      150   150  3.76 1.77    1     1.6  4.35   5.1   6.9 ▇▁▆▇▂\n",
 45 |       "4 Petal.Width         0      150   150  1.20 0.762   0.1   0.3  1.3    1.8   2.5 ▇▁▇▅▃\n"
 46 |      ]
 47 |     }
 48 |    ],
 49 |    "source": [
 50 |     "skim(iris)"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 5,
 56 |    "metadata": {},
 57 |    "outputs": [
 58 |     {
 59 |      "data": {},
 60 |      "metadata": {},
 61 |      "output_type": "display_data"
 62 |     },
 63 |     {
 64 |      "name": "stdout",
 65 |      "output_type": "stream",
 66 |      "text": [
 67 |       "$factor\n",
 68 |       "\n",
 69 |       "── Variable type: factor ─────────────────────────────────────────────────────────────────\n",
 70 |       "  skim_variable missing complete     n ordered n_unique top_counts               \n",
 71 |       "1 Species             0      150   150 FALSE          3 set: 50, ver: 50, vir: 50\n",
 72 |       "\n",
 73 |       "$numeric\n",
 74 |       "\n",
 75 |       "── Variable type: numeric ────────────────────────────────────────────────────────────────\n",
 76 |       "  skim_variable missing complete     n  mean    sd    p0   p25   p50   p75  p100 hist \n",
 77 |       "1 Sepal.Length        0      150   150  5.84 0.828   4.3   5.1  5.8    6.4   7.9 ▆▇▇▅▂\n",
 78 |       "2 Sepal.Width         0      150   150  3.06 0.436   2     2.8  3      3.3   4.4 ▁▆▇▂▁\n",
 79 |       "3 Petal.Length        0      150   150  3.76 1.77    1     1.6  4.35   5.1   6.9 ▇▁▆▇▂\n",
 80 |       "4 Petal.Width         0      150   150  1.20 0.762   0.1   0.3  1.3    1.8   2.5 ▇▁▇▅▃\n",
 81 |       "\n"
 82 |      ]
 83 |     }
 84 |    ],
 85 |    "source": [
 86 |     "skim(iris) %>%\n",
 87 |     "  partition()"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": 6,
 93 |    "metadata": {},
 94 |    "outputs": [
 95 |     {
 96 |      "name": "stdout",
 97 |      "output_type": "stream",
 98 |      "text": [
 99 |       "\n",
100 |       "── Variable type: numeric ────────────────────────────────────────────────────────────────\n",
101 |       "  skim_variable missing complete     n  mean    sd    p0   p25   p50   p75  p100 hist \n",
102 |       "1 Sepal.Length        0      150   150  5.84 0.828   4.3   5.1  5.8    6.4   7.9 ▆▇▇▅▂\n",
103 |       "2 Sepal.Width         0      150   150  3.06 0.436   2     2.8  3      3.3   4.4 ▁▆▇▂▁\n",
104 |       "3 Petal.Length        0      150   150  3.76 1.77    1     1.6  4.35   5.1   6.9 ▇▁▆▇▂\n",
105 |       "4 Petal.Width         0      150   150  1.20 0.762   0.1   0.3  1.3    1.8   2.5 ▇▁▇▅▃\n"
106 |      ]
107 |     }
108 |    ],
109 |    "source": [
110 |     "skim(iris) %>%\n",
111 |     "  yank(\"numeric\")"
112 |    ]
113 |   }
114 |  ],
115 |  "metadata": {
116 |   "kernelspec": {
117 |    "display_name": "R",
118 |    "language": "R",
119 |    "name": "ir"
120 |   },
121 |   "language_info": {
122 |    "codemirror_mode": "r",
123 |    "file_extension": ".r",
124 |    "mimetype": "text/x-r-source",
125 |    "name": "R",
126 |    "pygments_lexer": "r",
127 |    "version": "3.5.3"
128 |   }
129 |  },
130 |  "nbformat": 4,
131 |  "nbformat_minor": 2
132 | }
133 | 


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/fonts-in-skimr/skeleton/skeleton.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Untitled"
 3 | mainfont: DejaVu Sans
 4 | output:
 5 |   html_document: default
 6 |   pdf_document:
 7 |     latex_engine: xelatex
 8 |   word_document: default
 9 | font-family: Times New Roman
10 | ---
11 | 
12 | ## Getting ready for fonts
13 | 
14 | Notice that the yaml at the beginning of this file includes a latex_engine which will be used 
15 | when creating a pdf document.
16 | 
17 | It also includes a mainfont setting called _DejaVu Sans_.  This is not the only font that will 
18 | work to produce the spark graphs. However, it is a free font available through the
19 | `extrafont` package.  If you have not installed extrafont you should do so using the normal
20 | package installation procedures. You should then make sure that the desired font is installed.
21 | 
22 | The code below will not run automatically when you knit; instead, you should run it in the
23 | console. 
24 | 
25 | ```
26 | install.packages(c("extrafont"))
27 | font_install("DejaVu Sans")
28 | ```
29 | 
30 | If there are any difficulties please read the extrafont documentation.
31 | 
32 | ## Experimenting with rendering
33 |  
34 | ```{r, message=FALSE}
35 | library(knitr)
36 | library(skimr)
37 | ```
38 | 
39 | Try knitting this document to PDF, HTML, doc or any other format you wish to try. You 
40 | will notice that there are slight differences between them.  To understand the impact
41 | of the engine and font choices you should experiment with different options.
42 | 
43 | The first example shows what printing the basic skim function looks like. 
44 | You can try knitting to different formats to see how it changes.
45 | 
46 | ```{r }
47 | skim(iris)
48 | ```
49 | 
50 | It is possible that the histograms will not print in all of the formats.
51 | 
52 | Unfortunately this is outside the control of the skimr team because 
53 | it relates to the operating system you are using, fonts installed, 
54 | and locale. 
55 | 
56 | 


--------------------------------------------------------------------------------
/inst/rmarkdown/templates/fonts-in-skimr/template.yaml:
--------------------------------------------------------------------------------
1 | name: Fonts_in_skimr
2 | description: >
3 |  Using fonts in skimr
4 | 


--------------------------------------------------------------------------------
/man/deprecated-v1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/deprecated.R
 3 | \name{deprecated-v1}
 4 | \alias{deprecated-v1}
 5 | \alias{skim_to_wide}
 6 | \alias{skim_to_list}
 7 | \alias{skim_format}
 8 | \title{Deprecated functions from skimr v1}
 9 | \usage{
10 | skim_to_wide(.data, ...)
11 | 
12 | skim_to_list(.data, ...)
13 | 
14 | skim_format(...)
15 | }
16 | \arguments{
17 | \item{.data}{A tibble, or an object that can be coerced into a tibble.}
18 | 
19 | \item{...}{Columns to select for skimming. When none are provided, the
20 | default is to skim all columns.}
21 | }
22 | \value{
23 | Either a \code{skim_df} or a \code{skim_list} object.
24 | }
25 | \description{
26 | Skimr used to offer functions that combined skimming with a secondary effect,
27 | like reshaping the data, building a list or printing the results. Some of
28 | these behaviors are no longer necessary. \code{\link[=skim]{skim()}} always returns a wide
29 | data frame. Others have been replaced by functions that do a single thing.
30 | \code{\link[=partition]{partition()}} creates a list-like object from a skimmed data frame.
31 | }
32 | \section{Functions}{
33 | \itemize{
34 | \item \code{skim_to_wide()}: \code{\link[=skim]{skim()}} always produces a wide data frame.
35 | 
36 | \item \code{skim_to_list()}: \code{\link[=partition]{partition()}} creates a list.
37 | 
38 | \item \code{skim_format()}: \code{\link[=print]{print()}} and \code{\link[=skim_with]{skim_with()}} set options.
39 | 
40 | }}
41 | 


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ropensci/skimr/c0018a7eae3a8858a873d85eeda55120489fb03f/man/figures/logo.png


--------------------------------------------------------------------------------
/man/fix_windows_histograms.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/utils.R
 3 | \name{fix_windows_histograms}
 4 | \alias{fix_windows_histograms}
 5 | \title{Fix unicode histograms on Windows}
 6 | \usage{
 7 | fix_windows_histograms()
 8 | }
 9 | \description{
10 | This function changes your session's locale to address issues with printing
11 | histograms on Windows on versions of R below 4.2.1.
12 | }
13 | \details{
14 | There are known issues with printing the spark-histogram characters when
15 | printing a data frame, appearing like this: "<U+2582><U+2585><U+2587>".
16 | This longstanding problem originates in the low-level code for printing
17 | dataframes. This was addressed in R version 4.2.1.
18 | }
19 | \seealso{
20 | \code{\link[=skim_without_charts]{skim_without_charts()}}
21 | }
22 | 


--------------------------------------------------------------------------------
/man/focus.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reshape.R
 3 | \name{focus}
 4 | \alias{focus}
 5 | \title{Only show a subset of summary statistics after skimming}
 6 | \usage{
 7 | focus(.data, ...)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{skim_df} object.}
11 | 
12 | \item{...}{One or more unquoted expressions separated by commas. Variable
13 | names can be used as if they were positions in the data frame, so
14 | expressions like x:y can be used to select a range of variables.}
15 | }
16 | \description{
17 | This function is a variant of \code{\link[dplyr:select]{dplyr::select()}} designed to work with
18 | \code{skim_df} objects. When using \code{focus()}, \code{skimr} metadata columns are kept,
19 | and \code{skimr} print methods are still utilized. Otherwise, the signature and
20 | behavior is identical to \code{\link[dplyr:select]{dplyr::select()}}.
21 | }
22 | \examples{
23 | # Compare
24 | iris \%>\%
25 |   skim() \%>\%
26 |   dplyr::select(n_missing)
27 | 
28 | iris \%>\%
29 |   skim() \%>\%
30 |   focus(n_missing)
31 | 
32 | # This is equivalent to
33 | iris \%>\%
34 |   skim() \%>\%
35 |   dplyr::select(skim_variable, skim_type, n_missing)
36 | }
37 | 


--------------------------------------------------------------------------------
/man/get_default_skimmers.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get_skimmers.R
 3 | \name{get_default_skimmers}
 4 | \alias{get_default_skimmers}
 5 | \alias{get_one_default_skimmer}
 6 | \alias{get_default_skimmer_names}
 7 | \alias{get_one_default_skimmer_names}
 8 | \alias{get_sfl}
 9 | \title{View default skimmer names and functions}
10 | \usage{
11 | get_default_skimmers(skim_type = NULL)
12 | 
13 | get_one_default_skimmer(skim_type)
14 | 
15 | get_default_skimmer_names(skim_type = NULL)
16 | 
17 | get_one_default_skimmer_names(skim_type)
18 | 
19 | get_sfl(skim_type)
20 | }
21 | \arguments{
22 | \item{skim_type}{The class of the column being skimmed.}
23 | }
24 | \description{
25 | These utility functions look up the currently-available defaults for one or
26 | more \code{skim_type}'s. They work with all defaults in the \code{skimr} package, as
27 | well as the defaults in any package that extends \code{skimr}. See
28 | \code{\link[=get_skimmers]{get_skimmers()}} for writing lookup methods for different data types.
29 | }
30 | \details{
31 | The functions differ in return type and whether or not the result is in
32 | a list. \code{\link[=get_default_skimmers]{get_default_skimmers()}} and \code{\link[=get_one_default_skimmer]{get_one_default_skimmer()}} return
33 | functions. The former returns functions within a typed list, i.e.
34 | \code{list(numeric = list(...functions...))}.
35 | 
36 | The name-lookup functions follow the same split between list and single results.
37 | \code{\link[=get_default_skimmer_names]{get_default_skimmer_names()}} and \code{\link[=get_one_default_skimmer_names]{get_one_default_skimmer_names()}}
38 | return a list of character vectors or a single character vector, respectively.
39 | 
40 | \code{\link[=get_sfl]{get_sfl()}} returns the skimmer function list for a particular \code{skim_type}.
41 | It differs from \code{\link[=get_default_skimmers]{get_default_skimmers()}} in that the returned \code{sfl} contains
42 | a list of functions and a \code{skim_type}.
43 | }
44 | \section{Functions}{
45 | \itemize{
46 | \item \code{get_one_default_skimmer()}: Get the functions associated with one
47 | \code{skim_type}.
48 | 
49 | \item \code{get_default_skimmer_names()}: Get the names of the functions used in one
50 | or more \code{skim_type}'s.
51 | 
52 | \item \code{get_one_default_skimmer_names()}: Get the names of the functions used in one
53 | \code{skim_type}.
54 | 
55 | \item \code{get_sfl()}: Get the \code{sfl} for a \code{skim_type}.
56 | 
57 | }}
58 | 


--------------------------------------------------------------------------------
/man/get_skimmers.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/get_skimmers.R
  3 | \name{get_skimmers}
  4 | \alias{get_skimmers}
  5 | \alias{get_skimmers.default}
  6 | \alias{get_skimmers.numeric}
  7 | \alias{get_skimmers.factor}
  8 | \alias{get_skimmers.character}
  9 | \alias{get_skimmers.logical}
 10 | \alias{get_skimmers.complex}
 11 | \alias{get_skimmers.Date}
 12 | \alias{get_skimmers.POSIXct}
 13 | \alias{get_skimmers.difftime}
 14 | \alias{get_skimmers.Timespan}
 15 | \alias{get_skimmers.ts}
 16 | \alias{get_skimmers.list}
 17 | \alias{get_skimmers.AsIs}
 18 | \alias{get_skimmers.haven_labelled}
 19 | \alias{modify_default_skimmers}
 20 | \title{Retrieve the summary functions for a specific data type}
 21 | \usage{
 22 | get_skimmers(column)
 23 | 
 24 | \method{get_skimmers}{default}(column)
 25 | 
 26 | \method{get_skimmers}{numeric}(column)
 27 | 
 28 | \method{get_skimmers}{factor}(column)
 29 | 
 30 | \method{get_skimmers}{character}(column)
 31 | 
 32 | \method{get_skimmers}{logical}(column)
 33 | 
 34 | \method{get_skimmers}{complex}(column)
 35 | 
 36 | \method{get_skimmers}{Date}(column)
 37 | 
 38 | \method{get_skimmers}{POSIXct}(column)
 39 | 
 40 | \method{get_skimmers}{difftime}(column)
 41 | 
 42 | \method{get_skimmers}{Timespan}(column)
 43 | 
 44 | \method{get_skimmers}{ts}(column)
 45 | 
 46 | \method{get_skimmers}{list}(column)
 47 | 
 48 | \method{get_skimmers}{AsIs}(column)
 49 | 
 50 | \method{get_skimmers}{haven_labelled}(column)
 51 | 
 52 | modify_default_skimmers(skim_type, new_skim_type = NULL, new_funs = list())
 53 | }
 54 | \arguments{
 55 | \item{column}{An atomic vector or list. A column from a data frame.}
 56 | 
 57 | \item{skim_type}{A character scalar. The class of the object with default
 58 | skimmers.}
 59 | 
 60 | \item{new_skim_type}{The type to assign to the looked up set of skimmers.}
 61 | 
 62 | \item{new_funs}{Replacement functions for those in the looked up set of skimmers.}
 63 | }
 64 | \value{
 65 | A \code{skim_function_list} object.
 66 | }
 67 | \description{
 68 | These functions are used to set the default skimming functions for a data
 69 | type. They are combined with the base skim function list (\code{sfl}) in
 70 | \code{\link[=skim_with]{skim_with()}}, to create the summary tibble for each type.
 71 | }
 72 | \details{
 73 | When creating your own set of skimming functions, call \code{\link[=sfl]{sfl()}} within a
 74 | \code{\link[=get_skimmers]{get_skimmers()}} method for your particular type. Your call to \code{\link[=sfl]{sfl()}} should
 75 | also provide a matching class in the \code{skim_type} argument.  Otherwise, it
 76 | will not be possible to dynamically reassign your default functions when
 77 | working interactively.
 78 | 
 79 | Call \code{\link[=get_default_skimmers]{get_default_skimmers()}} to see the functions for each type of summary
 80 | function currently supported. Call \code{\link[=get_default_skimmer_names]{get_default_skimmer_names()}} to just see
 81 | the names of these functions. Use \code{\link[=modify_default_skimmers]{modify_default_skimmers()}} for a method
 82 | for changing the \code{skim_type} or functions for a default \code{sfl}. This is useful
 83 | for creating new default \code{sfl}'s.
 84 | }
 85 | \section{Methods (by class)}{
 86 | \itemize{
 87 | \item \code{get_skimmers(default)}: The default method for skimming data. Only used when
 88 | a column's data type doesn't match currently installed types. Call
 89 | \link{get_default_skimmer_names} to see these defaults.
 90 | 
 91 | \item \code{get_skimmers(numeric)}: Summary functions for numeric columns, covering both
 92 | \code{\link[=double]{double()}} and \code{\link[=integer]{integer()}} classes: \code{\link[=mean]{mean()}}, \code{\link[=sd]{sd()}}, \code{\link[=quantile]{quantile()}} and
 93 | \code{\link[=inline_hist]{inline_hist()}}.
 94 | 
 95 | \item \code{get_skimmers(factor)}: Summary functions for factor columns:
 96 | \code{\link[=is.ordered]{is.ordered()}}, \code{\link[=n_unique]{n_unique()}} and \code{\link[=top_counts]{top_counts()}}.
 97 | 
 98 | \item \code{get_skimmers(character)}: Summary functions for character columns. Also, the
 99 | default for unknown columns: \code{\link[=min_char]{min_char()}}, \code{\link[=max_char]{max_char()}}, \code{\link[=n_empty]{n_empty()}},
100 | \code{\link[=n_unique]{n_unique()}} and \code{\link[=n_whitespace]{n_whitespace()}}.
101 | 
102 | \item \code{get_skimmers(logical)}: Summary functions for logical/ boolean columns:
103 | \code{\link[=mean]{mean()}}, which produces rates for each value, and \code{\link[=top_counts]{top_counts()}}.
104 | 
105 | \item \code{get_skimmers(complex)}: Summary functions for complex columns: \code{\link[=mean]{mean()}}.
106 | 
107 | \item \code{get_skimmers(Date)}: Summary functions for \code{Date} columns: \code{\link[=min]{min()}},
108 | \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}.
109 | 
110 | \item \code{get_skimmers(POSIXct)}: Summary functions for \code{POSIXct} columns: \code{\link[=min]{min()}},
111 | \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}.
112 | 
113 | \item \code{get_skimmers(difftime)}: Summary functions for \code{difftime} columns: \code{\link[=min]{min()}},
114 | \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}.
115 | 
116 | \item \code{get_skimmers(Timespan)}: Summary functions for \code{Timespan} columns: \code{\link[=min]{min()}},
117 | \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}.
118 | 
119 | \item \code{get_skimmers(ts)}: Summary functions for \code{ts} columns: \code{\link[=min]{min()}},
120 | \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}.
121 | 
122 | \item \code{get_skimmers(list)}: Summary functions for \code{list} columns: \code{\link[=n_unique]{n_unique()}},
123 | \code{\link[=list_min_length]{list_min_length()}} and \code{\link[=list_max_length]{list_max_length()}}.
124 | 
125 | \item \code{get_skimmers(AsIs)}: Summary functions for \code{AsIs} columns: \code{\link[=n_unique]{n_unique()}},
126 | \code{\link[=list_min_length]{list_min_length()}} and \code{\link[=list_max_length]{list_max_length()}}.
127 | 
128 | \item \code{get_skimmers(haven_labelled)}: Summary functions for \code{haven_labelled} columns.
129 | Finds the appropriate skimmers for the underlying data in the vector.
130 | 
131 | }}
132 | \examples{
133 | # Defining default skimming functions for a new class, `my_class`.
134 | # Note that the class argument is required for dynamic reassignment.
135 | get_skimmers.my_class <- function(column) {
136 |   sfl(
137 |     skim_type = "my_class",
138 |     mean,
139 |     sd
140 |   )
141 | }
142 | 
143 | # Integer and double columns are both "numeric" and are treated the same
144 | # by default. To switch this behavior in another package, add a method.
145 | get_skimmers.integer <- function(column) {
146 |   sfl(
147 |     skim_type = "integer",
148 |     p50 = ~ stats::quantile(
149 |       .,
150 |       probs = .50, na.rm = TRUE, names = FALSE, type = 1
151 |     )
152 |   )
153 | }
154 | x <- mtcars[c("gear", "carb")]
155 | class(x$carb) <- "integer"
156 | skim(x)
157 | \dontrun{
158 | # In a package, to revert to the V1 behavior of skimming separately with the
159 | # same functions, assign the numeric `get_skimmers`.
160 | get_skimmers.integer <- skimr::get_skimmers.numeric
161 | 
162 | # Or, in a local session, use `skim_with` to create a different `skim`.
163 | new_skim <- skim_with(integer = skimr::get_skimmers.numeric())
164 | 
165 | # To apply a set of skimmers from an old type to a new type
166 | get_skimmers.new_type <- function(column) {
167 |   modify_default_skimmers("old_type", new_skim_type = "new_type")
168 | }
169 | }
170 | }
171 | \seealso{
172 | \code{\link[=sfl]{sfl()}}
173 | }
174 | 


--------------------------------------------------------------------------------
/man/knit_print.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/skim_print.R
 3 | \name{knit_print}
 4 | \alias{knit_print}
 5 | \alias{knit_print.skim_df}
 6 | \alias{knit_print.skim_list}
 7 | \alias{knit_print.one_skim_df}
 8 | \alias{knit_print.summary_skim_df}
 9 | \title{Provide a default printing method for knitr.}
10 | \usage{
11 | \method{knit_print}{skim_df}(x, options = NULL, ...)
12 | 
13 | \method{knit_print}{skim_list}(x, options = NULL, ...)
14 | 
15 | \method{knit_print}{one_skim_df}(x, options = NULL, ...)
16 | 
17 | \method{knit_print}{summary_skim_df}(x, options = NULL, ...)
18 | }
19 | \arguments{
20 | \item{x}{An R object to be printed}
21 | 
22 | \item{options}{Options passed into the print function.}
23 | 
24 | \item{...}{Additional arguments passed to the S3 method. Currently ignored,
25 | except two optional arguments \code{options} and \code{inline}; see
26 | the references below.}
27 | }
28 | \value{
29 | A \code{knit_asis} object, which is used by \code{knitr} when rendering.
30 | }
31 | \description{
32 | Instead of standard R output, \code{knitr} and \code{RMarkdown} documents will have
33 | formatted \code{\link[knitr:kable]{knitr::kable()}} output on return. You can disable this by setting
34 | the chunk option \code{render = normal_print}.
35 | }
36 | \details{
37 | The summary statistics for the original data frame can be disabled by setting
38 | the \code{knitr} chunk option \code{skimr_include_summary = FALSE}. See
39 | \link[knitr:opts_chunk]{knitr::opts_chunk} for more information. You can change the number of digits
40 | shown in the printed table with the \code{skimr_digits} chunk option.
41 | 
42 | Alternatively, you can call \code{\link[=yank]{yank()}} to get the particular
43 | \code{skim_df} objects and format them however you like. One warning though.
44 | Because histograms contain unicode characters, they can have unexpected
45 | print results, as R has varying levels of unicode support. This affects
46 | Windows users most commonly. Call \code{vignette("Using_fonts")} for more details.
47 | }
48 | \section{Methods (by class)}{
49 | \itemize{
50 | \item \code{knit_print(skim_df)}: Default \code{knitr} print for \code{skim_df} objects.
51 | 
52 | \item \code{knit_print(skim_list)}: Default \code{knitr} print for a \code{skim_list}.
53 | 
54 | \item \code{knit_print(one_skim_df)}: Default \code{knitr} print within a partitioned \code{skim_df}.
55 | 
56 | \item \code{knit_print(summary_skim_df)}: Default \code{knitr} print for \code{skim_df} summaries.
57 | 
58 | }}
59 | \seealso{
60 | \code{\link[knitr:kable]{knitr::kable()}}
61 | }
62 | 


--------------------------------------------------------------------------------
/man/mutate.skim_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dplyr.R
 3 | \name{mutate.skim_df}
 4 | \alias{mutate.skim_df}
 5 | \title{Mutate a skim_df}
 6 | \usage{
 7 | \method{mutate}{skim_df}(.data, ...)
 8 | }
 9 | \arguments{
10 | \item{.data}{A \code{skim_df}, which behaves like a \code{tbl}.}
11 | 
12 | \item{...}{Name-value pairs of expressions, each with length 1 or the same
13 | length as the number of rows in the group, if using \code{\link[dplyr:group_by]{dplyr::group_by()}}, or
14 | in the entire input (if not using groups). The name of each argument will
15 | be the name of a new variable, and the value will be its corresponding
16 | value. Use \code{NULL} value in \code{\link[dplyr:mutate]{dplyr::mutate()}} to drop a variable. New
17 | variables overwrite existing variables of the same name.
18 | 
19 | The arguments in \code{...} are automatically quoted with \code{\link[rlang:defusing-advanced]{rlang::quo()}} and
20 | evaluated with \code{\link[rlang:eval_tidy]{rlang::eval_tidy()}} in the context of the data frame. They
21 | support unquoting \link[rlang:topic-inject]{rlang::quasiquotation} and splicing. See
22 | \code{vignette("programming", package = "dplyr")} for an introduction to these
23 | concepts.}
24 | }
25 | \value{
26 | A \code{skim_df} object, which also inherits the class(es) of the input
27 | data. In many ways, the object behaves like a \code{\link[tibble:tibble]{tibble::tibble()}}.
28 | }
29 | \description{
30 | \code{\link[dplyr:mutate]{dplyr::mutate()}} currently drops attributes, but we need to keep them around
31 | for other skim behaviors. Otherwise the behavior is exactly the same. For
32 | more information, see \url{https://github.com/tidyverse/dplyr/issues/3429}.
33 | }
34 | \seealso{
35 | \code{\link[dplyr:mutate]{dplyr::mutate()}} for the function's expected behavior.
36 | }
37 | 


--------------------------------------------------------------------------------
/man/partition.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reshape.R
 3 | \name{partition}
 4 | \alias{partition}
 5 | \alias{bind}
 6 | \alias{yank}
 7 | \title{Separate a big \code{skim_df} into smaller data frames, by type.}
 8 | \usage{
 9 | partition(data)
10 | 
11 | bind(data)
12 | 
13 | yank(data, skim_type)
14 | }
15 | \arguments{
16 | \item{data}{A \code{skim_df}.}
17 | 
18 | \item{skim_type}{A character scalar. The subtable to extract from a
19 | \code{skim_df}.}
20 | }
21 | \value{
22 | A \code{skim_list} of \code{skim_df}'s, by type.
23 | }
24 | \description{
25 | The data frames produced by \code{\link[=skim]{skim()}} are wide and sparse, filled with
26 | columns that are mostly \code{NA}. For that reason, it can be convenient to
27 | work with "by type" subsets of the original data frame. These smaller
28 | subsets have their \code{NA} columns removed.
29 | }
30 | \details{
31 | \code{partition()} creates a list of smaller \code{skim_df} data frames. Each entry
32 | in the list is a data type from the original \code{skim_df}. The inverse of
33 | \code{partition()} is \code{bind()}, which takes the list and produces the original
34 | \code{skim_df}. While \code{partition()} keeps all of the subtables as list entries,
35 | \code{yank()} gives you a single subtable for a data type.
36 | }
37 | \section{Functions}{
38 | \itemize{
39 | \item \code{bind()}: The inverse of a \code{partition()}. Rebuild the original
40 | \code{skim_df}.
41 | 
42 | \item \code{yank()}: Extract a subtable from a \code{skim_df} with a particular
43 | type.
44 | 
45 | }}
46 | \examples{
47 | # Create a wide skimmed data frame (a skim_df)
48 | skimmed <- skim(iris)
49 | 
50 | # Separate into a list of subtables by type
51 | separate <- partition(skimmed)
52 | 
53 | # Put back together
54 | identical(bind(separate), skimmed)
55 | # > TRUE
56 | 
57 | # Alternatively, get the subtable of a particular type
58 | yank(skimmed, "factor")
59 | }
60 | 


--------------------------------------------------------------------------------
/man/print.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/skim_print.R
 3 | \name{print}
 4 | \alias{print}
 5 | \alias{print.skim_df}
 6 | \alias{print.skim_list}
 7 | \alias{print.summary_skim_df}
 8 | \title{Print \code{skim} objects}
 9 | \usage{
10 | \method{print}{skim_df}(
11 |   x,
12 |   include_summary = TRUE,
13 |   n = Inf,
14 |   width = Inf,
15 |   summary_rule_width = getOption("skimr_summary_rule_width", default = 40),
16 |   ...
17 | )
18 | 
19 | \method{print}{skim_list}(x, n = Inf, width = Inf, ...)
20 | 
21 | \method{print}{summary_skim_df}(x, .summary_rule_width = 40, ...)
22 | }
23 | \arguments{
24 | \item{x}{Object to format or print.}
25 | 
26 | \item{include_summary}{Whether a summary of the data frame should be printed}
27 | 
28 | \item{n}{Number of rows to show. If \code{NULL}, the default, will print all rows
29 | if less than the \code{print_max} \link[pillar:pillar_options]{option}.
30 | Otherwise, will print as many rows as specified by the
31 | \code{print_min} \link[pillar:pillar_options]{option}.}
32 | 
33 | \item{width}{Width of text output to generate. This defaults to \code{NULL}, which
34 | means use the \code{width} \link[pillar:pillar_options]{option}.}
35 | 
36 | \item{summary_rule_width}{Width of Data Summary cli rule, defaults to 40.}
37 | 
38 | \item{...}{Passed on to \code{\link[=tbl_format_setup]{tbl_format_setup()}}.}
39 | 
40 | \item{.summary_rule_width}{the width for the main rule above the summary.}
41 | }
42 | \description{
43 | \code{skimr} has custom print methods for all supported objects. Default printing
44 | methods for \code{knitr}/ \code{rmarkdown} documents are also provided.
45 | }
46 | \section{Methods (by class)}{
47 | \itemize{
48 | \item \code{print(skim_df)}: Print a skimmed data frame (\code{skim_df} from \code{\link[=skim]{skim()}}).
49 | 
50 | \item \code{print(skim_list)}: Print a \code{skim_list}, a list of \code{skim_df} objects.
51 | 
52 | \item \code{print(summary_skim_df)}: Print method for a \code{summary_skim_df} object.
53 | 
54 | }}
55 | \section{Printing options}{
56 | 
57 | 
58 | For better or for worse, \code{skimr} often produces more output than can fit in
59 | the standard R console. Fortunately, most modern environments like RStudio
60 | and Jupyter support more than 80 character outputs. Call
61 | \code{options(width = 90)} to get a better experience with \code{skimr}.
62 | 
63 | The print methods in \code{skimr} wrap those in the \link[tibble:formatting]{tibble}
64 | package. You can control printing behavior using the same global options.
65 | }
66 | 
67 | \section{Behavior in \code{dplyr} pipelines}{
68 | 
69 | 
70 | Printing a \code{skim_df} requires specific columns that might be dropped when
71 | using \code{\link[dplyr:select]{dplyr::select()}} or \code{\link[dplyr:summarise]{dplyr::summarize()}} on a \code{skim_df}. In those
72 | cases, this method falls back to \code{\link[tibble:formatting]{tibble::print.tbl()}}.
73 | }
74 | 
75 | \section{Options for controlling print behavior}{
76 | 
77 | 
78 | You can control the width of the rule line for the printed subtables with an option:
79 | \code{skimr_table_header_width}.
80 | }
81 | 
82 | \seealso{
83 | \code{\link[tibble:trunc_mat]{tibble::trunc_mat()}} For a list of global options for customizing
84 | print formatting. \code{\link[cli:num_ansi_colors]{cli::num_ansi_colors()}} for the variety of issues that
85 | affect tibble's color support.
86 | }
87 | 


--------------------------------------------------------------------------------
/man/reexports.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/skimr-package.R
 3 | \docType{import}
 4 | \name{reexports}
 5 | \alias{reexports}
 6 | \alias{\%>\%}
 7 | \alias{contains}
 8 | \alias{ends_with}
 9 | \alias{everything}
10 | \alias{matches}
11 | \alias{num_range}
12 | \alias{one_of}
13 | \alias{starts_with}
14 | \title{Objects exported from other packages}
15 | \keyword{internal}
16 | \description{
17 | These objects are imported from other packages. Follow the links
18 | below to see their documentation.
19 | 
20 | \describe{
21 |   \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}}
22 | 
23 |   \item{tidyselect}{\code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}}
24 | }}
25 | 
26 | 


--------------------------------------------------------------------------------
/man/repr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/skim_print.R
 3 | \name{repr}
 4 | \alias{repr}
 5 | \alias{repr_text.skim_df}
 6 | \alias{repr_text.skim_list}
 7 | \alias{repr_text.one_skim_df}
 8 | \title{Skimr printing within Jupyter notebooks}
 9 | \usage{
10 | \method{repr_text}{skim_df}(obj, ...)
11 | 
12 | \method{repr_text}{skim_list}(obj, ...)
13 | 
14 | \method{repr_text}{one_skim_df}(obj, ...)
15 | }
16 | \arguments{
17 | \item{obj}{The object to \link{print} and then return the output.}
18 | 
19 | \item{...}{ignored.}
20 | }
21 | \value{
22 | None. \code{invisible(NULL)}.
23 | }
24 | \description{
25 | This reproduces printed results in the console. By default Jupyter kernels
26 | render the final object in the cell. We want the version printed by
27 | \code{skimr} instead of the data that it contains.
28 | }
29 | 


--------------------------------------------------------------------------------
/man/sfl.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/sfl.R
 3 | \name{sfl}
 4 | \alias{sfl}
 5 | \title{Create a skimr function list}
 6 | \usage{
 7 | sfl(..., skim_type = "")
 8 | }
 9 | \arguments{
10 | \item{...}{Inherited from dplyr::data_masking() for dplyr version 1 or later
11 | or dplyr::funs() for older versions of dplyr.
12 | A list of functions
13 | specified by:
14 | \itemize{
15 | \item Their name, \code{"mean"}
16 | \item The function itself, \code{mean}
17 | \item A call to the function with \code{.} as a dummy argument,
18 | \code{mean(., na.rm = TRUE)}
19 | \item An anonymous function in \pkg{purrr} notation, \code{~mean(., na.rm = TRUE)}
20 | }}
21 | 
22 | \item{skim_type}{A character scalar. This is used to match locally-provided
23 | skimmers with defaults. See \code{\link[=get_skimmers]{get_skimmers()}} for more detail.}
24 | }
25 | \value{
26 | A \code{skimr_function_list}, which contains a list of \code{fun_calls},
27 | returned by \code{dplyr::funs()} and a list of skimming functions to drop.
28 | }
29 | \description{
30 | This constructor is used to create a named list of functions. It also lets you
31 | pass \code{NULL} to identify a skimming function that you wish to remove.
32 | Only functions that return a single value, working with \code{\link[dplyr:summarise]{dplyr::summarize()}},
33 | can be used within \code{sfl}.
34 | }
35 | \details{
36 | \code{sfl()} will automatically generate callables and names for a variety of
37 | inputs, including functions, formulas and strings. Nonetheless, we recommend
38 | providing names when reasonable to get better \code{\link[=skim]{skim()}} output.
39 | }
40 | \examples{
41 | # sfl's can take a variety of input formats and will generate names
42 | # if not provided.
43 | sfl(mad, "var", ~ length(.)^2)
44 | 
45 | # But these can generate unpredictable names in your output.
46 | # Better to set your own names.
47 | sfl(mad = mad, variance = "var", length_sq = ~ length(.)^2)
48 | 
49 | # sfl's can remove individual skimmers from defaults by passing NULL.
50 | sfl(hist = NULL)
51 | 
52 | # When working interactively, you don't need to set a type.
53 | # But you should when defining new defaults with `get_skimmers()`.
54 | get_skimmers.my_new_class <- function(column) {
55 |   sfl(n_missing, skim_type = "my_new_class")
56 | }
57 | }
58 | \seealso{
59 | \code{\link[dplyr:funs]{dplyr::funs()}}, \code{\link[=skim_with]{skim_with()}} and \code{\link[=get_skimmers]{get_skimmers()}}.
60 | }
61 | 


--------------------------------------------------------------------------------
/man/skim-attr.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/skim_obj.R
 3 | \name{skim-attr}
 4 | \alias{skim-attr}
 5 | \alias{data_rows}
 6 | \alias{data_cols}
 7 | \alias{df_name}
 8 | \alias{dt_key}
 9 | \alias{group_names}
10 | \alias{base_skimmers}
11 | \alias{skimmers_used}
12 | \title{Functions for accessing skim_df attributes}
13 | \usage{
14 | data_rows(object)
15 | 
16 | data_cols(object)
17 | 
18 | df_name(object)
19 | 
20 | dt_key(object)
21 | 
22 | group_names(object)
23 | 
24 | base_skimmers(object)
25 | 
26 | skimmers_used(object)
27 | }
28 | \arguments{
29 | \item{object}{A \code{skim_df} or \code{skim_list}.}
30 | }
31 | \value{
32 | Data contained within the requested \code{skimr} attribute.
33 | }
34 | \description{
35 | These functions simplify access to attributes contained within a \code{skim_df}.
36 | While all attributes are read-only, being able to extract this information
37 | is useful for different analyses. These functions should always be preferred
38 | over calling base R's attribute functions.
39 | }
40 | \section{Functions}{
41 | \itemize{
42 | \item \code{data_rows()}: Get the number of rows in the skimmed data frame.
43 | 
44 | \item \code{data_cols()}: Get the number of columns in the skimmed data frame.
45 | 
46 | \item \code{df_name()}: Get the name of the skimmed data frame. This is only
47 | available in contexts where the name can be looked up. This is often not
48 | the case within a pipeline.
49 | 
50 | \item \code{dt_key()}: Get the key of the skimmed data.table. This is only
51 | available in contexts where \code{data} is of class \code{data.table}.
52 | 
53 | \item \code{group_names()}: Get the names of the groups in the original data frame.
54 | Only available if the data was grouped. Otherwise, \code{NULL}.
55 | 
56 | \item \code{base_skimmers()}: Get the names of the base skimming functions used.
57 | 
58 | \item \code{skimmers_used()}: Get the names of the skimming functions used, separated
59 | by data type.
60 | 
61 | }}
62 | 


--------------------------------------------------------------------------------
/man/skim-obj.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/skim_obj.R
  3 | \name{skim-obj}
  4 | \alias{skim-obj}
  5 | \alias{has_type_column}
  6 | \alias{has_variable_column}
  7 | \alias{has_skimr_attributes}
  8 | \alias{has_skim_type_attribute}
  9 | \alias{has_skimmers}
 10 | \alias{is_data_frame}
 11 | \alias{is_skim_df}
 12 | \alias{is_one_skim_df}
 13 | \alias{is_skim_list}
 14 | \alias{could_be_skim_df}
 15 | \alias{assert_is_skim_df}
 16 | \alias{assert_is_skim_list}
 17 | \alias{assert_is_one_skim_df}
 18 | \title{Test if an object is compatible with \code{skimr}}
 19 | \usage{
 20 | has_type_column(object)
 21 | 
 22 | has_variable_column(object)
 23 | 
 24 | has_skimr_attributes(object)
 25 | 
 26 | has_skim_type_attribute(object)
 27 | 
 28 | has_skimmers(object)
 29 | 
 30 | is_data_frame(object)
 31 | 
 32 | is_skim_df(object)
 33 | 
 34 | is_one_skim_df(object)
 35 | 
 36 | is_skim_list(object)
 37 | 
 38 | could_be_skim_df(object)
 39 | 
 40 | assert_is_skim_df(object)
 41 | 
 42 | assert_is_skim_list(object)
 43 | 
 44 | assert_is_one_skim_df(object)
 45 | }
 46 | \arguments{
 47 | \item{object}{Any \code{R} object.}
 48 | }
 49 | \description{
 50 | Objects within \code{skimr} are identified by a class, but they require additional
 51 | attributes and data columns for all operations to succeed. These checks help
 52 | ensure this. While they have some application externally, they are mostly
 53 | used internally.
 54 | }
 55 | \details{
 56 | Most notably, a \code{skim_df} has columns \code{skim_type} and \code{skim_variable}. It also
 57 | has the following special attributes:
 58 | \itemize{
 59 | \item \code{data_rows}: n rows in the original data
 60 | \item \code{data_cols}: original number of columns
 61 | \item \code{df_name}: name of the original data frame
 62 | \item \code{dt_key}: name of the key if original is a data.table
 63 | \item \code{groups}: if there were group variables
 64 | \item \code{base_skimmers}: names of functions applied to all skim types
 65 | \item \code{skimmers_used}: names of functions used to skim each type
 66 | }
 67 | 
 68 | The functions in these checks work like \code{\link[=all.equal]{all.equal()}}. They return \code{TRUE} if
 69 | the check passes, or otherwise notifies why the check failed. This makes them
 70 | more useful when throwing errors.
 71 | }
 72 | \section{Functions}{
 73 | \itemize{
 74 | \item \code{has_type_column()}: Does the object have the \code{skim_type} column?
 75 | 
 76 | \item \code{has_variable_column()}: Does the object have the \code{skim_variable} column?
 77 | 
 78 | \item \code{has_skimr_attributes()}: Does the object have the appropriate \code{skimr} attributes?
 79 | 
 80 | \item \code{has_skim_type_attribute()}: Does the object have a \code{skim_type} attribute? This makes
 81 | it a \code{one_skim_df}.
 82 | 
 83 | \item \code{has_skimmers()}: Does the object have skimmers?
 84 | 
 85 | \item \code{is_data_frame()}: Is the object a data frame?
 86 | 
 87 | \item \code{is_skim_df()}: Is the object a \code{skim_df}?
 88 | 
 89 | \item \code{is_one_skim_df()}: Is the object a \code{one_skim_df}? This is similar to a
 90 | \code{skim_df}, but does not have the \code{skim_type} column. That is stored as an
 91 | attribute instead.
 92 | 
 93 | \item \code{is_skim_list()}: Is the object a \code{skim_list}?
 94 | 
 95 | \item \code{could_be_skim_df()}: Is this a data frame with \code{skim_variable} and
 96 | \code{skim_type} columns?
 97 | 
 98 | \item \code{assert_is_skim_df()}: Stop if the object is not a \code{skim_df}.
 99 | 
100 | \item \code{assert_is_skim_list()}: Stop if the object is not a \code{skim_list}.
101 | 
102 | \item \code{assert_is_one_skim_df()}: Stop if the object is not a \code{one_skim_df}.
103 | 
104 | }}
105 | 


--------------------------------------------------------------------------------
/man/skim.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/skim.R
  3 | \name{skim}
  4 | \alias{skim}
  5 | \alias{skim_tee}
  6 | \alias{skim_without_charts}
  7 | \title{Skim a data frame, getting useful summary statistics}
  8 | \usage{
  9 | skim(data, ..., .data_name = NULL)
 10 | 
 11 | skim_tee(data, ..., skim_fun = skim)
 12 | 
 13 | skim_without_charts(data, ..., .data_name = NULL)
 14 | }
 15 | \arguments{
 16 | \item{data}{A tibble, or an object that can be coerced into a tibble.}
 17 | 
 18 | \item{...}{Columns to select for skimming. When none are provided, the
 19 | default is to skim all columns.}
 20 | 
 21 | \item{.data_name}{The name to use for the data. Defaults to the same as data.}
 22 | 
 23 | \item{skim_fun}{The skim function used.}
 24 | 
 25 | \item{skim}{The skimming function to use in \code{skim_tee()}.}
 26 | }
 27 | \value{
 28 | A \code{skim_df} object, which also inherits the class(es) of the input
 29 | data. In many ways, the object behaves like a \code{\link[tibble:tibble]{tibble::tibble()}}.
 30 | }
 31 | \description{
 32 | \code{skim()} is an alternative to \code{\link[=summary]{summary()}}, quickly providing a broad
 33 | overview of a data frame. It handles data of all types, dispatching a
 34 | different set of summary functions based on the types of columns in the data
 35 | frame.
 36 | }
 37 | \details{
 38 | Each call produces a \code{skim_df}, which is fundamentally a tibble with a
 39 | special print method. One unusual feature of this data frame is a
 40 | pseudo-namespace for columns. \code{skim()} computes statistics by data type, and it
 41 | stores them in the data frame as \verb{<type>.<statistic>}. These types are
 42 | stripped when printing the results. The "base" skimmers (\code{n_missing} and
 43 | \code{complete_rate}) are the only columns that don't follow this behavior.
 44 | See \code{\link[=skim_with]{skim_with()}} for more details on customizing \code{skim()} and
 45 | \code{\link[=get_default_skimmers]{get_default_skimmers()}} for a list of default functions.
 46 | 
 47 | If you just want to see the printed output, call \code{skim_tee()} instead.
 48 | This function returns the original data. \code{skim_tee()} uses the default
 49 | \code{skim()}, but you can replace it with the \code{skim_fun} argument.
 50 | 
 51 | The data frame produced by \code{skim} is wide and sparse. To avoid type coercion
 52 | \code{skimr} uses a type namespace for all summary statistics. Columns for numeric
 53 | summary statistics all begin \code{numeric}; for factor summary statistics
 54 | begin \code{factor}; and so on.
 55 | 
 56 | See \code{\link[=partition]{partition()}} and \code{\link[=yank]{yank()}} for methods for transforming this wide data
 57 | frame. The first function splits it into a list, with each entry
 58 | corresponding to a data type. The latter pulls a single subtable for a
 59 | particular type from the \code{skim_df}.
 60 | 
 61 | \code{skim()} is designed to operate in pipes and to generally play nicely with
 62 | other \code{tidyverse} functions. This means that you can use \code{tidyselect} helpers
 63 | within \code{skim} to select or drop specific columns for summary. You can also
 64 | further work with a \code{skim_df} using \code{dplyr} functions in a pipeline.
 65 | }
 66 | \section{Customizing skim}{
 67 | 
 68 | \code{skim()} is an intentionally simple function, with minimal arguments like
 69 | \code{\link[=summary]{summary()}}. Nonetheless, this package provides two broad approaches to
 70 | how you can customize \code{skim()}'s behavior. You can customize the functions
 71 | that are called to produce summary statistics with \code{\link[=skim_with]{skim_with()}}.
 72 | }
 73 | 
 74 | \section{Unicode rendering}{
 75 | 
 76 | If the rendered examples show unencoded values such as \verb{<U+2587>} you will
 77 | need to change your locale to allow proper rendering. Please review the
 78 | \emph{Using Skimr} vignette for more information
 79 | (\code{vignette("Using_skimr", package = "skimr")}).
 80 | 
 81 | Otherwise, we export \code{skim_without_charts()} to produce summaries without the
 82 | spark graphs. These are the source of the unicode dependency.
 83 | }
 84 | 
 85 | \examples{
 86 | skim(iris)
 87 | 
 88 | # Use tidyselect
 89 | skim(iris, Species)
 90 | skim(iris, starts_with("Sepal"))
 91 | skim(iris, where(is.numeric))
 92 | 
 93 | # Skim also works groupwise
 94 | iris \%>\%
 95 |   dplyr::group_by(Species) \%>\%
 96 |   skim()
 97 | 
 98 | # Which five numeric columns have the greatest mean value?
 99 | # Look in the `numeric.mean` column.
100 | iris \%>\%
101 |   skim() \%>\%
102 |   dplyr::select(numeric.mean) \%>\%
103 |   dplyr::slice_head(n = 5)
104 | 
105 | # Which of my columns have missing values? Use the base skimmer n_missing.
106 | iris \%>\%
107 |   skim() \%>\%
108 |   dplyr::filter(n_missing > 0)
109 | 
110 | # Use skim_tee to view the skim results and
111 | # continue using the original data.
112 | chickwts \%>\%
113 |   skim_tee() \%>\%
114 |   dplyr::filter(feed == "sunflower")
115 | 
116 | # Produce a summary without spark graphs
117 | iris \%>\%
118 |   skim_without_charts()
119 | }
120 | 


--------------------------------------------------------------------------------
/man/skim_with.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/skim_with.R
 3 | \name{skim_with}
 4 | \alias{skim_with}
 5 | \title{Set or add the summary functions for a particular type of data}
 6 | \usage{
 7 | skim_with(
 8 |   ...,
 9 |   base = sfl(n_missing = n_missing, complete_rate = complete_rate),
10 |   append = TRUE
11 | )
12 | }
13 | \arguments{
14 | \item{...}{One or more (\code{sfl}) \code{skimmer_function_list} objects, with an
15 | argument name that matches a particular data type.}
16 | 
17 | \item{base}{An \code{sfl} that sets skimmers for all column types.}
18 | 
19 | \item{append}{Whether the provided options should be in addition to the
20 | defaults already in \code{skim}. Default is \code{TRUE}.}
21 | }
22 | \value{
23 | A new \code{skim()} function. This is callable. See \code{\link[=skim]{skim()}} for more
24 | details.
25 | }
26 | \description{
27 | While skim is designed around having an opinionated set of defaults, you
28 | can use this function to change the summary statistics that it returns.
29 | }
30 | \details{
31 | \code{skim_with()} is a closure: a function that returns a new function. This
32 | lets you have several skimming functions in a single R session, but it
33 | also means that you need to assign the return of \code{skim_with()} before
34 | you can use it.
35 | 
36 | You assign values within \code{skim_with} by using the \code{\link[=sfl]{sfl()}} helper (\code{skimr}
37 | function list). This helper behaves mostly like \code{\link[dplyr:funs]{dplyr::funs()}}, but lets
38 | you also identify which skimming functions you want to remove, by setting
39 | them to \code{NULL}. Assign an \code{sfl} to each column type that you wish to modify.
40 | 
41 | Functions that summarize all data types, and always return the same type
42 | of value, can be assigned to the \code{base} argument. The default base skimmers
43 | compute the number of missing values \code{\link[=n_missing]{n_missing()}} and the rate of values
44 | being complete, i.e. not missing, \code{\link[=complete_rate]{complete_rate()}}.
45 | 
46 | When \code{append = TRUE} and local skimmers have names matching the names of
47 | entries in the default \code{skim_function_list}, the values in the default list
48 | are overwritten. Similarly, if \code{NULL} values are passed within \code{sfl()}, these
49 | default skimmers are dropped. Otherwise, if \code{append = FALSE}, only the
50 | locally-provided skimming functions are used.
51 | 
52 | Note that \code{append} only applies to the \code{typed} skimmers (i.e. non-base).
53 | See \code{\link[=get_default_skimmer_names]{get_default_skimmer_names()}} for a list of defaults.
54 | }
55 | \examples{
56 | # Use new functions for numeric functions. If you don't provide a name,
57 | # one will be automatically generated.
58 | my_skim <- skim_with(numeric = sfl(median, mad), append = FALSE)
59 | my_skim(faithful)
60 | 
61 | # If you want to remove a particular skimmer, set it to NULL
62 | # This removes the inline histogram
63 | my_skim <- skim_with(numeric = sfl(hist = NULL))
64 | my_skim(faithful)
65 | 
66 | # This works with multiple skimmers. Just match names to overwrite
67 | my_skim <- skim_with(numeric = sfl(iqr = IQR, p25 = NULL, p75 = NULL))
68 | my_skim(faithful)
69 | 
70 | # Alternatively, set `append = FALSE` to replace the skimmers of a type.
71 | my_skim <- skim_with(numeric = sfl(mean = mean, sd = sd), append = FALSE)
72 | 
73 | # Skimmers are unary functions. Partially apply arguments during assignment.
74 | # For example, you might want to remove NA values.
75 | my_skim <- skim_with(numeric = sfl(iqr = ~ IQR(., na.rm = TRUE)))
76 | 
77 | # Set multiple types of skimmers simultaneously.
78 | my_skim <- skim_with(numeric = sfl(mean), character = sfl(length))
79 | 
80 | # Or pass the same as a list, unquoting the input.
81 | my_skimmers <- list(numeric = sfl(mean), character = sfl(length))
82 | my_skim <- skim_with(!!!my_skimmers)
83 | 
84 | # Use the v1 base skimmers instead.
85 | my_skim <- skim_with(base = sfl(
86 |   missing = n_missing,
87 |   complete = n_complete,
88 |   n = length
89 | ))
90 | 
91 | # Remove the base skimmers entirely
92 | my_skim <- skim_with(base = NULL)
93 | }
94 | 


--------------------------------------------------------------------------------
/man/skimr-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/skimr-package.R
 3 | \docType{package}
 4 | \name{skimr-package}
 5 | \alias{skimr}
 6 | \alias{skimr-package}
 7 | \title{Skim a data frame}
 8 | \description{
 9 | This package provides an alternative to the default summary functions
10 | within R. The package's API is tidy, functions take data frames, return
11 | data frames and can work as part of a pipeline. The returned \code{skimr}
12 | object is subsettable and offers a human readable output.
13 | }
14 | \details{
15 | \code{skimr} is opinionated, providing a strong set of summary statistics
16 | that are generated for a variety of different data types. It also
17 | provides an API for customization. Users can change both the functions
18 | dispatched and the way the results are formatted.
19 | }
20 | \seealso{
21 | Useful links:
22 | \itemize{
23 |   \item \url{https://docs.ropensci.org/skimr/}
24 |   \item \url{https://github.com/ropensci/skimr/}
25 |   \item Report bugs at \url{https://github.com/ropensci/skimr/issues}
26 | }
27 | 
28 | }
29 | \author{
30 | \strong{Maintainer}: Elin Waring \email{elin.waring@gmail.com}
31 | 
32 | Authors:
33 | \itemize{
34 |   \item Michael Quinn \email{msquinn@google.com}
35 |   \item Amelia McNamara \email{amcnamara@smith.edu}
36 |   \item Eduardo Arino de la Rubia \email{earino@gmail.com}
37 |   \item Hao Zhu \email{haozhu233@gmail.com}
38 |   \item Shannon Ellis \email{sellis18@jhmi.edu}
39 | }
40 | 
41 | Other contributors:
42 | \itemize{
43 |   \item Julia Lowndes \email{lowndes@nceas.ucsb.edu} [contributor]
44 |   \item Hope McLeod \email{hmgit2@gmail.com} [contributor]
45 |   \item Hadley Wickham \email{hadley@rstudio.com} [contributor]
46 |   \item Kirill Müller \email{krlmlr+r@mailbox.org} [contributor]
47 |   \item  RStudio, Inc. (Spark functions) [copyright holder]
48 |   \item Connor Kirkpatrick \email{hello@connorkirkpatrick.com} [contributor]
49 |   \item Scott Brenstuhl \email{brenstsr@miamioh.edu} [contributor]
50 |   \item Patrick Schratz \email{patrick.schratz@gmail.com} [contributor]
51 |   \item lbusett \email{lbusett@gmail.com} [contributor]
52 |   \item Mikko Korpela \email{mvkorpel@iki.fi} [contributor]
53 |   \item Jennifer Thompson \email{thompson.jennifer@gmail.com} [contributor]
54 |   \item Harris McGehee \email{mcgehee.harris@gmail.com} [contributor]
55 |   \item Mark Roepke \email{mroepke5@gmail.com} [contributor]
56 |   \item Patrick Kennedy \email{pkqstr@protonmail.com} [contributor]
57 |   \item Daniel Possenriede \email{possenriede@gmail.com} [contributor]
58 |   \item David Zimmermann \email{david_j_zimmermann@hotmail.com} [contributor]
59 |   \item Kyle Butts \email{buttskyle96@gmail.com} [contributor]
60 |   \item Bastian Torges \email{bastian.torges@gmail.com} [contributor]
61 |   \item Rick Saporta \email{Rick@TheFarmersDog.com} [contributor]
62 |   \item Henry Morgan Stewart \email{henry.morganstewart@gmail.com} [contributor]
63 |   \item Olivier Roy [contributor]
64 | }
65 | 
66 | }
67 | 


--------------------------------------------------------------------------------
/man/skimr-vctrs.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/vctrs.R
 3 | \name{skimr-vctrs}
 4 | \alias{skimr-vctrs}
 5 | \alias{vec_ptype2.skim_df.skim_df}
 6 | \alias{vec_ptype2.skim_df.tbl_df}
 7 | \alias{vec_ptype2.tbl_df.skim_df}
 8 | \alias{vec_cast.skim_df.skim_df}
 9 | \alias{vec_cast.skim_df.tbl_df}
10 | \alias{vec_cast.tbl_df.skim_df}
11 | \title{Functions for working with the vctrs package}
12 | \usage{
13 | \method{vec_ptype2}{skim_df.skim_df}(x, y, ...)
14 | 
15 | \method{vec_ptype2}{skim_df.tbl_df}(x, y, ...)
16 | 
17 | \method{vec_ptype2}{tbl_df.skim_df}(x, y, ...)
18 | 
19 | \method{vec_cast}{skim_df.skim_df}(x, to, ...)
20 | 
21 | \method{vec_cast}{skim_df.tbl_df}(x, to, ...)
22 | 
23 | \method{vec_cast}{tbl_df.skim_df}(x, to, ...)
24 | }
25 | \description{
26 | These make it clear that we need to use the tibble behavior when joining,
27 | concatenating or casting \code{skim_df} objects. For a better discussion, on
28 | why this is important and how these functions work, see:
29 | \url{https://vctrs.r-lib.org/reference/howto-faq-coercion-data-frame.html}.
30 | }
31 | \details{
32 | \verb{vec_ptype2.*} handles finding common prototypes between \code{skim_df} and
33 | similar objects. \verb{vec_cast.*} handles casting between objects. Note that
34 | as of \verb{dplyr 1.0.2}, \code{\link[dplyr:bind_rows]{dplyr::bind_rows()}} does not fully support combining
35 | attributes and \code{\link[vctrs:vec_bind]{vctrs::vec_rbind()}} is preferred instead.
36 | }
37 | \keyword{internal}
38 | 


--------------------------------------------------------------------------------
/man/stats.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/stats.R
  3 | \name{stats}
  4 | \alias{stats}
  5 | \alias{n_missing}
  6 | \alias{n_complete}
  7 | \alias{complete_rate}
  8 | \alias{n_whitespace}
  9 | \alias{sorted_count}
 10 | \alias{top_counts}
 11 | \alias{inline_hist}
 12 | \alias{n_empty}
 13 | \alias{min_char}
 14 | \alias{max_char}
 15 | \alias{n_unique}
 16 | \alias{ts_start}
 17 | \alias{ts_end}
 18 | \alias{inline_linegraph}
 19 | \alias{list_lengths_min}
 20 | \alias{list_lengths_median}
 21 | \alias{list_lengths_max}
 22 | \alias{list_min_length}
 23 | \alias{list_max_length}
 24 | \title{Summary statistic functions}
 25 | \usage{
 26 | n_missing(x)
 27 | 
 28 | n_complete(x)
 29 | 
 30 | complete_rate(x)
 31 | 
 32 | n_whitespace(x)
 33 | 
 34 | sorted_count(x)
 35 | 
 36 | top_counts(x, max_char = 3, max_levels = 4)
 37 | 
 38 | inline_hist(x, n_bins = 8)
 39 | 
 40 | n_empty(x)
 41 | 
 42 | min_char(x)
 43 | 
 44 | max_char(x)
 45 | 
 46 | n_unique(x)
 47 | 
 48 | ts_start(x)
 49 | 
 50 | ts_end(x)
 51 | 
 52 | inline_linegraph(x, length.out = 16)
 53 | 
 54 | list_lengths_min(x)
 55 | 
 56 | list_lengths_median(x)
 57 | 
 58 | list_lengths_max(x)
 59 | 
 60 | list_min_length(x)
 61 | 
 62 | list_max_length(x)
 63 | }
 64 | \arguments{
 65 | \item{x}{A vector}
 66 | 
 67 | \item{max_char}{In \code{top_counts}, the maximum number of characters shown for each level name.}
 68 | 
 69 | \item{max_levels}{The maximum number of levels to be displayed.}
 70 | 
 71 | \item{n_bins}{In \code{inline_hist}, the number of histogram bars.}
 72 | 
 73 | \item{length.out}{In \code{inline_linegraph}, the length of the character time
 74 | series.}
 75 | }
 76 | \description{
 77 | \code{skimr} provides extensions to a variety of functions with R's stats package
 78 | to simplify creating summaries of data. All functions are vectorized over the
 79 | first argument. Additional arguments should be set in the \code{\link[=sfl]{sfl()}} that sets
 80 | the appropriate skimmers for a data type. You can use these, along with other
 81 | vectorized R functions, for creating custom sets of summary functions for
 82 | a given data type.
 83 | }
 84 | \section{Functions}{
 85 | \itemize{
 86 | \item \code{n_missing()}: Calculate the sum of \code{NA} and \code{NULL} (i.e. missing) values.
 87 | 
 88 | \item \code{n_complete()}: Calculate the sum of not \code{NA} and \code{NULL} (i.e. missing)
 89 | values.
 90 | 
 91 | \item \code{complete_rate()}: Calculate the rate of complete values; complete values are not missing.
 92 | 
 93 | \item \code{n_whitespace()}: Calculate the number of rows containing only whitespace
 94 | values using s+ regex.
 95 | 
 96 | \item \code{sorted_count()}: Create a contingency table and arrange its levels in
 97 | descending order. In case of ties, the ordering of results is alphabetical
 98 | and depends upon the locale. \code{NA} is treated as an ordinary value for
 99 | sorting.
100 | 
101 | \item \code{top_counts()}: Compute and collapse a contingency table into a single
102 | character scalar. Wraps \code{\link[=sorted_count]{sorted_count()}}.
103 | 
104 | \item \code{inline_hist()}: Generate inline histogram for numeric variables. The
105 | character length of the histogram is controlled by the formatting options
106 | for character vectors.
107 | 
108 | \item \code{n_empty()}: Calculate the number of blank values in a character vector.
109 | A "blank" is equal to "".
110 | 
111 | \item \code{min_char()}: Calculate the minimum number of characters within a
112 | character vector.
113 | 
114 | \item \code{max_char()}: Calculate the maximum number of characters within a
115 | character vector.
116 | 
117 | \item \code{n_unique()}: Calculate the number of unique elements but remove \code{NA}.
118 | 
119 | \item \code{ts_start()}: Get the start for a time series without the frequency.
120 | 
121 | \item \code{ts_end()}: Get the finish for a time series without the frequency.
122 | 
123 | \item \code{inline_linegraph()}: Generate inline line graph for time series variables. The
124 | character length of the line graph is controlled by the formatting options
125 | for character vectors.
126 | Based on the function in the pillar package.
127 | 
128 | \item \code{list_lengths_min()}: Get the length of the shortest list in a vector of lists.
129 | 
130 | \item \code{list_lengths_median()}: Get the median length of the lists.
131 | 
132 | \item \code{list_lengths_max()}: Get the maximum length of the lists.
133 | 
134 | \item \code{list_min_length()}: Get the length of the shortest list in a vector of lists.
135 | 
136 | \item \code{list_max_length()}: Get the length of the longest list in a vector of lists.
137 | 
138 | }}
139 | \seealso{
140 | \code{\link[=get_skimmers]{get_skimmers()}} for customizing the functions called by \code{\link[=skim]{skim()}}.
141 | }
142 | 


--------------------------------------------------------------------------------
/man/summary.skim_df.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/summary.R
 3 | \name{summary.skim_df}
 4 | \alias{summary.skim_df}
 5 | \title{Summary function for skim_df}
 6 | \usage{
 7 | \method{summary}{skim_df}(object, ...)
 8 | }
 9 | \arguments{
 10 | \item{object}{A skim data frame.}
11 | 
12 | \item{...}{Additional arguments affecting the summary produced. Not used.}
13 | }
14 | \value{
15 | A summary of the skim data frame.
16 | }
17 | \description{
18 | This is a method of the generic function \code{\link[=summary]{summary()}}.
19 | }
20 | \examples{
21 | a <- skim(mtcars)
22 | summary(a)
23 | }
24 | 


--------------------------------------------------------------------------------
/man/to_long.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/reshape.R
 3 | \name{to_long}
 4 | \alias{to_long}
 5 | \alias{to_long.default}
 6 | \alias{to_long.skim_df}
 7 | \title{Create "long" skim output}
 8 | \usage{
 9 | to_long(.data, ..., skim_fun = skim)
10 | 
11 | \method{to_long}{default}(.data, ..., skim_fun = skim)
12 | 
13 | \method{to_long}{skim_df}(.data, ..., skim_fun = skim)
14 | }
15 | \arguments{
16 | \item{.data}{A data frame or an object that can be coerced into a data frame.}
17 | 
18 | \item{...}{Columns to select for skimming. When none are provided, the
19 | default is to skim all columns.}
20 | 
21 | \item{skim_fun}{The skim function used.}
22 | }
23 | \value{
24 | A tibble
25 | }
26 | \description{
27 | Skim results returned as a tidy long data frame with four columns:
28 | variable, type, stat and formatted.
29 | }
30 | \section{Methods (by class)}{
31 | \itemize{
32 | \item \code{to_long(default)}: Skim a data frame and convert the results to a
33 | long data frame.
34 | 
35 | \item \code{to_long(skim_df)}: Transform a skim_df to a long data frame.
36 | 
37 | }}
38 | \examples{
39 | to_long(iris)
40 | to_long(skim(iris))
41 | }
42 | 


--------------------------------------------------------------------------------
/revdep/.gitignore:
--------------------------------------------------------------------------------
1 | checks
2 | library
3 | checks.noindex
4 | library.noindex
5 | data.sqlite
6 | *.html
7 | download
8 | libs
9 | 


--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
 1 | # Platform
 2 | 
 3 | |field    |value                        |
 4 | |:--------|:----------------------------|
 5 | |version  |R version 4.4.2 (2024-10-31) |
 6 | |os       |Ubuntu 24.04.1 LTS           |
 7 | |system   |x86_64, linux-gnu            |
 8 | |ui       |X11                          |
 9 | |language |(EN)                         |
10 | |collate  |en_US.UTF-8                  |
11 | |ctype    |en_US.UTF-8                  |
12 | |tz       |Etc/UTC                      |
13 | |date     |2025-01-23                   |
14 | |pandoc   |3.6.2 @ /usr/bin/pandoc      |
15 | 
16 | # Dependencies
17 | 
18 | |package |old   |new   |Δ  |
19 | |:-------|:-----|:-----|:--|
20 | |skimr   |2.1.5 |2.1.5 |NA |
21 | 
22 | # Revdeps
23 | 
24 | ## Failed to check (23)
25 | 
26 | |package           |version |error |warning |note |
27 | |:-----------------|:-------|:-----|:-------|:----|
28 | |AdverseEvents     |?       |      |        |     |
29 | |archetyper        |?       |      |        |     |
30 | |BDgraph           |?       |      |        |     |
31 | |chronicle         |?       |      |        |     |
32 | |codebook          |?       |      |        |     |
33 | |dataCompare       |?       |      |        |     |
34 | |dispositionEffect |?       |      |        |     |
35 | |fastml            |?       |      |        |     |
36 | |HVT               |?       |      |        |     |
37 | |IGoRRR            |?       |      |        |     |
38 | |industRial        |?       |      |        |     |
39 | |liver             |?       |      |        |     |
40 | |mdsr              |?       |      |        |     |
41 | |NIMAA             |?       |      |        |     |
42 | |panelr            |?       |      |        |     |
43 | |papaja            |?       |      |        |     |
44 | |REDCapTidieR      |?       |      |        |     |
45 | |sdcLog            |?       |      |        |     |
46 | |ssgraph           |?       |      |        |     |
47 | |studentlife       |?       |      |        |     |
48 | |tidyprompt        |?       |      |        |     |
49 | |tidyREDCap        |?       |      |        |     |
50 | |volker            |?       |      |        |     |
51 | 
52 | 


--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
1 | ## revdepcheck results
2 | 
3 | We checked 23 reverse dependencies (0 from CRAN + 23 from Bioconductor), comparing R CMD check results across CRAN and dev versions of this package.
4 | 
5 |  * We saw 0 new problems
6 |  * We failed to check 23 packages
7 | 
8 | 


--------------------------------------------------------------------------------
/revdep/data.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ropensci/skimr/c0018a7eae3a8858a873d85eeda55120489fb03f/revdep/data.sqlite


--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
  1 | # AdverseEvents
  2 | 
  3 | <details>
  4 | 
  5 | * Version: 
  6 | * GitHub: https://github.com/ropensci/skimr
  7 | * Source code: NA
  8 | * Number of recursive dependencies: 0
  9 | 
 10 | </details>
 11 | 
 12 | ## Error before installation
 13 | 
 14 | ### Devel
 15 | 
 16 | ```
 17 | 
 18 | 
 19 | 
 20 | 
 21 | 
 22 | 
 23 | ```
 24 | ### CRAN
 25 | 
 26 | ```
 27 | 
 28 | 
 29 | 
 30 | 
 31 | 
 32 | 
 33 | ```
 34 | # archetyper
 35 | 
 36 | <details>
 37 | 
 38 | * Version: 
 39 | * GitHub: https://github.com/ropensci/skimr
 40 | * Source code: NA
 41 | * Number of recursive dependencies: 0
 42 | 
 43 | </details>
 44 | 
 45 | ## Error before installation
 46 | 
 47 | ### Devel
 48 | 
 49 | ```
 50 | 
 51 | 
 52 | 
 53 | 
 54 | 
 55 | 
 56 | ```
 57 | ### CRAN
 58 | 
 59 | ```
 60 | 
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | ```
 67 | # BDgraph
 68 | 
 69 | <details>
 70 | 
 71 | * Version: 
 72 | * GitHub: https://github.com/ropensci/skimr
 73 | * Source code: NA
 74 | * Number of recursive dependencies: 0
 75 | 
 76 | </details>
 77 | 
 78 | ## Error before installation
 79 | 
 80 | ### Devel
 81 | 
 82 | ```
 83 | 
 84 | 
 85 | 
 86 | 
 87 | 
 88 | 
 89 | ```
 90 | ### CRAN
 91 | 
 92 | ```
 93 | 
 94 | 
 95 | 
 96 | 
 97 | 
 98 | 
 99 | ```
100 | # chronicle
101 | 
102 | <details>
103 | 
104 | * Version: 
105 | * GitHub: https://github.com/ropensci/skimr
106 | * Source code: NA
107 | * Number of recursive dependencies: 0
108 | 
109 | </details>
110 | 
111 | ## Error before installation
112 | 
113 | ### Devel
114 | 
115 | ```
116 | 
117 | 
118 | 
119 | 
120 | 
121 | 
122 | ```
123 | ### CRAN
124 | 
125 | ```
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | ```
133 | # codebook
134 | 
135 | <details>
136 | 
137 | * Version: 
138 | * GitHub: https://github.com/ropensci/skimr
139 | * Source code: NA
140 | * Number of recursive dependencies: 0
141 | 
142 | </details>
143 | 
144 | ## Error before installation
145 | 
146 | ### Devel
147 | 
148 | ```
149 | 
150 | 
151 | 
152 | 
153 | 
154 | 
155 | ```
156 | ### CRAN
157 | 
158 | ```
159 | 
160 | 
161 | 
162 | 
163 | 
164 | 
165 | ```
166 | # dataCompare
167 | 
168 | <details>
169 | 
170 | * Version: 
171 | * GitHub: https://github.com/ropensci/skimr
172 | * Source code: NA
173 | * Number of recursive dependencies: 0
174 | 
175 | </details>
176 | 
177 | ## Error before installation
178 | 
179 | ### Devel
180 | 
181 | ```
182 | 
183 | 
184 | 
185 | 
186 | 
187 | 
188 | ```
189 | ### CRAN
190 | 
191 | ```
192 | 
193 | 
194 | 
195 | 
196 | 
197 | 
198 | ```
199 | # dispositionEffect
200 | 
201 | <details>
202 | 
203 | * Version: 
204 | * GitHub: https://github.com/ropensci/skimr
205 | * Source code: NA
206 | * Number of recursive dependencies: 0
207 | 
208 | </details>
209 | 
210 | ## Error before installation
211 | 
212 | ### Devel
213 | 
214 | ```
215 | 
216 | 
217 | 
218 | 
219 | 
220 | 
221 | ```
222 | ### CRAN
223 | 
224 | ```
225 | 
226 | 
227 | 
228 | 
229 | 
230 | 
231 | ```
232 | # fastml
233 | 
234 | <details>
235 | 
236 | * Version: 
237 | * GitHub: https://github.com/ropensci/skimr
238 | * Source code: NA
239 | * Number of recursive dependencies: 0
240 | 
241 | </details>
242 | 
243 | ## Error before installation
244 | 
245 | ### Devel
246 | 
247 | ```
248 | 
249 | 
250 | 
251 | 
252 | 
253 | 
254 | ```
255 | ### CRAN
256 | 
257 | ```
258 | 
259 | 
260 | 
261 | 
262 | 
263 | 
264 | ```
265 | # HVT
266 | 
267 | <details>
268 | 
269 | * Version: 
270 | * GitHub: https://github.com/ropensci/skimr
271 | * Source code: NA
272 | * Number of recursive dependencies: 0
273 | 
274 | </details>
275 | 
276 | ## Error before installation
277 | 
278 | ### Devel
279 | 
280 | ```
281 | 
282 | 
283 | 
284 | 
285 | 
286 | 
287 | ```
288 | ### CRAN
289 | 
290 | ```
291 | 
292 | 
293 | 
294 | 
295 | 
296 | 
297 | ```
298 | # IGoRRR
299 | 
300 | <details>
301 | 
302 | * Version: 
303 | * GitHub: https://github.com/ropensci/skimr
304 | * Source code: NA
305 | * Number of recursive dependencies: 0
306 | 
307 | </details>
308 | 
309 | ## Error before installation
310 | 
311 | ### Devel
312 | 
313 | ```
314 | 
315 | 
316 | 
317 | 
318 | 
319 | 
320 | ```
321 | ### CRAN
322 | 
323 | ```
324 | 
325 | 
326 | 
327 | 
328 | 
329 | 
330 | ```
331 | # industRial
332 | 
333 | <details>
334 | 
335 | * Version: 
336 | * GitHub: https://github.com/ropensci/skimr
337 | * Source code: NA
338 | * Number of recursive dependencies: 0
339 | 
340 | </details>
341 | 
342 | ## Error before installation
343 | 
344 | ### Devel
345 | 
346 | ```
347 | 
348 | 
349 | 
350 | 
351 | 
352 | 
353 | ```
354 | ### CRAN
355 | 
356 | ```
357 | 
358 | 
359 | 
360 | 
361 | 
362 | 
363 | ```
364 | # liver
365 | 
366 | <details>
367 | 
368 | * Version: 
369 | * GitHub: https://github.com/ropensci/skimr
370 | * Source code: NA
371 | * Number of recursive dependencies: 0
372 | 
373 | </details>
374 | 
375 | ## Error before installation
376 | 
377 | ### Devel
378 | 
379 | ```
380 | 
381 | 
382 | 
383 | 
384 | 
385 | 
386 | ```
387 | ### CRAN
388 | 
389 | ```
390 | 
391 | 
392 | 
393 | 
394 | 
395 | 
396 | ```
397 | # mdsr
398 | 
399 | <details>
400 | 
401 | * Version: 
402 | * GitHub: https://github.com/ropensci/skimr
403 | * Source code: NA
404 | * Number of recursive dependencies: 0
405 | 
406 | </details>
407 | 
408 | ## Error before installation
409 | 
410 | ### Devel
411 | 
412 | ```
413 | 
414 | 
415 | 
416 | 
417 | 
418 | 
419 | ```
420 | ### CRAN
421 | 
422 | ```
423 | 
424 | 
425 | 
426 | 
427 | 
428 | 
429 | ```
430 | # NIMAA
431 | 
432 | <details>
433 | 
434 | * Version: 
435 | * GitHub: https://github.com/ropensci/skimr
436 | * Source code: NA
437 | * Number of recursive dependencies: 0
438 | 
439 | </details>
440 | 
441 | ## Error before installation
442 | 
443 | ### Devel
444 | 
445 | ```
446 | 
447 | 
448 | 
449 | 
450 | 
451 | 
452 | ```
453 | ### CRAN
454 | 
455 | ```
456 | 
457 | 
458 | 
459 | 
460 | 
461 | 
462 | ```
463 | # panelr
464 | 
465 | <details>
466 | 
467 | * Version: 
468 | * GitHub: https://github.com/ropensci/skimr
469 | * Source code: NA
470 | * Number of recursive dependencies: 0
471 | 
472 | </details>
473 | 
474 | ## Error before installation
475 | 
476 | ### Devel
477 | 
478 | ```
479 | 
480 | 
481 | 
482 | 
483 | 
484 | 
485 | ```
486 | ### CRAN
487 | 
488 | ```
489 | 
490 | 
491 | 
492 | 
493 | 
494 | 
495 | ```
496 | # papaja
497 | 
498 | <details>
499 | 
500 | * Version: 
501 | * GitHub: https://github.com/ropensci/skimr
502 | * Source code: NA
503 | * Number of recursive dependencies: 0
504 | 
505 | </details>
506 | 
507 | ## Error before installation
508 | 
509 | ### Devel
510 | 
511 | ```
512 | 
513 | 
514 | 
515 | 
516 | 
517 | 
518 | ```
519 | ### CRAN
520 | 
521 | ```
522 | 
523 | 
524 | 
525 | 
526 | 
527 | 
528 | ```
529 | # REDCapTidieR
530 | 
531 | <details>
532 | 
533 | * Version: 
534 | * GitHub: https://github.com/ropensci/skimr
535 | * Source code: NA
536 | * Number of recursive dependencies: 0
537 | 
538 | </details>
539 | 
540 | ## Error before installation
541 | 
542 | ### Devel
543 | 
544 | ```
545 | 
546 | 
547 | 
548 | 
549 | 
550 | 
551 | ```
552 | ### CRAN
553 | 
554 | ```
555 | 
556 | 
557 | 
558 | 
559 | 
560 | 
561 | ```
562 | # sdcLog
563 | 
564 | <details>
565 | 
566 | * Version: 
567 | * GitHub: https://github.com/ropensci/skimr
568 | * Source code: NA
569 | * Number of recursive dependencies: 0
570 | 
571 | </details>
572 | 
573 | ## Error before installation
574 | 
575 | ### Devel
576 | 
577 | ```
578 | 
579 | 
580 | 
581 | 
582 | 
583 | 
584 | ```
585 | ### CRAN
586 | 
587 | ```
588 | 
589 | 
590 | 
591 | 
592 | 
593 | 
594 | ```
595 | # ssgraph
596 | 
597 | <details>
598 | 
599 | * Version: 
600 | * GitHub: https://github.com/ropensci/skimr
601 | * Source code: NA
602 | * Number of recursive dependencies: 0
603 | 
604 | </details>
605 | 
606 | ## Error before installation
607 | 
608 | ### Devel
609 | 
610 | ```
611 | 
612 | 
613 | 
614 | 
615 | 
616 | 
617 | ```
618 | ### CRAN
619 | 
620 | ```
621 | 
622 | 
623 | 
624 | 
625 | 
626 | 
627 | ```
628 | # studentlife
629 | 
630 | <details>
631 | 
632 | * Version: 
633 | * GitHub: https://github.com/ropensci/skimr
634 | * Source code: NA
635 | * Number of recursive dependencies: 0
636 | 
637 | </details>
638 | 
639 | ## Error before installation
640 | 
641 | ### Devel
642 | 
643 | ```
644 | 
645 | 
646 | 
647 | 
648 | 
649 | 
650 | ```
651 | ### CRAN
652 | 
653 | ```
654 | 
655 | 
656 | 
657 | 
658 | 
659 | 
660 | ```
661 | # tidyprompt
662 | 
663 | <details>
664 | 
665 | * Version: 
666 | * GitHub: https://github.com/ropensci/skimr
667 | * Source code: NA
668 | * Number of recursive dependencies: 0
669 | 
670 | </details>
671 | 
672 | ## Error before installation
673 | 
674 | ### Devel
675 | 
676 | ```
677 | 
678 | 
679 | 
680 | 
681 | 
682 | 
683 | ```
684 | ### CRAN
685 | 
686 | ```
687 | 
688 | 
689 | 
690 | 
691 | 
692 | 
693 | ```
694 | # tidyREDCap
695 | 
696 | <details>
697 | 
698 | * Version: 
699 | * GitHub: https://github.com/ropensci/skimr
700 | * Source code: NA
701 | * Number of recursive dependencies: 0
702 | 
703 | </details>
704 | 
705 | ## Error before installation
706 | 
707 | ### Devel
708 | 
709 | ```
710 | 
711 | 
712 | 
713 | 
714 | 
715 | 
716 | ```
717 | ### CRAN
718 | 
719 | ```
720 | 
721 | 
722 | 
723 | 
724 | 
725 | 
726 | ```
727 | # volker
728 | 
729 | <details>
730 | 
731 | * Version: 
732 | * GitHub: https://github.com/ropensci/skimr
733 | * Source code: NA
734 | * Number of recursive dependencies: 0
735 | 
736 | </details>
737 | 
738 | ## Error before installation
739 | 
740 | ### Devel
741 | 
742 | ```
743 | 
744 | 
745 | 
746 | 
747 | 
748 | 
749 | ```
750 | ### CRAN
751 | 
752 | ```
753 | 
754 | 
755 | 
756 | 
757 | 
758 | 
759 | ```
760 | 


--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*


--------------------------------------------------------------------------------
/skimr.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: XeLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | 
17 | BuildType: Package
18 | PackageUseDevtools: Yes
19 | PackageInstallArgs: --no-multiarch --with-keep.source
20 | PackageRoxygenize: rd,collate,namespace
21 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(skimr)
11 | 
12 | test_check("skimr")
13 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/data-table.md:
--------------------------------------------------------------------------------
  1 | # skim of a simple data.table produces output as expected
  2 | 
  3 |     Code
  4 |       skimmed_DT_letters
  5 |     Output
  6 |       -- Data Summary ------------------------
  7 |                                  Values    
  8 |       Name                       DT_letters
  9 |       Number of rows             26        
 10 |       Number of columns          1         
 11 |       Key                        NULL      
 12 |       _______________________              
 13 |       Column type frequency:               
 14 |         character                1         
 15 |       ________________________             
 16 |       Group variables            None      
 17 |       
 18 |       -- Variable type: character ---------------------------------------------------------------
 19 |         skim_variable n_missing complete_rate min max empty n_unique whitespace
 20 |       1 abc                   0             1   1   1     0       26          0
 21 | 
 22 | # skim of data.table produces output as expected
 23 | 
 24 |     Code
 25 |       skim(DT_factors)
 26 |     Output
 27 |       -- Data Summary ------------------------
 28 |                                  Values    
 29 |       Name                       DT_factors
 30 |       Number of rows             26        
 31 |       Number of columns          3         
 32 |       Key                        NULL      
 33 |       _______________________              
 34 |       Column type frequency:               
 35 |         character                1         
 36 |         factor                   1         
 37 |         numeric                  1         
 38 |       ________________________             
 39 |       Group variables            None      
 40 |       
 41 |       -- Variable type: character ---------------------------------------------------------------
 42 |         skim_variable n_missing complete_rate min max empty n_unique whitespace
 43 |       1 abc                   0             1   1   1     0       26          0
 44 |       
 45 |       -- Variable type: factor ------------------------------------------------------------------
 46 |         skim_variable n_missing complete_rate ordered n_unique top_counts   
 47 |       1 grps                  0             1 FALSE          2 AA: 18, BB: 8
 48 |       
 49 |       -- Variable type: numeric -----------------------------------------------------------------
 50 |         skim_variable n_missing complete_rate   mean    sd    p0    p25     p50   p75 p100 hist 
 51 |       1 values                0             1 0.0121 0.937 -2.21 -0.335 -0.0306 0.742 1.36 ▂▂▃▇▆
 52 | 
 53 | ---
 54 | 
 55 |     Code
 56 |       skim(DT_factors)
 57 |     Output
 58 |       -- Data Summary ------------------------
 59 |                                  Values    
 60 |       Name                       DT_factors
 61 |       Number of rows             26        
 62 |       Number of columns          3         
 63 |       Key                        abc, grps 
 64 |       _______________________              
 65 |       Column type frequency:               
 66 |         character                1         
 67 |         factor                   1         
 68 |         numeric                  1         
 69 |       ________________________             
 70 |       Group variables            None      
 71 |       
 72 |       -- Variable type: character ---------------------------------------------------------------
 73 |         skim_variable n_missing complete_rate min max empty n_unique whitespace
 74 |       1 abc                   0             1   1   1     0       26          0
 75 |       
 76 |       -- Variable type: factor ------------------------------------------------------------------
 77 |         skim_variable n_missing complete_rate ordered n_unique top_counts   
 78 |       1 grps                  0             1 FALSE          2 AA: 18, BB: 8
 79 |       
 80 |       -- Variable type: numeric -----------------------------------------------------------------
 81 |         skim_variable n_missing complete_rate   mean    sd    p0    p25     p50   p75 p100 hist 
 82 |       1 values                0             1 0.0121 0.937 -2.21 -0.335 -0.0306 0.742 1.36 ▂▂▃▇▆
 83 | 
 84 | ---
 85 | 
 86 |     Code
 87 |       skim(DF_factors)
 88 |     Output
 89 |       -- Data Summary ------------------------
 90 |                                  Values    
 91 |       Name                       DF_factors
 92 |       Number of rows             26        
 93 |       Number of columns          3         
 94 |       _______________________              
 95 |       Column type frequency:               
 96 |         character                1         
 97 |         factor                   1         
 98 |         numeric                  1         
 99 |       ________________________             
100 |       Group variables            None      
101 |       
102 |       -- Variable type: character ---------------------------------------------------------------
103 |         skim_variable n_missing complete_rate min max empty n_unique whitespace
104 |       1 abc                   0             1   1   1     0       26          0
105 |       
106 |       -- Variable type: factor ------------------------------------------------------------------
107 |         skim_variable n_missing complete_rate ordered n_unique top_counts   
108 |       1 grps                  0             1 FALSE          2 AA: 18, BB: 8
109 |       
110 |       -- Variable type: numeric -----------------------------------------------------------------
111 |         skim_variable n_missing complete_rate   mean    sd    p0    p25     p50   p75 p100 hist 
112 |       1 values                0             1 0.0121 0.937 -2.21 -0.335 -0.0306 0.742 1.36 ▂▂▃▇▆
113 | 
114 | ---
115 | 
116 |     Code
117 |       skim(tibble_factors)
118 |     Output
119 |       -- Data Summary ------------------------
120 |                                  Values        
121 |       Name                       tibble_factors
122 |       Number of rows             26            
123 |       Number of columns          3             
124 |       _______________________                  
125 |       Column type frequency:                   
126 |         character                1             
127 |         factor                   1             
128 |         numeric                  1             
129 |       ________________________                 
130 |       Group variables            None          
131 |       
132 |       -- Variable type: character ---------------------------------------------------------------
133 |         skim_variable n_missing complete_rate min max empty n_unique whitespace
134 |       1 abc                   0             1   1   1     0       26          0
135 |       
136 |       -- Variable type: factor ------------------------------------------------------------------
137 |         skim_variable n_missing complete_rate ordered n_unique top_counts   
138 |       1 grps                  0             1 FALSE          2 AA: 18, BB: 8
139 |       
140 |       -- Variable type: numeric -----------------------------------------------------------------
141 |         skim_variable n_missing complete_rate   mean    sd    p0    p25     p50   p75 p100 hist 
142 |       1 values                0             1 0.0121 0.937 -2.21 -0.335 -0.0306 0.742 1.36 ▂▂▃▇▆
143 | 
144 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/dplyr.md:
--------------------------------------------------------------------------------
  1 | # dplyr::filter works as expected
  2 | 
  3 |     Code
  4 |       dplyr::filter(skimmed_iris, skim_type == "numeric")
  5 |     Output
  6 |       -- Data Summary ------------------------
  7 |                                  Values
  8 |       Name                       iris  
  9 |       Number of rows             150   
 10 |       Number of columns          5     
 11 |       _______________________          
 12 |       Column type frequency:           
 13 |         numeric                  4     
 14 |       ________________________         
 15 |       Group variables            None  
 16 |       
 17 |       -- Variable type: numeric ------------------------------------------------------
 18 |         skim_variable n_missing complete_rate mean    sd  p0 p25  p50 p75 p100 hist 
 19 |       1 Sepal.Length          0             1 5.84 0.828 4.3 5.1 5.8  6.4  7.9 ▆▇▇▅▂
 20 |       2 Sepal.Width           0             1 3.06 0.436 2   2.8 3    3.3  4.4 ▁▆▇▂▁
 21 |       3 Petal.Length          0             1 3.76 1.77  1   1.6 4.35 5.1  6.9 ▇▁▆▇▂
 22 |       4 Petal.Width           0             1 1.20 0.762 0.1 0.3 1.3  1.8  2.5 ▇▁▇▅▃
 23 |     Code
 24 |       dplyr::filter(skimmed_iris, skim_type == "no_type")
 25 |     Output
 26 |       # A tibble: 0 x 15
 27 |       # i 15 variables: skim_type <chr>, skim_variable <chr>, n_missing <int>,
 28 |       #   complete_rate <dbl>, factor.ordered <lgl>, factor.n_unique <int>,
 29 |       #   factor.top_counts <chr>, numeric.mean <dbl>, numeric.sd <dbl>,
 30 |       #   numeric.p0 <dbl>, numeric.p25 <dbl>, numeric.p50 <dbl>, numeric.p75 <dbl>,
 31 |       #   numeric.p100 <dbl>, numeric.hist <chr>
 32 | 
 33 | # dplyr::select works as expected
 34 | 
 35 |     Code
 36 |       with_type
 37 |     Output
 38 |       # A tibble: 5 x 2
 39 |         skim_type skim_variable
 40 |         <chr>     <chr>        
 41 |       1 factor    Species      
 42 |       2 numeric   Sepal.Length 
 43 |       3 numeric   Sepal.Width  
 44 |       4 numeric   Petal.Length 
 45 |       5 numeric   Petal.Width  
 46 | 
 47 | ---
 48 | 
 49 |     Code
 50 |       without_type
 51 |     Output
 52 |       # A tibble: 5 x 1
 53 |         numeric.mean
 54 |                <dbl>
 55 |       1        NA   
 56 |       2         5.84
 57 |       3         3.06
 58 |       4         3.76
 59 |       5         1.20
 60 | 
 61 | # dplyr::mutate works as expected
 62 | 
 63 |     Code
 64 |       input
 65 |     Output
 66 |       -- Data Summary ------------------------
 67 |                                  Values
 68 |       Name                       iris  
 69 |       Number of rows             150   
 70 |       Number of columns          5     
 71 |       _______________________          
 72 |       Column type frequency:           
 73 |         factor                   1     
 74 |         numeric                  4     
 75 |       ________________________         
 76 |       Group variables            None  
 77 |       
 78 |       -- Variable type: factor -------------------------------------------------------
 79 |         skim_variable n_missing complete_rate ordered n_unique
 80 |       1 Species               0             1 FALSE          3
 81 |         top_counts               
 82 |       1 set: 50, ver: 50, vir: 50
 83 |       
 84 |       -- Variable type: numeric ------------------------------------------------------
 85 |         skim_variable n_missing complete_rate mean    sd  p0 p25  p50 p75 p100 hist 
 86 |       1 Sepal.Length          0             1 5.84 0.828 4.3 5.1 5.8  6.4  7.9 ▆▇▇▅▂
 87 |       2 Sepal.Width           0             1 3.06 0.436 2   2.8 3    3.3  4.4 ▁▆▇▂▁
 88 |       3 Petal.Length          0             1 3.76 1.77  1   1.6 4.35 5.1  6.9 ▇▁▆▇▂
 89 |       4 Petal.Width           0             1 1.20 0.762 0.1 0.3 1.3  1.8  2.5 ▇▁▇▅▃
 90 |         mean2
 91 |       1 34.1 
 92 |       2  9.35
 93 |       3 14.1 
 94 |       4  1.44
 95 | 
 96 | # dplyr::slice works as expected
 97 | 
 98 |     Code
 99 |       input
100 |     Output
101 |       -- Data Summary ------------------------
102 |                                  Values
103 |       Name                       iris  
104 |       Number of rows             150   
105 |       Number of columns          5     
106 |       _______________________          
107 |       Column type frequency:           
108 |         factor                   1     
109 |         numeric                  2     
110 |       ________________________         
111 |       Group variables            None  
112 |       
113 |       -- Variable type: factor -------------------------------------------------------
114 |         skim_variable n_missing complete_rate ordered n_unique
115 |       1 Species               0             1 FALSE          3
116 |         top_counts               
117 |       1 set: 50, ver: 50, vir: 50
118 |       
119 |       -- Variable type: numeric ------------------------------------------------------
120 |         skim_variable n_missing complete_rate mean    sd  p0 p25 p50 p75 p100 hist 
121 |       1 Sepal.Length          0             1 5.84 0.828 4.3 5.1 5.8 6.4  7.9 ▆▇▇▅▂
122 |       2 Sepal.Width           0             1 3.06 0.436 2   2.8 3   3.3  4.4 ▁▆▇▂▁
123 | 
124 | # dplyr::arrange works as expected
125 | 
126 |     Code
127 |       dplyr::arrange(skimmed_iris, desc(numeric.mean))
128 |     Output
129 |       -- Data Summary ------------------------
130 |                                  Values
131 |       Name                       iris  
132 |       Number of rows             150   
133 |       Number of columns          5     
134 |       _______________________          
135 |       Column type frequency:           
136 |         factor                   1     
137 |         numeric                  4     
138 |       ________________________         
139 |       Group variables            None  
140 |       
141 |       -- Variable type: factor -------------------------------------------------------
142 |         skim_variable n_missing complete_rate ordered n_unique
143 |       1 Species               0             1 FALSE          3
144 |         top_counts               
145 |       1 set: 50, ver: 50, vir: 50
146 |       
147 |       -- Variable type: numeric ------------------------------------------------------
148 |         skim_variable n_missing complete_rate mean    sd  p0 p25  p50 p75 p100 hist 
149 |       1 Sepal.Length          0             1 5.84 0.828 4.3 5.1 5.8  6.4  7.9 ▆▇▇▅▂
150 |       2 Petal.Length          0             1 3.76 1.77  1   1.6 4.35 5.1  6.9 ▇▁▆▇▂
151 |       3 Sepal.Width           0             1 3.06 0.436 2   2.8 3    3.3  4.4 ▁▆▇▂▁
152 |       4 Petal.Width           0             1 1.20 0.762 0.1 0.3 1.3  1.8  2.5 ▇▁▇▅▃
153 | 
154 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/skim_tee.md:
--------------------------------------------------------------------------------
 1 | # Using skim_tee prints returns the object
 2 | 
 3 |     Code
 4 |       skim_object <- skim_tee(chickwts)
 5 |     Output
 6 |       -- Data Summary ------------------------
 7 |                                  Values
 8 |       Name                       data  
 9 |       Number of rows             71    
10 |       Number of columns          2     
11 |       _______________________          
12 |       Column type frequency:           
13 |         factor                   1     
14 |         numeric                  1     
15 |       ________________________         
16 |       Group variables            None  
17 |       
18 |       -- Variable type: factor -------------------------------------------------------
19 |         skim_variable n_missing complete_rate ordered n_unique
20 |       1 feed                  0             1 FALSE          6
21 |         top_counts                        
22 |       1 soy: 14, cas: 12, lin: 12, sun: 12
23 |       
24 |       -- Variable type: numeric ------------------------------------------------------
25 |         skim_variable n_missing complete_rate mean   sd  p0  p25 p50  p75 p100 hist 
26 |       1 weight                0             1 261. 78.1 108 204. 258 324.  423 ▆▆▇▇▃
27 | 
28 | # skim_tee prints only selected columns, but returns full object
29 | 
30 |     Code
31 |       obj <- skim_tee(iris, Species)
32 |     Output
33 |       -- Data Summary ------------------------
34 |                                  Values
35 |       Name                       data  
36 |       Number of rows             150   
37 |       Number of columns          5     
38 |       _______________________          
39 |       Column type frequency:           
40 |         factor                   1     
41 |       ________________________         
42 |       Group variables            None  
43 |       
44 |       -- Variable type: factor -------------------------------------------------------
45 |         skim_variable n_missing complete_rate ordered n_unique
46 |       1 Species               0             1 FALSE          3
47 |         top_counts               
48 |       1 set: 50, ver: 50, vir: 50
49 | 
50 | # skim_tee supports dplyr helpers
51 | 
52 |     Code
53 |       obj <- skim_tee(iris, starts_with("Sepal"))
54 |     Output
55 |       -- Data Summary ------------------------
56 |                                  Values
57 |       Name                       data  
58 |       Number of rows             150   
59 |       Number of columns          5     
60 |       _______________________          
61 |       Column type frequency:           
62 |         numeric                  2     
63 |       ________________________         
64 |       Group variables            None  
65 |       
66 |       -- Variable type: numeric ------------------------------------------------------
67 |         skim_variable n_missing complete_rate mean    sd  p0 p25 p50 p75 p100 hist 
68 |       1 Sepal.Length          0             1 5.84 0.828 4.3 5.1 5.8 6.4  7.9 ▆▇▇▅▂
69 |       2 Sepal.Width           0             1 3.06 0.436 2   2.8 3   3.3  4.4 ▁▆▇▂▁
70 | 
71 | # Skim_tee works with groups
72 | 
73 |     Code
74 |       obj <- skim_tee(iris_grouped, Sepal.Length, skim_fun = my_skim)
75 |     Output
76 |       -- Data Summary ------------------------
77 |                                  Values 
78 |       Name                       data   
79 |       Number of rows             150    
80 |       Number of columns          5      
81 |       _______________________           
82 |       Column type frequency:            
83 |         numeric                  1      
84 |       ________________________          
85 |       Group variables            Species
86 |       
87 |       -- Variable type: numeric ------------------------------------------------------
88 |         skim_variable Species    n_missing complete_rate mean    sd  p0  p25 p50 p75
89 |       1 Sepal.Length  setosa             0             1 5.01 0.352 4.3 4.8  5   5.2
90 |       2 Sepal.Length  versicolor         0             1 5.94 0.516 4.9 5.6  5.9 6.3
91 |       3 Sepal.Length  virginica          0             1 6.59 0.636 4.9 6.22 6.5 6.9
92 |         p100
93 |       1  5.8
94 |       2  7  
95 |       3  7.9
96 | 
97 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/summary.md:
--------------------------------------------------------------------------------
 1 | # The summary print method prints the correct object
 2 | 
 3 |     Code
 4 |       skim_summary_input
 5 |     Output
 6 |       -- Data Summary ------------------------
 7 |                                  Values
 8 |       Name                       iris  
 9 |       Number of rows             150   
10 |       Number of columns          5     
11 |       _______________________          
12 |       Column type frequency:           
13 |         factor                   1     
14 |         numeric                  4     
15 |       ________________________         
16 |       Group variables            None  
17 | 
18 | # The summary print method prints the correct object when piped
19 | 
20 |     Code
21 |       summary_input
22 |     Output
23 |       -- Data Summary ------------------------
24 |                                  Values    
25 |       Name                       Piped data
26 |       Number of rows             150       
27 |       Number of columns          5         
28 |       _______________________              
29 |       Column type frequency:               
30 |         factor                   1         
31 |         numeric                  4         
32 |       ________________________             
33 |       Group variables            None      
34 | 
35 | 


--------------------------------------------------------------------------------
/tests/testthat/helper-expectations.R:
--------------------------------------------------------------------------------
 1 | # Simpler expectations for data frames.
 2 | 
 3 | expect_n_columns <- function(object, n) { # custom expectation: `object` has exactly `n` columns
 4 |   stopifnot(is.numeric(n), length(n) == 1) # `n` must be a single number
 5 |   act <- testthat::quasi_label(rlang::enquo(object)) # value in act$val, label in act$lab
 6 |   act$n <- length(act$val) # length() of a data frame is its number of columns
 7 |   testthat::expect(act$n == n, sprintf(
 8 |     "%s has %i columns, not %i columns.",
 9 |     act$lab, act$n, n
10 |   ))
11 |   invisible(act$val) # hand the tested value back invisibly
12 | }
13 | 
14 | expect_n_rows <- function(object, n) { # custom expectation: `object` has exactly `n` rows
15 |   stopifnot(is.numeric(n), length(n) == 1) # `n` must be a single number
16 |   act <- testthat::quasi_label(rlang::enquo(object)) # value in act$val, label in act$lab
17 |   act$n <- nrow(act$val) # observed row count
18 |   testthat::expect(
19 |     act$n == n,
20 |     sprintf("%s has %i rows, not %i rows", act$lab, act$n, n)
21 |   )
22 |   invisible(act$val) # hand the tested value back invisibly
23 | }
24 | 
25 | expect_NA <- function(object) { # custom expectation: `object` is NA
26 |   act <- testthat::quasi_label(rlang::enquo(object)) # value in act$val, label in act$lab
27 |   testthat::expect(is.na(act$val), sprintf("%s is not NA", act$lab)) # NOTE(review): presumably meant for length-1 input -- confirm
28 |   invisible(act$val) # hand the tested value back invisibly
29 | }
30 | 


--------------------------------------------------------------------------------
/tests/testthat/test-data-table.R:
--------------------------------------------------------------------------------
 1 | 
 2 | test_that("skim of a simple data.table produces no warnings", {
 3 |   skip_if_not_installed("data.table")
 4 |   withr::local_options(list(width = 91))
 5 |   DT_letters <- data.table::data.table(abc = letters)
 6 |   expect_no_warning(skim(DT_letters))
 7 | })
 8 | 
 9 | test_that("skim of a simple data.table produces no warnings even with dtplyr", {
10 |   skip_if_not_installed("data.table")
11 |   skip_if_not_installed("dtplyr")
12 |   withr::local_options(list(width = 91))
13 |   DT_letters <- data.table::data.table(abc = letters)
14 |   expect_no_warning(skim(DT_letters))
15 | })
16 | 
17 | test_that("skim of a simple data.table produces output as expected", {
18 |   skip_if_not_installed("data.table")
19 |   DT_letters <- data.table::data.table(abc = letters)
20 | 
21 |   skimmed_DT_letters <- skim(DT_letters)
22 |   withr::local_options(list(cli.unicode = FALSE, width = 91))
23 |   expect_snapshot(skimmed_DT_letters)
24 | })
25 | 
26 | 
27 | test_that("skim of data.table produces output as expected", {
28 |   skip_if_not_installed("data.table")
29 |   set.seed(1L)
30 | 
31 |   DT_factors <- data.table::data.table(
32 |     abc = letters,
33 |     grps = factor(sample(c("AA", "BB"), 26, TRUE)),
34 |     values = rnorm(26)
35 |   )
36 | 
37 |   withr::local_options(list(cli.unicode = FALSE, width = 91))
38 |   expect_snapshot(skim(DT_factors))
39 | 
40 |   data.table::setkeyv(DT_factors, c("abc", "grps"))
41 |   expect_snapshot(skim(DT_factors))
42 | 
43 |   DF_factors <- as.data.frame(DT_factors)
44 |   expect_snapshot(skim(DF_factors))
45 | 
46 |   tibble_factors <- tibble::as_tibble(DT_factors)
47 |   expect_snapshot(skim(tibble_factors))
48 | })
49 | 


--------------------------------------------------------------------------------
/tests/testthat/test-dplyr.R:
--------------------------------------------------------------------------------
 1 | skimmed_iris <- skim(iris)
 2 | 
 3 | test_that("dplyr::filter works as expected", {
 4 |   skip_if_not(l10n_info()$`UTF-8`)
 5 |   withr::local_options(list(cli.unicode = FALSE))
 6 |   expect_snapshot({
 7 |     dplyr::filter(skimmed_iris, skim_type == "numeric")
 8 |     # no rows
 9 |     dplyr::filter(skimmed_iris, skim_type == "no_type")
10 |   })
11 | })
12 | 
13 | test_that("dplyr::select works as expected", {
14 |   skip_if_not(l10n_info()$`UTF-8`)
15 |   withr::local_options(list(cli.unicode = FALSE))
16 |   with_type <- dplyr::select(skimmed_iris, skim_type, skim_variable)
17 |   expect_snapshot(with_type)
18 |   without_type <- dplyr::select(skimmed_iris, numeric.mean)
19 |   expect_snapshot(without_type)
20 | })
21 | 
22 | test_that("dplyr::mutate works as expected", {
23 |   skip_if_not(l10n_info()$`UTF-8`)
24 |   withr::local_options(list(cli.unicode = FALSE))
25 |   input <- dplyr::mutate(skimmed_iris, mean2 = numeric.mean^2)
26 |   expect_snapshot(input)
27 | 
28 |   no_variable <- dplyr::mutate(skimmed_iris, skim_variable = NULL) # drop the skim_variable column
29 |   expect_identical( # assert the comparison; a bare identical() silently discards its result
30 |     capture.output(print(no_variable)),
31 |     capture.output(print(tibble::as_tibble(no_variable))) # reference: the same data printed as a plain tibble
32 |   )
33 | })
34 | 
35 | test_that("dplyr::slice works as expected", {
36 |   skip_if_not(l10n_info()$`UTF-8`)
37 |   withr::local_options(list(cli.unicode = FALSE))
38 |   input <- dplyr::slice(skimmed_iris, 1:3)
39 |   expect_snapshot(input)
40 | })
41 | 
42 | test_that("dplyr::arrange works as expected", {
43 |   skip_if_not(l10n_info()$`UTF-8`)
44 |   withr::local_options(list(cli.unicode = FALSE))
45 |   expect_snapshot(dplyr::arrange(skimmed_iris, desc(numeric.mean)))
46 | })
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-get_skimmers.R:
--------------------------------------------------------------------------------
 1 | test_that("get_sfl() behaves correctly", {
 2 |   my_sfl <- get_sfl("numeric")
 3 |   expect_s3_class(my_sfl, "skimr_function_list")
 4 |   expect_equal(my_sfl$skim_type, "numeric")
 5 |   expect_named(my_sfl$funs, c(
 6 |     "mean", "sd", "p0", "p25", "p50", "p75", "p100", "hist"
 7 |   ))
 8 | 
 9 |   expect_warning(get_sfl("missing class"), "no default")
10 | })
11 | 
12 | test_that("get_default_skimmer_names() has a correct list of defaults", {
13 |   defaults <- get_default_skimmer_names()
14 |   expect_setequal(
15 |     names(defaults),
16 |     c(
17 |       "AsIs", "character", "complex", "Date", "difftime", "factor",
18 |       "haven_labelled", "list", "logical", "numeric", "POSIXct", "Timespan",
19 |       "ts"
20 |     )
21 |   )
22 |   expect_identical(defaults$AsIs, c("n_unique", "min_length", "max_length"))
23 |   expect_identical(
24 |     defaults$character, c("min", "max", "empty", "n_unique", "whitespace")
25 |   )
26 |   expect_identical(defaults$complex, c("mean"))
27 |   expect_identical(defaults$Date, c("min", "max", "median", "n_unique"))
28 |   expect_identical(defaults$difftime, c("min", "max", "median", "n_unique"))
29 |   expect_identical(defaults$Timespan, c("min", "max", "median", "n_unique"))
30 |   expect_identical(defaults$factor, c("ordered", "n_unique", "top_counts"))
31 |   expect_identical(defaults$list, c("n_unique", "min_length", "max_length"))
32 |   expect_identical(defaults$logical, c("mean", "count"))
33 |   expect_identical(
34 |     defaults$numeric, c("mean", "sd", "p0", "p25", "p50", "p75", "p100", "hist")
35 |   )
36 |   expect_identical(defaults$POSIXct, c("min", "max", "median", "n_unique"))
37 |   expect_identical(
38 |     defaults$ts,
39 |     c(
40 |       "start", "end", "frequency", "deltat", "mean", "sd", "min", "max",
41 |       "median", "line_graph"
42 |     )
43 |   )
44 | })
45 | 
46 | test_that("You can get the default skimmers for a particular class", {
47 |   input <- get_default_skimmer_names("numeric")
48 |   expect_named(input, "numeric")
49 |   expect_identical(
50 |     input$numeric, c("mean", "sd", "p0", "p25", "p50", "p75", "p100", "hist")
51 |   )
52 | })
53 | 
54 | test_that("You can get the default skimmers for multiple classes", {
55 |   input <- get_default_skimmer_names(c("list", "AsIs"))
56 |   expect_named(input, c("list", "AsIs"))
57 |   expect_identical(input$AsIs, c("n_unique", "min_length", "max_length"))
58 |   expect_identical(input$list, c("n_unique", "min_length", "max_length"))
59 | })
60 | 
61 | test_that("A warning is given for classes that don't have defaults", {
62 |   expect_warning(get_default_skimmer_names("no_class"), "no default")
63 | })
64 | 
65 | test_that("get_one_default_skimmer_names() behaves as expected", {
66 |   expect_identical(
67 |     get_one_default_skimmer_names("list"),
68 |     c("n_unique", "min_length", "max_length")
69 |   )
70 | })
71 | 


--------------------------------------------------------------------------------
/tests/testthat/test-reshape.R:
--------------------------------------------------------------------------------
  1 | test_that("You can partition a skim_df", {
  2 |   skimmed <- skim(iris)
  3 |   input <- partition(skimmed)
  4 |   expect_s3_class(input, "skim_list")
  5 |   expect_length(input, 2)
  6 |   expect_named(input, c("factor", "numeric"))
  7 |   attrs <- attributes(input) # metadata attributes are checked one by one below
  8 |   expect_equal(attrs$data_rows, 150)
  9 |   expect_equal(attrs$data_cols, 5)
 10 |   expect_identical(attrs$df_name, "`iris`")
 11 |   expect_identical(attrs$base_skimmers, c("n_missing", "complete_rate"))
 12 |   expect_identical(
 13 |     attrs$skimmers_used,
 14 |     list(
 15 |       numeric = c("mean", "sd", "p0", "p25", "p50", "p75", "p100", "hist"),
 16 |       factor = c("ordered", "n_unique", "top_counts")
 17 |     )
 18 |   )
 19 | 
 20 |   # Subtables: each element is checked for class, shape and column names
 21 |   expect_s3_class(input$factor, c("one_skim_df", "tbl_df", "tbl", "data.frame"))
 22 |   expect_n_rows(input$factor, 1)
 23 |   expect_n_columns(input$factor, 6)
 24 |   expect_named(input$factor, c(
 25 |     "skim_variable", "n_missing", "complete_rate", "ordered", "n_unique",
 26 |     "top_counts"
 27 |   ))
 28 | 
 29 |   expect_s3_class(input$numeric, c("one_skim_df", "tbl_df", "tbl", "data.frame"))
 30 |   expect_n_rows(input$numeric, 4)
 31 |   expect_n_columns(input$numeric, 11)
 32 |   expect_named(input$numeric, c(
 33 |     "skim_variable", "n_missing", "complete_rate", "mean",
 34 |     "sd", "p0", "p25", "p50", "p75", "p100",
 35 |     "hist"
 36 |   ))
 37 | })
 38 | 
 39 | test_that("Partitioning works in a round trip", {
 40 |   skimmed <- skim(iris)
 41 |   partitioned <- partition(skimmed)
 42 |   input <- bind(partitioned)
 43 |   expect_identical(input, skimmed)
 44 | })
 45 | 
 46 | test_that("You can yank a subtable from a skim_df", {
 47 |   skimmed <- skim(iris)
 48 |   input <- yank(skimmed, "numeric")
 49 |   expect_s3_class(input, c("one_skim_df", "tbl_df", "tbl", "data.frame"))
 50 |   expect_n_rows(input, 4)
 51 |   expect_n_columns(input, 11)
 52 |   expect_named(input, c(
 53 |     "skim_variable", "n_missing", "complete_rate", "mean",
 54 |     "sd", "p0", "p25", "p50", "p75", "p100",
 55 |     "hist"
 56 |   ))
 57 | })
 58 | 
 59 | test_that("Partition is safe if some skimmers are missing", {
 60 |   skimmed <- skim(iris)
 61 |   reduced <- dplyr::select(skimmed, skim_variable, skim_type, n_missing)
 62 |   partitioned <- partition(reduced)
 63 |   expect_length(partitioned, 2)
 64 |   expect_named(partitioned, c("factor", "numeric"))
 65 |   expect_named(partitioned$numeric, c("skim_variable", "n_missing"))
 66 | })
 67 | 
 68 | test_that("Partition handles new columns", {
 69 |   skimmed <- skim(iris)
 70 |   expanded <- dplyr::mutate(
 71 |     skimmed,
 72 |     mean2 = numeric.mean^2,
 73 |     complete2 = complete_rate^2
 74 |   )
 75 |   partitioned <- partition(expanded)
 76 |   expect_named(partitioned$numeric, c(
 77 |     "skim_variable", "n_missing", "complete_rate", "mean",
 78 |     "sd", "p0", "p25", "p50", "p75", "p100",
 79 |     "hist", "mean2", "complete2"
 80 |   ))
 81 | })
 82 | 
 83 | test_that("focus() matches select(data, skim_type, skim_variable, ...)", {
 84 |   skimmed <- skim(iris)
 85 |   expected <- dplyr::select(
 86 |     skimmed, skim_type, skim_variable, n_missing
 87 |   )
 88 |   expect_equal(focus(skimmed, n_missing), expected, ignore_attr = "skimmers_used")
 89 | })
 90 | 
 91 | test_that("focus() does not allow dropping skim metadata columns", {
 92 |   skimmed <- skim(iris)
 93 |   expect_error(focus(skimmed, -skim_variable), "Cannot drop")
 94 |   expect_error(focus(skimmed, -skim_type), "Cannot drop")
 95 | })
 96 | 
 97 | test_that("skim_to_wide() returns a deprecation warning", {
 98 |   expect_warning(skim_to_wide(iris))
 99 | })
100 | 
101 | test_that("skim_to_list() returns a deprecation warning", {
102 |   expect_warning(skim_to_list(iris))
103 | })
104 | 
105 | test_that("to_long() returns a long tidy data frame with 4 columns", {
106 |   skimmed_long <- to_long(iris)
107 |   # Statistics from the skim_df with values of NA are not included
108 |   expect_n_rows(skimmed_long, 45)
109 |   expect_named(
110 |     skimmed_long,
111 |     c("skim_type", "skim_variable", "stat", "formatted")
112 |   )
113 |   expect_length(unique(skimmed_long$stat), 13)
114 |   expect_length(unique(skimmed_long$skim_type), 2)
115 |   expect_length(unique(skimmed_long$skim_variable), 5)
116 | })
117 | 
118 | test_that("to_long() on a skim_df returns a long tidy df with 4 columns", {
119 |   skimmed_long <- to_long(skim(iris))
120 |   # NA-valued statistics are left out of the long result
121 |   expect_n_rows(skimmed_long, 45)
122 |   expect_named(
123 |     skimmed_long,
124 |     c("skim_type", "skim_variable", "stat", "formatted")
125 |   )
126 |   expect_length(unique(skimmed_long$stat), 13)
127 |   expect_length(unique(skimmed_long$skim_type), 2)
128 |   expect_length(unique(skimmed_long$skim_variable), 5)
129 | })
130 | 
131 | test_that("to_long() on a df and a skim_df from same df are identical", {
132 |   expect_identical(to_long(skim(chickwts)), to_long(chickwts))
133 | })
134 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sfl.R:
--------------------------------------------------------------------------------
 1 | test_that("Only string scalars for types", {
 2 |   expect_error(sfl(mad, skim_type = c("1", "2")))
 3 | })
 4 | 
 5 | test_that("Zero-length sfl's supported", {
 6 |   input <- sfl(skim_type = "numeric")
 7 |   expect_length(input$funs, 0)
 8 | })
 9 | 
10 | test_that("The interface for sfl's separates keep and drop functions", {
11 |   input <- sfl(mad = mad, hist = NULL, skim_type = "test")
12 |   expect_s3_class(input, "skimr_function_list")
13 |   expect_length(input, 2)
14 |   expect_named(input, c("funs", "skim_type"))
15 |   expect_identical(input$skim_type, "test")
16 | 
17 |   funs <- input$funs
18 |   expect_type(funs, "list")
19 |   expect_named(funs, c("mad", "hist"))
20 | })
21 | 
22 | test_that("sfl's support dummy names", {
23 |   input <- sfl(mean = ~ mean(., na.rm = TRUE), skim_type = "test")
24 |   funs <- input$funs
25 |   expect_equal(funs$mean, rlang::quo(mean(., na.rm = TRUE)), ignore_attr = "class")
26 | })
27 | 
28 | test_that("sfl's automatically generate function names", {
29 |   input <- sfl(mad, hist = NULL, ~ length(.)^2, "var")
30 |   expect_named(input$funs, c("mad", "hist", "~length(.)^2", '"var"'))
31 | })
32 | 


--------------------------------------------------------------------------------
/tests/testthat/test-skim_obj.R:
--------------------------------------------------------------------------------
 1 | test_that("skim produces skim_df objects", {
 2 |   skimmed <- skim(iris)
 3 |   expect_true(is_skim_df(skimmed))
 4 |   expect_false(is_skim_list(skimmed))
 5 |   expect_error(assert_is_skim_df(skimmed), NA)
 6 |   expect_error(assert_is_skim_list(skimmed))
 7 | })
 8 | 
 9 | test_that("Partition produces skim_list objects", {
10 |   skimmed <- skim(iris)
11 |   split <- partition(skimmed)
12 |   expect_false(is_skim_df(split))
13 |   expect_true(is_skim_list(split))
14 |   expect_error(assert_is_skim_df(split), "not a data.frame")
15 |   expect_error(assert_is_skim_list(split), NA)
16 | 
17 |   expect_true(is_one_skim_df(split[[1]]))
18 |   expect_error(assert_is_one_skim_df(split[[1]]), NA)
19 | })
20 | 
21 | test_that("Error messages are correct", {
22 |   expect_error(assert_is_skim_df(iris), "missing column `skim_type`")
23 |   expect_error(assert_is_skim_df(iris), "missing column `skim_variable`")
24 |   expect_error(assert_is_skim_df(iris), "missing attributes:")
25 | })
26 | 
27 | test_that("Bind produces skim_df objects", {
28 |   skimmed <- skim(iris)
29 |   split <- partition(skimmed)
30 |   combined <- bind(split)
31 |   expect_true(is_skim_df(combined))
32 |   expect_false(is_skim_list(combined))
33 |   expect_error(assert_is_skim_df(combined), NA)
34 |   expect_error(assert_is_skim_list(combined))
35 | })
36 | 
37 | test_that("Creating a skim_df requires skim_type and skim_variable columns", {
38 |   expect_true(could_be_skim_df(
39 |     data.frame(
40 |       skim_type = "t",
41 |       skim_variable = "v"
42 |     )
43 |   ))
44 |   expect_false(could_be_skim_df(
45 |     list(
46 |       type = "t",
47 |       variable = "v"
48 |     )
49 |   ))
50 |   expect_false(could_be_skim_df(
51 |     data.frame(
52 |       variable = "v"
53 |     )
54 |   ))
55 |   expect_false(could_be_skim_df(
56 |     data.frame(
57 |       type = "t"
58 |     )
59 |   ))
60 | })
61 | 


--------------------------------------------------------------------------------
/tests/testthat/test-skim_print.R:
--------------------------------------------------------------------------------
  1 | test_that("Skim prints a header for the entire output and each type", {
  2 |   withr::local_options(list(cli.unicode = FALSE))
  3 |   skip_if_not(l10n_info()$`UTF-8`)
  4 |   expect_snapshot({
  5 |     input <- skim(iris)
  6 |     input
  7 |     input$numeric.hist <- NULL
  8 |     input
  9 |   })
 10 | })
 11 | 
 12 | test_that("Skim prints a special header for grouped data frames", {
 13 |   skip_if_not(l10n_info()$`UTF-8`)
 14 |   withr::local_options(list(cli.unicode = FALSE))
 15 |   expect_snapshot( skim(dplyr::group_by(iris, Species)))
 16 | })
 17 | 
 18 | test_that("Skim lists print as expected", {
 19 |   skip_if_not(l10n_info()$`UTF-8`)
 20 |   withr::local_options(list(cli.unicode = FALSE))
 21 |   skimmed <- skim(iris)
 22 |   expect_snapshot(partition(skimmed))
 23 | })
 24 | 
 25 | test_that("knit_print produces expected results", {
 26 |   skip_if_not(l10n_info()$`UTF-8`)
 27 |   withr::local_options(list(cli.unicode = FALSE))
 28 |   skimmed <- skim(iris)
 29 |   input <- knit_print(skimmed)
 30 |   expect_s3_class(input, "knit_asis")
 31 |   expect_length(input, 1)
 32 |   expect_snapshot(cat(input))
 33 | })
 34 | 
 35 | test_that("knit_print works with skim summaries", {
 36 |   withr::local_options(list(cli.unicode = FALSE))
 37 |   skimmed <- skim(iris)
 38 |   summarized <- summary(skimmed)
 39 |   expect_snapshot(cat(knitr::knit_print(summarized)))
 40 | })
 41 | 
 42 | test_that("knit_print appropriately falls back to tibble printing", {
 43 |   withr::local_options(list(cli.unicode = FALSE))
 44 |   skimmed <- skim(iris)
 45 |   reduced <- dplyr::select(skimmed, skim_variable, numeric.mean)
 46 |   expect_snapshot({
 47 |     input <- knitr::knit_print(reduced)
 48 |   })
 49 |   expect_s3_class(input, "data.frame")
 50 | })
 51 | 
 52 | test_that("Summaries can be suppressed within knitr", {
 53 |   skip_if_not(l10n_info()$`UTF-8`)
 54 |   withr::local_options(list(cli.unicode = FALSE))
 55 |   skimmed <- skim(iris)
 56 |   options <- list(skimr_include_summary = FALSE)
 57 |   expect_snapshot(cat(knitr::knit_print(skimmed, options = options)))
 58 | })
 59 | 
 60 | test_that("Skim lists have a separate knit_print method", {
 61 |   skip_if_not(l10n_info()$`UTF-8`)
 62 |   withr::local_options(list(cli.unicode = FALSE))
 63 |   skimmed <- skim(iris)
 64 |   skim_list <- partition(skimmed)
 65 |   expect_snapshot(cat(knit_print(skim_list)))
 66 | })
 67 | 
 68 | test_that("You can yank a type from a skim_df and call knit_print", {
 69 |   withr::local_options(list(cli.unicode = FALSE))
 70 |   skimmed <- skim(iris)
 71 |   skim_one <- yank(skimmed, "factor")
 72 |   expect_snapshot(cat(knit_print(skim_one)))
 73 | })
 74 | 
 75 | test_that("Skim falls back to tibble::print.tbl() appropriately", {
 76 |   withr::local_options(list(cli.unicode = FALSE))
 77 |   
 78 |   expect_snapshot({
 79 |     input <- skim(iris)
 80 |     dplyr::select(input, numeric.mean)
 81 |   })
 82 | })
 83 | 
 84 | test_that("Print focused objects appropriately", {
 85 |   withr::local_options(list(cli.unicode = FALSE))
 86 |   skip_if_not(l10n_info()$`UTF-8`)
 87 |   skimmed <- skim(iris)
 88 |   expect_snapshot(focus(skimmed, n_missing))
 89 | })
 90 | 
 91 | test_that("Support for smaller consoles can be set with the width option", {
 92 |   withr::local_options(list(cli.unicode = FALSE)) # NOTE(review): only cli.unicode is set; no width option is changed in this test -- confirm intent
 93 |   skip_if_not(l10n_info()$`UTF-8`)
 94 |   expect_snapshot(skim(iris))
 95 | })
 96 | 
 97 | test_that("Table header width can be controlled by an option", {
 98 |   withr::local_options(list(cli.unicode = FALSE)) # NOTE(review): only cli.unicode is set; no header-width option is changed in this test -- confirm intent
 99 |   skip_if_not(l10n_info()$`UTF-8`)
100 |   skimmed <- skim(iris)
101 |   expect_snapshot(skimmed)
102 | })
103 | 
104 | test_that("skimr creates appropriate output for Jupyter", {
105 |   withr::local_options(list(cli.unicode = FALSE)) # NOTE(review): the object is only printed via expect_snapshot(); no Jupyter/repr-specific call is made here -- confirm intent
106 |   skip_if_not(l10n_info()$`UTF-8`)
107 |   skimmed <- skim(iris)
108 |   expect_snapshot(skimmed)
109 | })
110 | 


--------------------------------------------------------------------------------
/tests/testthat/test-skim_tee.R:
--------------------------------------------------------------------------------
 1 | test_that("Using skim_tee prints returns the object", {
 2 |   skip_if_not(l10n_info()$`UTF-8`)
 3 |   expect_snapshot({
 4 |     skim_object <- skim_tee(chickwts)
 5 |   })
 6 |   expect_identical(chickwts, skim_object)
 7 | })
 8 | 
 9 | test_that("skim_tee prints only selected columns, but returns full object", {
10 |   skip_if_not(l10n_info()$`UTF-8`)
11 |   expect_snapshot({
12 |     obj <- skim_tee(iris, Species)
13 |   })
14 |   expect_identical(obj, iris)
15 | })
16 | 
17 | test_that("skim_tee supports dplyr helpers", {
18 |   skip_if_not(l10n_info()$`UTF-8`)
19 |   expect_snapshot({
20 |     obj <- skim_tee(iris, starts_with("Sepal"))
21 |   })
22 |   expect_identical(obj, iris)
23 | })
24 | 
25 | test_that("Skim_tee works with groups", {
26 |   skip_if_not(l10n_info()$`UTF-8`)
27 |   iris_grouped <- dplyr::group_by(iris, Species)
28 |   my_skim <- skim_with(numeric = sfl(hist = NULL))
29 |   expect_snapshot({
30 |     obj <- skim_tee(iris_grouped, Sepal.Length, skim_fun = my_skim)
31 |   })
32 |   expect_identical(obj, iris_grouped)
33 | })
34 | 


--------------------------------------------------------------------------------
/tests/testthat/test-skim_with.R:
--------------------------------------------------------------------------------
  1 | test_that("Skimmer list is updated correctly when changing functions", {
  2 |   funs <- sfl(median = median, mad = mad)
  3 |   new_skim <- skim_with(numeric = funs, append = FALSE)
  4 |   x <- tibble::tibble(rnorm(10))
  5 |   input <- new_skim(x)
  6 |   used <- attr(input, "skimmers_used")
  7 |   expect_identical(used, list(numeric = c("median", "mad")))
  8 | })
  9 | 
 10 | test_that("Skimming functions can be changed for multiple types", {
 11 |   newfuns1 <- sfl(iqr = IQR, q99 = ~ quantile(., probs = .99))
 12 |   newfuns2 <- sfl(n2 = length)
 13 |   new_skim <- skim_with(numeric = newfuns1, factor = newfuns2, append = FALSE)
 14 |   input <- new_skim(iris)
 15 |   used <- attr(input, "skimmers_used")
 16 |   expect_identical(used, list(numeric = c("iqr", "q99"), factor = "n2"))
 17 | })
 18 | 
 19 | test_that("Skimming functions can be changed with a list", {
 20 |   newfuns1 <- sfl(iqr = IQR, q99 = ~ quantile(., probs = .99))
 21 |   new_skim <- skim_with(list(numeric = newfuns1), append = FALSE)
 22 |   input <- new_skim(iris)
 23 |   used <- attr(input, "skimmers_used")
 24 |   expect_identical(used$numeric, c("iqr", "q99"))
 25 | })
 26 | 
 27 | test_that("Multiple types can be changed with a single list", {
 28 |   newfuns1 <- sfl(iqr = IQR, q99 = ~ quantile(., probs = .99))
 29 |   newfuns2 <- sfl(n2 = length)
 30 |   new_skim <- skim_with(
 31 |     list(numeric = newfuns1, factor = newfuns2), # one unnamed list carrying both types
 32 |     append = FALSE
 33 |   )
 34 |   input <- new_skim(iris)
 35 |   used <- attr(input, "skimmers_used") # names of the skimmers actually applied, per type
 36 |   expect_identical(
 37 |     used, list(numeric = c("iqr", "q99"), factor = c("n2"))
 38 |   )
 39 | })
 40 | 
 41 | test_that("Skimming functions can be appended.", {
 42 |   funs <- sfl(iqr = IQR)
 43 |   new_skim <- skim_with(numeric = funs)
 44 |   x <- tibble::tibble(rnorm(10))
 45 |   input <- new_skim(x)
 46 |   used <- attr(input, "skimmers_used")
 47 |   expect_identical(
 48 |     used,
 49 |     list(
 50 |       numeric = c(
 51 |         "mean", "sd", "p0", "p25", "p50", "p75", "p100", "hist", "iqr"
 52 |       )
 53 |     )
 54 |   )
 55 | })
 56 | 
 57 | test_that("Setting a statistic to null removes it from skimmers", {
 58 |   new_skim <- skim_with(numeric = sfl(hist = NULL))
 59 |   x <- tibble::tibble(rnorm(10))
 60 |   input <- new_skim(x)
 61 |   used <- attr(input, "skimmers_used")
 62 |   expect_identical(
 63 |     used, list(numeric = c("mean", "sd", "p0", "p25", "p50", "p75", "p100"))
 64 |   )
 65 | })
 66 | 
 67 | test_that("Skimmers can be removed and added at the same time", {
 68 |   new_skim <- skim_with(numeric = sfl(hist = NULL, iqr = IQR))
 69 |   x <- tibble::tibble(rnorm(10))
 70 |   input <- new_skim(x)
 71 |   used <- attr(input, "skimmers_used")
 72 |   expect_identical(
 73 |     used,
 74 |     list(numeric = c("mean", "sd", "p0", "p25", "p50", "p75", "p100", "iqr"))
 75 |   )
 76 | })
 77 | 
 78 | test_that("Skimming functions for new types can be added", {
 79 |   funs <- sfl(iqr = IQR, quantile = ~ quantile(., probs = .99))
 80 |   expect_message(new_skim <- skim_with(new_type = funs), "new_type")
 81 |   x <- tibble::tibble(x = rnorm(10))
 82 |   class(x$x) <- "new_type"
 83 |   input <- new_skim(x)
 84 |   used <- attr(input, "skimmers_used")
 85 |   expect_identical(used, list(new_type = c("iqr", "quantile")))
 86 | })
 87 | 
 88 | test_that("Set multiple sets of skimming functions", {
 89 |   funs <- sfl(iqr = IQR, quantile = ~ quantile(., probs = .99))
 90 |   expect_message(
 91 |     new_skim <- skim_with(numeric = funs, new_type = funs),
 92 |     "new_type"
 93 |   )
 94 |   x <- tibble::tibble(x = rnorm(10), y = rnorm(10))
 95 |   class(x$x) <- "new_type"
 96 |   input <- new_skim(x)
 97 |   used <- attr(input, "skimmers_used")
 98 |   expect_named(used, c("new_type", "numeric"))
 99 |   expect_identical(used$new_type, c("iqr", "quantile"))
100 |   expect_identical(
101 |     used$numeric,
102 |     c(
103 |       "mean", "sd", "p0", "p25", "p50", "p75", "p100", "hist", "iqr", "quantile"
104 |     )
105 |   )
106 | })
107 | 
108 | 
109 | test_that("Set multiple sets of skimming functions, rlang", {
110 |   funs <- sfl(iqr = IQR, quantile = ~ quantile(., probs = .99))
111 |   expect_message(new_skim <- skim_with(!!!list(numeric = funs, new_type = funs),
112 |     append = FALSE
113 |   ))
114 |   x <- tibble::tibble(x = rnorm(10), y = rnorm(10))
115 |   class(x$x) <- "new_type"
116 |   input <- new_skim(x)
117 |   used <- attr(input, "skimmers_used")
118 |   expect_named(used, c("new_type", "numeric"))
119 |   expect_identical(used$new_type, c("iqr", "quantile"))
120 |   expect_identical(used$numeric, c("iqr", "quantile"))
121 | })
122 | 
123 | test_that("Skimming functions without a class return an error.", {
124 |   funs_no_class <- sfl(IQR)
125 |   expect_error(skim_with(funs_no_class), "arguments to be named.")
126 |   expect_error(
127 |     skim_with(funs_no_class, numeric = funs_no_class),
128 |     "arguments to be named."
129 |   )
130 | })
131 | 
132 | test_that("An empty call to skim_with() returns the default skim()", {
133 |   input <- skim_with()
134 |   expect_identical(input(iris), skim(iris))
135 | })
136 | 
137 | test_that("User-defined defaults require sfl's with class names", {
138 |   local_mocked_bindings(get_skimmers = function(column) sfl(length))
139 |   expect_error(skim(data.frame(1)), "Default skimming functions")
140 | })
141 | 
142 | test_that("Sfl's can be passed as an unquoted list", {
143 |   my_skimmers <- list(numeric = sfl(mean), factor = sfl(length))
144 |   my_skim <- skim_with(!!!my_skimmers, append = FALSE)
145 |   input <- my_skim(iris)
146 |   expect_named(
147 |     input,
148 |     c(
149 |       "skim_type", "skim_variable", "n_missing", "complete_rate",
150 |       "factor.length", "numeric.mean"
151 |     )
152 |   )
153 | })
154 | 
155 | test_that("Doubles and integers are both 'numeric'", {
156 |   df <- data.frame(int = 1:3, dbl = 1:3 + 0.5)
157 |   my_skim <- skim_with(numeric = sfl(hist = NULL))
158 |   input <- my_skim(df)
159 | 
160 |   expect_false("numeric.hist" %in% names(input))
161 |   expect_equal(
162 |     attr(input, "skimmers_used")$numeric,
163 |     c("mean", "sd", "p0", "p25", "p50", "p75", "p100")
164 |   )
165 | })
166 | 
167 | test_that("Defining an integer sfl changes behavior", {
168 |   df <- data.frame(int = 1:3, dbl = 1:3 + 0.5)
169 |   expect_message(
170 |     my_skim <- skim_with(
171 |       numeric = sfl(hist = NULL), integer = sfl(int_mean = mean)
172 |     )
173 |   )
174 |   input <- my_skim(df)
175 | 
176 |   expect_false("numeric.hist" %in% names(input))
177 |   expect_true("integer.int_mean" %in% names(input))
178 |   expect_identical(
179 |     attr(input, "skimmers_used"),
180 |     list(
181 |       integer = c("int_mean"),
182 |       numeric = c("mean", "sd", "p0", "p25", "p50", "p75", "p100")
183 |     )
184 |   )
185 | })
186 | 
187 | test_that("Base skimmers can be changed", {
188 |   my_skim <- skim_with(base = sfl(length = length))
189 |   skimmed <- my_skim(iris)
190 |   expect_true("length" %in% names(skimmed))
191 |   expect_equal(attr(skimmed, "base_skimmers"), "length")
192 | })
193 | 
194 | test_that("Base skimmers require an sfl", {
195 |   expect_error(skim_with(base = list(length = length)))
196 | })
197 | 
198 | test_that("Base skimmers can be removed", {
199 |   my_skim <- skim_with(base = NULL)
200 |   skimmed <- my_skim(iris)
201 |   used <- attr(skimmed, "skimmers_used")
202 |   expect_false("base" %in% names(used))
203 | })
204 | 


--------------------------------------------------------------------------------
/tests/testthat/test-stats.R:
--------------------------------------------------------------------------------
  1 | test_that("inline_linegraph returns expected response for a ts vector", {
  2 |   input <- inline_linegraph(freeny$y)
  3 |   expect_identical(input, "⣀⣀⠤⠤⠒⠒⠉⠉")
  4 | })
  5 | 
  6 | test_that("inline_linegraph returns expected response for a long ts vector.", {
  7 |   input <- inline_linegraph(AirPassengers)
  8 |   expect_identical(input, "⣀⣀⣀⠔⠤⠊⠑⠊")
  9 | })
 10 | 
 11 | test_that("inline_linegraph returns expected response for an NA ts vector.", {
 12 |   input <- inline_linegraph(ts(c(NA, NA, NA, NA, NA, NA)))
 13 |   expect_identical(input, "⠀")
 14 | })
 15 | 
 16 | test_that("ts_start returns expected response for a ts vector", {
 17 |   input <- ts_start(freeny$y)
 18 |   expect_identical(input, 1962)
 19 | })
 20 | 
 21 | test_that("ts_end returns expected response for a ts vector", {
 22 |   input <- ts_end(freeny$y)
 23 |   expect_identical(input, 1971)
 24 | })
 25 | 
 26 | test_that("n_missing is calculated correctly.", {
 27 |   data <- c("a", "b", "c", NA, " ")
 28 |   input <- n_missing(data)
 29 |   expect_identical(input, 1L)
 30 | })
 31 | 
 32 | test_that("n_complete is calculated correctly.", {
 33 |   data <- c("a", "b", "c", NA)
 34 |   input <- n_complete(data)
 35 |   expect_identical(input, 3L)
 36 | })
 37 | 
 38 | test_that("complete_rate is calculated correctly.", {
 39 |   data <- c("a", "b", "c", NA, " ")
 40 |   input <- complete_rate(data)
 41 |   expect_equal(input, .8, tolerance = .001)
 42 | })
 43 | 
 44 | test_that("n_whitespace is calculated correctly.", {
 45 |   data <- c("a", "b", "c", NA, " ", "  a", "   ", "   a")
 46 |   input <- n_whitespace(data)
 47 |   expect_identical(input, 2L)
 48 | })
 49 | 
 50 | test_that("inline histogram is calculated correctly.", {
 51 |   input <- inline_hist(iris$Sepal.Length)
 52 |   expect_identical(input, "▂▇▅▇▆▅▂▂")
 53 | })
 54 | 
 55 | test_that("inline histogram is calculated correctly when x is all zeros.", {
 56 |   input <- inline_hist(numeric(10))
 57 |   expect_identical(input, "▁▁▁▇▁▁▁▁")
 58 | })
 59 | 
 60 | test_that("inline histogram returns an empty string when x is length 0.", {
 61 |   input <- inline_hist(numeric(0))
 62 |   expect_identical(input, " ")
 63 | })
 64 | 
 65 | test_that("inline hist is calculated correctly when x is all zeores or NAs", {
 66 |   input <- inline_hist(as.numeric(c(NA, NA, NA, 0, 0)))
 67 |   expect_identical(input, "▁▁▁▇▁▁▁▁")
 68 | })
 69 | 
 70 | test_that("inline histogram is calculated correctly when x is all 1s.", {
 71 |   input <- inline_hist(c(1, 1, 1, 1, 1, 1))
 72 |   expect_identical(input, "▁▁▁▇▁▁▁▁")
 73 | })
 74 | 
 75 | test_that("inline histogram returns empty string when x is all NAs.", {
 76 |   input <- inline_hist(as.numeric(rep(NA, 10)))
 77 |   expect_identical(input, " ")
 78 | })
 79 | 
 80 | test_that("inline histogram is returns empty string when x is all NaN.", {
 81 |   input <- inline_hist(rep(NaN, 10))
 82 |   expect_identical(input, " ")
 83 | })
 84 | 
 85 | test_that("inline histogram is correct when x is evenly distributed.", {
 86 |   input <- inline_hist(c(1, 2, 3, 4, 5, 6, 7, 8))
 87 |   expect_identical(input, "▇▇▇▇▇▇▇▇")
 88 | })
 89 | 
 90 | test_that("inline histogram is calculated correctly with NaN.", {
 91 |   input <- inline_hist(c(1, 2, 3, 3, 6, 6, 6, 8, NaN))
 92 |   expect_identical(input, "▂▂▅▁▁▇▁▂")
 93 | })
 94 | 
 95 | test_that("inline histogram is calculated correctly with NA.", {
 96 |   input <- inline_hist(c(1, 2, 3, 3, 6, 6, 6, 8, NA))
 97 |   expect_identical(input, "▂▂▅▁▁▇▁▂")
 98 | })
 99 | 
100 | test_that("inline histogram is calculated correctly with Inf.", {
101 |   values <- c(1, 2, 3, 3, 6, 6, 6, 8, Inf, -Inf)
102 |   expect_warning(input <- inline_hist(values))
103 |   expect_identical(input, "▂▂▅▁▁▇▁▂")
104 | })
105 | 
106 | test_that("n_empty is calculated correctly.", {
107 |   strings <- c("a", "ab", "abc", "")
108 |   expected <- 1L
109 |   observed <- n_empty(strings)
110 |   expect_identical(observed, expected)
111 | })
112 | 
113 | test_that("min_char is calculated correctly, including empty strings.", {
114 |   strings <- c("a", "ab", "abc", "")
115 |   expected <- 0L
116 |   observed <- min_char(strings)
117 |   expect_identical(observed, expected)
118 | })
119 | 
120 | test_that("min_char with a multibyte character does not throw an error.", {
121 |   data <- c("a", "ab", "abc", "Coleophora asteris M\x9fhl.")
122 |   expect_error(min_char(data), NA)
123 | })
124 | 
125 | test_that("min_char returns NA when there are only NA values.", {
126 |   data <- as.character(c(NA, NA, NA, NA))
127 |   input <- min_char(data)
128 |   expect_equal(input, NA)
129 | })
130 | 
131 | test_that("max_char is calculated correctly, including empty strings.", {
132 |   strings <- c("a", "ab", "abc", "")
133 |   expected <- 3L
134 |   observed <- max_char(strings)
135 |   expect_identical(observed, expected)
136 | })
137 | 
138 | test_that("max_char with a multibyte character does not throw an error.", {
139 |   data <- c("a", "ab", "abc", "Coleophora asteris M\x9fhl.")
140 |   # Only assert that the call succeeds: passing `NA` as the regexp tells
141 |   # expect_error() that no error may be raised. The `NA` must be an argument
142 |   # of expect_error(), not of max_char(), or the test passes for the wrong reason.
143 |   expect_error(max_char(data), NA)
144 | })
145 | 
146 | test_that("max_char returns NA when there are only NA values.", {
147 |   data <- as.character(c(NA, NA, NA, NA))
148 |   input <- max_char(data)
149 |   expect_equal(input, NA)
150 | })
151 | 
152 | test_that("n_unique is calculated correctly.", {
153 |   strings <- c("a", "ab", "abc", "")
154 |   expected <- 4L
155 |   observed <- n_unique(strings)
156 |   expect_identical(observed, expected)
157 | })
158 | 
159 | test_that("n_unique handles NA as expected.", {
160 |   strings <- c("a", "ab", "abc", "", NA)
161 |   expected <- 4L
162 |   observed <- n_unique(strings)
163 |   expect_identical(observed, expected)
164 | })
165 | 
166 | test_that("list_lengths_min is calculated correctly.", {
167 |   dat <- list(
168 |     list("a", "b", "c"),
169 |     list("d", "b", "d"),
170 |     list("e", "f", "g"),
171 |     d = list("h"),
172 |     e = list("i", "j", "k", "l"),
173 |     f = NA
174 |   )
175 |   expect_identical(list_lengths_min(dat), 1L)
176 | })
177 | 
178 | test_that("list_lengths_max is calculated correctly.", {
179 |   dat <- list(
180 |     list("a", "b", "c"),
181 |     list("d", "b", "d"),
182 |     list("e", "f", "g"),
183 |     d = list("h"),
184 |     e = list("i", "j", "k", "l"),
185 |     f = NA
186 |   )
187 |   expect_identical(list_lengths_max(dat), 4L)
188 | })
189 | 
190 | test_that("list_lengths_median is calculated correctly.", {
191 |   dat <- list(
192 |     list("a", "b", "c"),
193 |     list("d", "b", "d"),
194 |     list("e", "f", "g"),
195 |     d = list("h"),
196 |     e = list("i", "j", "k", "l"),
197 |     f = NA
198 |   )
199 |   expect_identical(list_lengths_median(dat), 3L)
200 | })
201 | 
202 | test_that("list_min_length is calculated correctly.", {
203 |   dat <- I(list(5, 5:6, 5:10, NA))
204 |   expect_identical(list_min_length(dat), 1L)
205 | })
206 | 
207 | test_that("list_max_length is calculated correctly.", {
208 |   dat <- I(list(5, 5:6, 5:10, NA))
209 |   expect_identical(list_max_length(dat), 6L)
210 | })
211 | 
212 | test_that("sorted count is calculated correctly.", {
213 |   dat <- c("A", "A", "A", "B", "C", "C")
214 |   expect_equal(sorted_count(dat)[1:3], c("A" = 3, "C" = 2, "B" = 1))
215 |   expect_equal(names(sorted_count(dat)), c("A", "C", "B"))
216 | })
217 | 
218 | test_that("top counts is calculated correctly", {
219 |   dat <- c("A", "A", "A", "B", "C", "C")
220 |   expect_equal(top_counts(dat), "A: 3, C: 2, B: 1")
221 | })
222 | 
223 | test_that("sorted count is calculated correctly with a NA.", {
224 |   # NA values are excluded from the counts: only A, C and B are tallied.
225 |   dat <- c("A", "A", "A", "A", "B", NA, NA, "C", "C", "C")
226 |   expect_equal(unname(sorted_count(dat)), c(4, 3, 1))
227 |   expect_equal(names(sorted_count(dat)), c("A", "C", "B"))
228 | })
229 | 
230 | test_that("top counts is calculated correctly with a NA", {
231 |   dat <- c("A", "A", "A", "B", NA, "C", "C")
232 |   expect_equal(top_counts(dat), "A: 3, C: 2, B: 1")
233 | })
234 | 
235 | test_that("sorted count is calculated correctly with \"\".", {
236 |   # "" should be converted to "empty" and a warning issued.
237 |   dat <- c("A", "A", "A", "A", "B", "", "", "C", "C", "C")
238 |   dat <- as.factor(dat)
239 |   expect_warning(expected <- sorted_count(dat))
240 |   expect_equal(unname(expected), c(4, 3, 2, 1))
241 |   expect_equal(names(expected), c("A", "C", "empty", "B"))
242 | })
243 | 


--------------------------------------------------------------------------------
/tests/testthat/test-summary.R:
--------------------------------------------------------------------------------
 1 | test_that("Summary creates the correct summary object", {
 2 |   withr::local_options(list(cli.unicode = FALSE))
 3 |   # Test that the summary contains all of the expected named parts
 4 |   skim_input <- skim(iris)
 5 |   summary_input <- summary(skim_input)
 6 |   expect_named(
 7 |     summary_input,
 8 |     c(
 9 |       "data_name", "counts", "types", "possible_groups",
10 |       "dt_key", "data_rows", "data_cols"
11 |     )
12 |   )
13 |   expect_identical(summary_input$data_name, "iris")
14 |   expect_identical(summary_input$types, c("factor", "numeric"))
15 |   expect_identical(summary_input$data_rows, 150L)
16 |   expect_identical(summary_input$data_cols, 5L)
17 | })
18 | 
19 | test_that("The summary print method prints the correct object", {
20 |   skip_if_not(l10n_info()$`UTF-8`)
21 |   withr::local_options(list(cli.unicode = FALSE))
22 |   skim_summary_input <- summary(skim(iris))
23 |   expect_snapshot(skim_summary_input)
24 | })
25 | 
26 | test_that("The summary print method prints the correct object when piped", {
27 |   skip_if_not(l10n_info()$`UTF-8`)
28 |   withr::local_options(list(cli.unicode = FALSE))
29 |   # Test that the correct lines are output, no name should be output.
30 |   summary_input <- iris %>%
31 |     skim() %>%
32 |     summary()
33 |   expect_snapshot(summary_input)
34 | })
35 | 
36 | test_that("null object gets expected message", {
37 |   input <- NULL
38 |   expect_error(summary.skim_df(input), "dataframe is null.")
39 | })
40 | 


--------------------------------------------------------------------------------
/tests/testthat/test-vctrs.R:
--------------------------------------------------------------------------------
 1 | test_that("You can bind skim_df rows", {
 2 |   skimmed1 <- skim(iris)
 3 |   skimmed2 <- skim(mtcars)
 4 |   combined <- vctrs::vec_rbind(skimmed1, skimmed2)
 5 |   expect_s3_class(combined, "skim_df")
 6 | 
 7 |   attrs <- attributes(combined)
 8 |   expect_equal(attrs$data_rows, 182)
 9 |   expect_equal(attrs$data_cols, 16)
10 |   expect_equal(attrs$df_name, "`iris`+`mtcars`")
11 |   expect_equal(
12 |     attrs$skimmers_used,
13 |     list(
14 |       numeric = c("mean", "sd", "p0", "p25", "p50", "p75", "p100", "hist"),
15 |       factor = c("ordered", "n_unique", "top_counts")
16 |     )
17 |   )
18 | })
19 | 
20 | test_that("When binding columns, fall back to tbl_df", {
21 |   skimmed <- skim(iris)
22 |   combined <- vctrs::vec_cbind(skimmed, skimmed, .name_repair = "universal_quiet")
23 |   expect_s3_class(combined, "tbl")
24 |   expect_false("skim_df" %in% class(combined))
25 | })
26 | 


--------------------------------------------------------------------------------
/vignettes/Skimr_defaults.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Skimr defaults"
 3 | date: "`r Sys.Date()`"
 4 | output: 
 5 |   rmarkdown::html_vignette:
 6 |     latex_engine: xelatex
 7 | vignette: >
 8 |   %\VignetteIndexEntry{Skimr defaults}
 9 |   %\VignetteEngine{knitr::rmarkdown}
10 |   %\VignetteEncoding{UTF-8}
11 | ---
12 | 
13 | ## Introduction
14 | 
15 | This vignette simply displays the default data types and summary functions for
16 | `skimr`. Customizing `skimr` is explained in the **Using Skimr** vignette.
17 | 
18 | ## The base skimmers
19 | 
20 | `skimr` has a group of functions that it applies to all data types. We call
21 | these the "base" skimmers:
22 | 
23 | *  `n_missing`: The number of missing values in the column.
24 | *  `complete_rate`: The ratio of non-missing values to the total values in the
25 |    column.
26 | 
27 | ## Default skimmers
28 | 
29 | To learn more about the functions used in this package, use the function
30 | `get_default_skimmer_names()`.
31 | 
32 | ```{r}
33 | library(skimr)
34 | get_default_skimmer_names()
35 | ```
36 | 
37 | The counterpart to this function is `get_default_skimmers()`, which returns the
38 | functions themselves. If you are interested in a particular class within
39 | `skimr`, pass it as a string to either function.
40 | 
41 | ```{r}
42 | get_default_skimmer_names("numeric")
43 | ```
44 | 
45 | The same information is stored in the `skimmers_used` attribute of the object
46 | returned by `skim()`.
47 | 


--------------------------------------------------------------------------------
/vignettes/Using_fonts.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Using Fonts"
 3 | output: 
 4 |   rmarkdown::html_vignette:
 5 |     latex_engine: xelatex
 6 | vignette: >
 7 |   %\VignetteIndexEntry{Using Fonts}
 8 |   %\VignetteEngine{knitr::rmarkdown}
 9 |   %\VignetteEncoding{UTF-8}
10 | ---
11 | 
12 | ```{r setup, include = FALSE}
13 | knitr::opts_chunk$set(
14 |   collapse = TRUE,
15 |   comment = "#>"
16 | )
17 | ```
18 | 
19 | One of the features of skim is the production of spark graphs for numeric data.
20 | However, these graphs may not render properly because of a lack of font support 
21 | or for other reasons, such as an operating system that does not support UTF-8. 
22 | In your specific environment this may depend on the fonts 
23 | installed and the operating system and may occur for only 
24 | specific types of documents such as PDF documents.
25 | Skimr supports kable() formatted tables, which are used in conjunction with 
26 | fonts to render the spark graphs.
27 | 
28 | To produce spark histograms a font that supports block elements must be used. To
29 | support the spark line graphs a font with Braille support must be used. 
30 | 
31 | Well-known fonts that support block elements include _DejaVu Sans_ and _Arial_. 
32 | Their availability depends on your operating system. 
33 | 
34 | The yaml at the beginning of a document using custom fonts will generally be
35 | similar to that shown below.
36 | 
37 | ```
38 | ---
39 | title: "Untitled"
40 | mainfont: DejaVu Sans
41 | output:
42 |   html_document: default
43 |   pdf_document:
44 |     latex_engine: xelatex
45 |   word_document: default
46 | font-family: Times New Roman
47 | ---
48 | ```
49 | 
50 | 
51 | A further discussion of this (with examples) is available in the 
52 | "Using fonts" template for skimr.  If you are using RStudio you can open that 
53 | template by opening a new markdown file and selecting "From template"
54 | and then choosing it.  Alternatively this file is available inside the 
55 | skimr folder or repository at 
56 | inst/rmarkdown/templates/fonts-in-skimr/skeleton/skeleton.Rmd. 
57 | 
58 | If you are having difficulties making the spark graphs work, you can opt to turn 
59 | them off using the code below.
60 | 
61 | ```
62 | no_sparks <- skim_with(numeric = sfl(hist = NULL), ts = sfl(line_graph = NULL))
63 | 
64 | ```
65 | 


--------------------------------------------------------------------------------