├── .Rbuildignore
├── .clang-format
├── .github
├── .gitignore
└── workflows
│ ├── R-CMD-check.yaml
│ ├── pkgdown.yaml
│ ├── pr-commands.yaml
│ └── test-coverage.yaml
├── .gitignore
├── .vscode
├── c_cpp_properties.json
├── extensions.json
├── launch.json
└── settings.json
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
├── arithmatic.R
├── coerce-vector.R
├── coerce.R
├── extractors.R
├── has_sparse_elements.R
├── import-standalone-obj-type.R
├── import-standalone-types-check.R
├── options.R
├── sparse_character.R
├── sparse_double.R
├── sparse_dummy.R
├── sparse_integer.R
├── sparse_is_na.R
├── sparse_lag.R
├── sparse_logical.R
├── sparse_mean.R
├── sparse_median.R
├── sparse_replace_na.R
├── sparse_sd.R
├── sparse_sqrt.R
├── sparse_var.R
├── sparse_which_na.R
├── sparsevctrs-package.R
├── sparsity.R
├── type-predicates.R
└── validate-input.R
├── README.Rmd
├── README.md
├── _pkgdown.yml
├── air.toml
├── codecov.yml
├── cran-comments.md
├── man
├── coerce-vector.Rd
├── coerce_to_sparse_data_frame.Rd
├── coerce_to_sparse_matrix.Rd
├── coerce_to_sparse_tibble.Rd
├── extractors.Rd
├── figures
│ └── logo.png
├── has_sparse_elements.Rd
├── sparse-arithmatic-scalar.Rd
├── sparse-arithmatic.Rd
├── sparse_character.Rd
├── sparse_double.Rd
├── sparse_dummy.Rd
├── sparse_integer.Rd
├── sparse_is_na.Rd
├── sparse_lag.Rd
├── sparse_logical.Rd
├── sparse_mean.Rd
├── sparse_median.Rd
├── sparse_replace_na.Rd
├── sparse_sd.Rd
├── sparse_sqrt.Rd
├── sparse_var.Rd
├── sparse_which_na.Rd
├── sparsevctrs-package.Rd
├── sparsevctrs_options.Rd
├── sparsity.Rd
└── type-predicates.Rd
├── revdep
├── README.md
├── cran.md
├── failures.md
└── problems.md
├── sparsevctrs.Rproj
├── src
├── .gitignore
├── altrep-sparse-double.c
├── altrep-sparse-double.h
├── altrep-sparse-integer.c
├── altrep-sparse-integer.h
├── altrep-sparse-logical.c
├── altrep-sparse-string.c
├── init.c
├── sparse-arithmatic.c
├── sparse-arithmatic.h
├── sparse-dummy.c
├── sparse-dummy.h
├── sparse-extractors.c
├── sparse-extractors.h
├── sparse-utils.c
└── sparse-utils.h
├── tests
├── testthat.R
└── testthat
│ ├── _snaps
│ ├── coerce.md
│ ├── sparse_character.md
│ ├── sparse_double.md
│ ├── sparse_dummy.md
│ ├── sparse_integer.md
│ ├── sparse_logical.md
│ └── sparsity.md
│ ├── test-arithmatic.R
│ ├── test-coerce-vector.R
│ ├── test-coerce.R
│ ├── test-extractors.R
│ ├── test-has_sparse_elements.R
│ ├── test-sparse_character.R
│ ├── test-sparse_double.R
│ ├── test-sparse_dummy.R
│ ├── test-sparse_integer.R
│ ├── test-sparse_is_na.R
│ ├── test-sparse_lag.R
│ ├── test-sparse_logical.R
│ ├── test-sparse_mean.R
│ ├── test-sparse_median.R
│ ├── test-sparse_replace_na.R
│ ├── test-sparse_sd.R
│ ├── test-sparse_sqrt.R
│ ├── test-sparse_var.R
│ ├── test-sparse_which_na.R
│ ├── test-sparsity.R
│ └── test-type-predicates.R
└── vignettes
├── .gitignore
├── articles
├── .gitignore
└── when-to-use.Rmd
└── design.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^sparsevctrs\.Rproj$
2 | ^\.Rproj\.user$
3 | ^LICENSE\.md$
4 | ^README\.Rmd$
5 | ^_pkgdown\.yml$
6 | ^docs$
7 | ^pkgdown$
8 | ^\.github$
9 | ^vignettes/articles$
10 | ^compile_commands\.json$
11 | .clang-format
12 | ^.cache$
13 | ^codecov\.yml$
14 | ^\.vscode$
15 | ^debug$
16 | ^cran-comments\.md$
17 | ^CRAN-SUBMISSION$
18 | ^revdep$
19 | ^[\.]?air\.toml$
20 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | BasedOnStyle: Google
2 | IndentWidth: 2
3 | DerivePointerAlignment: false
4 | PointerAlignment: Left
5 | ColumnLimit: 80
6 | AlignAfterOpenBracket: BlockIndent
7 | SpaceAfterCStyleCast: true
8 | IncludeBlocks: Regroup
9 | SortIncludes: Never
10 | AllowShortFunctionsOnASingleLine: Empty
11 | BinPackArguments: false
12 | BinPackParameters: false
13 | AllowAllParametersOfDeclarationOnNextLine: false
--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | #
4 | # NOTE: This workflow is overkill for most R packages and
5 | # check-standard.yaml is likely a better choice.
6 | # usethis::use_github_action("check-standard") will install it.
7 | on:
8 | push:
9 | branches: [main, master]
10 | pull_request:
11 |
12 | name: R-CMD-check.yaml
13 |
14 | permissions: read-all
15 |
16 | jobs:
17 | R-CMD-check:
18 | runs-on: ${{ matrix.config.os }}
19 |
20 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
21 |
22 | strategy:
23 | fail-fast: false
24 | matrix:
25 | config:
26 | - {os: macos-latest, r: 'release'}
27 |
28 | - {os: windows-latest, r: 'release'}
29 | # use 4.0 or 4.1 to check with rtools40's older compiler
30 | - {os: windows-latest, r: 'oldrel-4'}
31 |
32 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
33 | - {os: ubuntu-latest, r: 'release'}
34 | - {os: ubuntu-latest, r: 'oldrel-1'}
35 | - {os: ubuntu-latest, r: 'oldrel-2'}
36 | - {os: ubuntu-latest, r: 'oldrel-3'}
37 | - {os: ubuntu-latest, r: 'oldrel-4'}
38 |
39 | env:
40 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
41 | R_KEEP_PKG_SOURCE: yes
42 |
43 | steps:
44 | - uses: actions/checkout@v4
45 |
46 | - uses: r-lib/actions/setup-pandoc@v2
47 |
48 | - uses: r-lib/actions/setup-r@v2
49 | with:
50 | r-version: ${{ matrix.config.r }}
51 | http-user-agent: ${{ matrix.config.http-user-agent }}
52 | use-public-rspm: true
53 |
54 | - uses: r-lib/actions/setup-r-dependencies@v2
55 | with:
56 | extra-packages: any::rcmdcheck
57 | needs: check
58 |
59 | - uses: r-lib/actions/check-r-package@v2
60 | with:
61 | upload-snapshots: true
62 | build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")'
63 |
--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | release:
8 | types: [published]
9 | workflow_dispatch:
10 |
11 | name: pkgdown.yaml
12 |
13 | permissions: read-all
14 |
15 | jobs:
16 | pkgdown:
17 | runs-on: ubuntu-latest
18 | # Only restrict concurrency for non-PR jobs
19 | concurrency:
20 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
21 | env:
22 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
23 | permissions:
24 | contents: write
25 | steps:
26 | - uses: actions/checkout@v4
27 |
28 | - uses: r-lib/actions/setup-pandoc@v2
29 |
30 | - uses: r-lib/actions/setup-r@v2
31 | with:
32 | use-public-rspm: true
33 |
34 | - uses: r-lib/actions/setup-r-dependencies@v2
35 | with:
36 | extra-packages: any::pkgdown, local::.
37 | needs: website
38 |
39 | - name: Build site
40 | run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
41 | shell: Rscript {0}
42 |
43 | - name: Deploy to GitHub pages 🚀
44 | if: github.event_name != 'pull_request'
45 | uses: JamesIves/github-pages-deploy-action@v4.5.0
46 | with:
47 | clean: false
48 | branch: gh-pages
49 | folder: docs
50 |
--------------------------------------------------------------------------------
/.github/workflows/pr-commands.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | issue_comment:
5 | types: [created]
6 |
7 | name: pr-commands.yaml
8 |
9 | permissions: read-all
10 |
11 | jobs:
12 | document:
13 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/document') }}
14 | name: document
15 | runs-on: ubuntu-latest
16 | env:
17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 | permissions:
19 | contents: write
20 | steps:
21 | - uses: actions/checkout@v4
22 |
23 | - uses: r-lib/actions/pr-fetch@v2
24 | with:
25 | repo-token: ${{ secrets.GITHUB_TOKEN }}
26 |
27 | - uses: r-lib/actions/setup-r@v2
28 | with:
29 | use-public-rspm: true
30 |
31 | - uses: r-lib/actions/setup-r-dependencies@v2
32 | with:
33 | extra-packages: any::roxygen2
34 | needs: pr-document
35 |
36 | - name: Document
37 | run: roxygen2::roxygenise()
38 | shell: Rscript {0}
39 |
40 | - name: commit
41 | run: |
42 | git config --local user.name "$GITHUB_ACTOR"
43 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
44 | git add man/\* NAMESPACE
45 | git commit -m 'Document'
46 |
47 | - uses: r-lib/actions/pr-push@v2
48 | with:
49 | repo-token: ${{ secrets.GITHUB_TOKEN }}
50 |
51 | style:
52 | if: ${{ github.event.issue.pull_request && (github.event.comment.author_association == 'MEMBER' || github.event.comment.author_association == 'OWNER') && startsWith(github.event.comment.body, '/style') }}
53 | name: style
54 | runs-on: ubuntu-latest
55 | env:
56 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
57 | permissions:
58 | contents: write
59 | steps:
60 | - uses: actions/checkout@v4
61 |
62 | - uses: r-lib/actions/pr-fetch@v2
63 | with:
64 | repo-token: ${{ secrets.GITHUB_TOKEN }}
65 |
66 | - uses: r-lib/actions/setup-r@v2
67 |
68 | - name: Install dependencies
69 | run: install.packages("styler")
70 | shell: Rscript {0}
71 |
72 | - name: Style
73 | run: styler::style_pkg()
74 | shell: Rscript {0}
75 |
76 | - name: commit
77 | run: |
78 | git config --local user.name "$GITHUB_ACTOR"
79 | git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com"
80 | git add \*.R
81 | git commit -m 'Style'
82 |
83 | - uses: r-lib/actions/pr-push@v2
84 | with:
85 | repo-token: ${{ secrets.GITHUB_TOKEN }}
86 |
--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 |
8 | name: test-coverage.yaml
9 |
10 | permissions: read-all
11 |
12 | jobs:
13 | test-coverage:
14 | runs-on: ubuntu-latest
15 | env:
16 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
17 |
18 | steps:
19 | - uses: actions/checkout@v4
20 |
21 | - uses: r-lib/actions/setup-r@v2
22 | with:
23 | use-public-rspm: true
24 |
25 | - uses: r-lib/actions/setup-r-dependencies@v2
26 | with:
27 | extra-packages: any::covr, any::xml2
28 | needs: coverage
29 |
30 | - name: Test coverage
31 | run: |
32 | cov <- covr::package_coverage(
33 | quiet = FALSE,
34 | clean = FALSE,
35 | install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
36 | )
37 | print(cov)
38 | covr::to_cobertura(cov)
39 | shell: Rscript {0}
40 |
41 | - uses: codecov/codecov-action@v5
42 | with:
43 | # Fail if error if not on PR, or if on PR and token is given
44 | fail_ci_if_error: ${{ github.event_name != 'pull_request' || secrets.CODECOV_TOKEN }}
45 | files: ./cobertura.xml
46 | plugins: noop
47 | disable_search: true
48 | token: ${{ secrets.CODECOV_TOKEN }}
49 |
50 | - name: Show testthat output
51 | if: always()
52 | run: |
53 | ## --------------------------------------------------------------------
54 | find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
55 | shell: bash
56 |
57 | - name: Upload test results
58 | if: failure()
59 | uses: actions/upload-artifact@v4
60 | with:
61 | name: coverage-test-failures
62 | path: ${{ runner.temp }}/package
63 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .Rdata
4 | .httr-oauth
5 | .DS_Store
6 | docs
7 | compile_commands.json
8 | .cache
9 | debug.R
10 | inst/doc
11 | revdep/checks.noindex
12 | revdep/library.noindex
13 | revdep/data.sqlite
14 | revdep/cloud.noindex/*
15 |
--------------------------------------------------------------------------------
/.vscode/c_cpp_properties.json:
--------------------------------------------------------------------------------
1 | {
2 | "configurations": [
3 | {
4 | "name": "Mac",
5 | "includePath": [
6 | "${workspaceFolder}/**",
7 |
8 | "/Library/Frameworks/R.framework/Resources/include",
9 | "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/include",
10 | "/usr/local/include"
11 | ],
12 | "macFrameworkPath": [
13 | "/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks"
14 | ],
15 | "compilerPath": "/usr/bin/clang",
16 | "cStandard": "c99",
17 | "cppStandard": "c++11"
18 | }
19 | ],
20 | "version": 4
21 | }
22 |
--------------------------------------------------------------------------------
/.vscode/extensions.json:
--------------------------------------------------------------------------------
1 | {
2 | "recommendations": [
3 | "Posit.air-vscode"
4 | ]
5 | }
6 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | "version": "0.2.0",
3 | "configurations": [
4 | {
5 | "name": "(lldb) Launch R",
6 | "type": "lldb",
7 | "request": "launch",
8 | "program": "/Library/Frameworks/R.framework/Resources/bin/exec/R",
9 | "args": [
10 | "--vanilla",
11 | "-e",
12 | "source('debug/debug.R')"
13 | ],
14 | "env": {
15 | "R_HOME" : "/Library/Frameworks/R.framework/Resources"
16 | },
17 | "terminal": "console",
18 | "stopOnEntry": false
19 | },
20 | {
21 | "name": "(lldb) Attach to R",
22 | "type": "lldb",
23 | "request": "attach",
24 | "pid": "${command:pickMyProcess}",
25 | "stopOnEntry": false
26 | }
27 | ]
28 | }
29 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "[c]": {
3 | "editor.formatOnSave": true,
4 | "editor.defaultFormatter": "llvm-vs-code-extensions.vscode-clangd"
5 | },
6 | "clangd.arguments": [
7 | "-header-insertion=never"
8 | ],
9 | "[r]": {
10 | "editor.formatOnSave": true,
11 | "editor.defaultFormatter": "Posit.air-vscode"
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: sparsevctrs
2 | Title: Sparse Vectors for Use in Data Frames
3 | Version: 0.3.4.9000
4 | Authors@R: c(
5 | person("Emil", "Hvitfeldt", , "emil.hvitfeldt@posit.co", role = c("aut", "cre"),
6 | comment = c(ORCID = "0000-0002-0679-1945")),
7 | person("Davis", "Vaughan", , "davis@posit.co", role = "ctb"),
8 | person("Posit Software, PBC", role = c("cph", "fnd"), comment = c(ROR = "03wc8by49"))
9 | )
10 | Description: Provides sparse vectors powered by ALTREP (Alternative
11 | Representations for R Objects) that behave like regular vectors, and
12 | can thus be used in data frames. Also provides tools to convert
13 | between sparse matrices and data frames with sparse columns and
14 | functions to interact with sparse vectors.
15 | License: MIT + file LICENSE
16 | URL: https://github.com/r-lib/sparsevctrs,
17 | https://r-lib.github.io/sparsevctrs/
18 | BugReports: https://github.com/r-lib/sparsevctrs/issues
19 | Depends:
20 | R (>= 4.1)
21 | Imports:
22 | cli (>= 3.4.0),
23 | rlang (>= 1.1.0),
24 | vctrs
25 | Suggests:
26 | knitr,
27 | Matrix,
28 | methods,
29 | rmarkdown,
30 | testthat (>= 3.0.0),
31 | tibble,
32 | withr
33 | VignetteBuilder:
34 | knitr
35 | Config/Needs/website: tidyverse/tidytemplate, rmarkdown, lobstr, ggplot2,
36 | bench, tidyr, ggbeeswarm
37 | Config/testthat/edition: 3
38 | Encoding: UTF-8
39 | Roxygen: list(markdown = TRUE)
40 | RoxygenNote: 7.3.2
41 | Config/usethis/last-upkeep: 2025-05-25
42 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2025
2 | COPYRIGHT HOLDER: sparsevctrs authors
3 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | # MIT License
2 |
3 | Copyright (c) 2025 sparsevctrs authors
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(as_sparse_character)
4 | export(as_sparse_double)
5 | export(as_sparse_integer)
6 | export(as_sparse_logical)
7 | export(coerce_to_sparse_data_frame)
8 | export(coerce_to_sparse_matrix)
9 | export(coerce_to_sparse_tibble)
10 | export(has_sparse_elements)
11 | export(is_sparse_character)
12 | export(is_sparse_double)
13 | export(is_sparse_integer)
14 | export(is_sparse_logical)
15 | export(is_sparse_numeric)
16 | export(is_sparse_vector)
17 | export(sparse_addition_scalar)
18 | export(sparse_character)
19 | export(sparse_default)
20 | export(sparse_division_scalar)
21 | export(sparse_double)
22 | export(sparse_dummy)
23 | export(sparse_integer)
24 | export(sparse_is_na)
25 | export(sparse_lag)
26 | export(sparse_logical)
27 | export(sparse_mean)
28 | export(sparse_median)
29 | export(sparse_multiplication)
30 | export(sparse_multiplication_scalar)
31 | export(sparse_positions)
32 | export(sparse_replace_na)
33 | export(sparse_sd)
34 | export(sparse_sqrt)
35 | export(sparse_subtraction_scalar)
36 | export(sparse_values)
37 | export(sparse_var)
38 | export(sparse_which_na)
39 | export(sparsity)
40 | import(rlang)
41 | useDynLib(sparsevctrs, .registration = TRUE)
42 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # sparsevctrs (development version)
2 |
3 | # sparsevctrs 0.3.4
4 |
5 | ## Bug Fixes
6 |
7 | * Fixed bug where `sparse_multiplication()` had a stack imbalence when returning all 0s. (#113)
8 |
9 | * Fixed bug where `sparse_is_na(type = "integer")` would error on character vectors. (#116)
10 |
11 | # sparsevctrs 0.3.3
12 |
13 | ## Bug Fixes
14 |
15 | * Fixed bug where `coerce_to_sparse_matrix()` would sometimes error if input had NA values. (#109)
16 |
17 | # sparsevctrs 0.3.2
18 |
19 | ## Bug Fixes
20 |
21 | * Fixed bug where `sparsity()` error on numeric vectors with classes. (#106)
22 |
23 | # sparsevctrs 0.3.1
24 |
25 | * Fixed bug where sparse multiplication caused undefined behaviour. ($103)
26 |
27 | # sparsevctrs 0.3.0
28 |
29 | ## New Functions
30 |
31 | * Adding the scalar arithmatic functions `sparse_division_scalar()`, `sparse_multiplication_scalar()`, `sparse_addition_scalar()`, `sparse_subtraction_scalar()`. (#87)
32 |
33 | * Adding the arithmatic function `sparse_multiplication()`. (#93)
34 |
35 | * Helper function `sparse_lag()` bas been added. (#99)
36 |
37 | * Helper function `sparse_sqrt()` has been added. (#90)
38 |
39 | * Helper function `sparse_replace_na()` has been added. (#91)
40 |
41 | * Helper functions `sparse_is_na()` and `sparse_which_na()` have been added. (#92)
42 |
43 | ## Improvements
44 |
45 | * Adding `wts` argument to `sparse_mean()`. (#95)
46 |
47 | ## Bug Fixes
48 |
49 | * Fixed bug in `coerce_to_sparse_data_frame()` and `coerce_to_sparse_tibble()` where they didn't work with ngCMatrix. (#89)
50 |
51 | # sparsevctrs 0.2.0
52 |
53 | ## New Functions
54 |
55 | * `sparsity()` has been added, allows sparsity calculations of data.frames, matrices, and sparse matrices. (#82)
56 |
57 | * Utility function `has_sparse_elements()` has been added. (#70)
58 |
59 | * Helper function `sparse_dummy()` has beenn added. (#49)
60 |
61 | * Helper functions `sparse_mean()`, `sparse_var()`, `sparse_sd()`, `sparse_median()` has been added. (#49)
62 |
63 | ## Improvements
64 |
65 | * All sparse vector types now have a significant smaller base object size. (#67)
66 |
67 | * All coerce functions have received a `call` argument. (#72)
68 |
69 | * `is_sparse_vector()` has been rewritten for speed improvement. (#76)
70 |
71 | * `coerce_to_sparse_matrix()` Now turns dense zeroes into sparse zeroes. (#77)
72 |
73 | ## Bug Fixes
74 |
75 | * Fixed bug where `coerce_to_sparse_data_frame()` and `coerce_to_sparse_tibble()` didn't work with matrices with fully sparse columns. (#69)
76 |
77 | * Fixed bugs where `coerce_to_sparse_matrix()` would error for completely sparse columns. (#77)
78 |
79 | # sparsevctrs 0.1.0
80 |
81 | * Initial CRAN submission.
82 |
--------------------------------------------------------------------------------
/R/arithmatic.R:
--------------------------------------------------------------------------------
1 | #' Scalar arithmatic with sparse vectors
2 | #'
3 | #' Do Arithmatic on sparse vectors without destroying the sparsity. Note that
4 | #' only multiplication and division preserves the default value.
5 | #'
6 | #' @param x A sparse vector.
7 | #' @param val A single numeric value.
8 | #'
9 | #' @details
10 | #' No checking of the inputs are being done.
11 | #'
12 | #' `sparse_division_scalar()` and `sparse_multiplication_scalar()` are the most
13 | #' used ones, as they preserve the default, which is often what you want to do.
14 | #'
15 | #' `sparse_division_scalar()` always produces double vectors, regardless of
16 | #' whether they could be represented as integers or not. Expect when `val = 1`
17 | #' as the input is returned unchanged, or `val = NA` as the input returned will
18 | #' be `NA` or the appropiate type.
19 | #'
20 | #' @return A sparse vector of same type.
21 | #'
22 | #' @examples
23 | #' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
24 | #'
25 | #' sparse_division_scalar(x_sparse, 2)
26 | #' sparse_multiplication_scalar(x_sparse, 2)
27 | #' sparse_addition_scalar(x_sparse, 2)
28 | #' sparse_subtraction_scalar(x_sparse, 2)
29 | #' @name sparse-arithmatic-scalar
30 | NULL
31 |
32 | #' @rdname sparse-arithmatic-scalar
33 | #' @export
34 | sparse_division_scalar <- function(x, val) {
35 | if (is.na(val)) {
36 | if (is.integer(x)) {
37 | return(rep(NA_integer_, length(x)))
38 | } else {
39 | return(rep(NA_real_, length(x)))
40 | }
41 | }
42 |
43 | if (val == 0) {
44 | return(rep(Inf, length(x)))
45 | }
46 |
47 | if (val == 1) {
48 | return(x)
49 | }
50 |
51 | res <- sparse_double(
52 | values = sparse_values(x) / val,
53 | positions = sparse_positions(x),
54 | length = length(x),
55 | default = sparse_default(x)
56 | )
57 |
58 | res
59 | }
60 |
61 | #' @rdname sparse-arithmatic-scalar
62 | #' @export
63 | sparse_multiplication_scalar <- function(x, val) {
64 | if (is.na(val)) {
65 | if (is.integer(x)) {
66 | return(rep(NA_integer_, length(x)))
67 | } else {
68 | return(rep(NA_real_, length(x)))
69 | }
70 | }
71 |
72 | if (val == 1) {
73 | return(x)
74 | }
75 |
76 | if (is_sparse_integer(x)) {
77 | if (val == 0) {
78 | res <- sparse_integer(
79 | values = integer(),
80 | positions = integer(),
81 | length = length(x),
82 | default = sparse_default(x)
83 | )
84 | } else {
85 | res <- sparse_integer(
86 | values = sparse_values(x) * val,
87 | positions = sparse_positions(x),
88 | length = length(x),
89 | default = sparse_default(x)
90 | )
91 | }
92 | }
93 | if (is_sparse_double(x)) {
94 | if (val == 0) {
95 | res <- sparse_double(
96 | values = double(),
97 | positions = integer(),
98 | length = length(x),
99 | default = sparse_default(x)
100 | )
101 | } else {
102 | res <- sparse_double(
103 | values = sparse_values(x) * val,
104 | positions = sparse_positions(x),
105 | length = length(x),
106 | default = sparse_default(x)
107 | )
108 | }
109 | }
110 |
111 | res
112 | }
113 |
114 | #' @rdname sparse-arithmatic-scalar
115 | #' @export
116 | sparse_addition_scalar <- function(x, val) {
117 | if (is.na(val)) {
118 | if (is.integer(x)) {
119 | return(rep(NA_integer_, length(x)))
120 | } else {
121 | return(rep(NA_real_, length(x)))
122 | }
123 | }
124 |
125 | if (val == 0) {
126 | return(x)
127 | }
128 |
129 | if (is_sparse_integer(x)) {
130 | res <- sparse_integer(
131 | values = sparse_values(x) + val,
132 | positions = sparse_positions(x),
133 | length = length(x),
134 | default = sparse_default(x) + val
135 | )
136 | }
137 | if (is_sparse_double(x)) {
138 | res <- sparse_double(
139 | values = sparse_values(x) + val,
140 | positions = sparse_positions(x),
141 | length = length(x),
142 | default = sparse_default(x) + val
143 | )
144 | }
145 |
146 | res
147 | }
148 |
149 | #' @rdname sparse-arithmatic-scalar
150 | #' @export
151 | sparse_subtraction_scalar <- function(x, val) {
152 | if (is.na(val)) {
153 | if (is.integer(x)) {
154 | return(rep(NA_integer_, length(x)))
155 | } else {
156 | return(rep(NA_real_, length(x)))
157 | }
158 | }
159 |
160 | if (val == 0) {
161 | return(x)
162 | }
163 |
164 | if (is_sparse_integer(x)) {
165 | res <- sparse_integer(
166 | values = sparse_values(x) - val,
167 | positions = sparse_positions(x),
168 | length = length(x),
169 | default = sparse_default(x) - val
170 | )
171 | }
172 | if (is_sparse_double(x)) {
173 | res <- sparse_double(
174 | values = sparse_values(x) - val,
175 | positions = sparse_positions(x),
176 | length = length(x),
177 | default = sparse_default(x) - val
178 | )
179 | }
180 |
181 | res
182 | }
183 |
184 | #' Vector arithmatic with sparse vectors
185 | #'
186 | #' Do arithmatic operations on sparse vectors while trying to void destroying
187 | #' the sparsity.
188 | #'
189 | #' @param x A numeric vector.
190 | #' @param y A numeric vector.
191 | #'
192 | #' @details
193 | #'
194 | #' Note that this function works with both sparse and dense vectors for both `x`
195 | #' and `y`, returning a sparse or dense vector according to the input.
196 | #'
197 | #' For `sparse_multiplication()` the class of the resulting vector depends on
198 | #' the classes of `x` and `y`. If both `x` and `y` are integer vectors then an
199 | #' integer vector is returned, otherwise a double vector is returned.
200 | #'
201 | #' `sparse_multiplication()` will return a non-sparse vector if both `x` and `y`
202 | #' is non-sparse. Otherwise a sparse vector is returned.
203 | #'
204 | #' `sparse_multiplication()` will destroy sparsity of sparse vectors with non-0
205 | #' `default` values.
206 | #'
207 | #' @return A sparse vector of same type.
208 | #'
209 | #' @examples
210 | #' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
211 | #'
212 | #' sparse_multiplication(x_sparse, x_sparse)
213 | #' @name sparse-arithmatic
214 | NULL
215 |
216 | #' @rdname sparse-arithmatic
217 | #' @export
218 | sparse_multiplication <- function(x, y) {
219 | if (!is.numeric(x)) {
220 | cli::cli_abort("{.arg x} must me numeric, not {.obj_type_friendly {x}}.")
221 | }
222 | if (!is.numeric(y)) {
223 | cli::cli_abort("{.arg y} must me numeric not {.obj_type_friendly {x}}.")
224 | }
225 |
226 | if (length(x) != length(y)) {
227 | x_len <- length(x)
228 | y_len <- length(y)
229 | cli::cli_abort(
230 | "{.arg x} ({x_len}) and {.arg y} ({y_len}) must be the same length."
231 | )
232 | }
233 |
234 | x_class <- class(x)
235 | y_class <- class(y)
236 |
237 | if (x_class != y_class) {
238 | if (x_class == "integer") {
239 | if (is_sparse_vector(x)) {
240 | x <- as_sparse_double(x, default = sparse_default(x))
241 | } else {
242 | x <- as.double(x)
243 | }
244 | } else {
245 | if (is_sparse_vector(y)) {
246 | y <- as_sparse_double(y, default = sparse_default(y))
247 | } else {
248 | y <- as.double(y)
249 | }
250 | }
251 | }
252 |
253 | x_default <- sparse_default(x)
254 | y_default <- sparse_default(y)
255 |
256 | if (is_altrep_non_sparse_vector(x) || (!is.na(x_default) && x_default != 0)) {
257 | x <- x[]
258 | }
259 |
260 | if (is_altrep_non_sparse_vector(y) || (!is.na(y_default) && y_default != 0)) {
261 | y <- y[]
262 | }
263 |
264 | .Call(ffi_sparse_multiplication, x, y)
265 | }
266 |
--------------------------------------------------------------------------------
/R/coerce-vector.R:
--------------------------------------------------------------------------------
1 | #' Coerce numeric vector to sparse double
2 | #'
3 | #' Takes a numeric vector, integer or double, and turn it into a sparse double
4 | #' vector.
5 | #'
6 | #' @param x a numeric vector.
7 | #' @param default default value to use. Defaults to `0`.
8 | #'
9 | #' The values of `x` must be double or integer. It must not contain any `Inf` or
10 | #' `NaN` values.
11 | #'
12 | #' @return sparse vectors
13 | #'
14 | #' @examples
15 | #' x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, 0)
16 | #' x_sparse <- as_sparse_double(x_dense)
17 | #' x_sparse
18 | #'
19 | #' is_sparse_double(x_sparse)
20 | #' @name coerce-vector
21 | NULL
22 |
23 | #' @rdname coerce-vector
24 | #' @export
25 | as_sparse_double <- function(x, default = 0) {
26 | if (is_sparse_double(x)) {
27 | return(x)
28 | }
29 |
30 | validate_values_double(x)
31 |
32 | check_number_decimal(default)
33 |
34 | index <- which(x != default | is.na(x))
35 |
36 | sparse_double(
37 | values = x[index],
38 | positions = index,
39 | length = length(x),
40 | default = default
41 | )
42 | }
43 |
44 | #' @rdname coerce-vector
45 | #' @export
46 | as_sparse_integer <- function(x, default = 0L) {
47 | if (is_sparse_integer(x)) {
48 | return(x)
49 | }
50 |
51 | validate_values_integer(x)
52 | check_number_whole(default)
53 |
54 | values <- vctrs::vec_cast(x, integer())
55 | default <- vctrs::vec_cast(default, integer())
56 |
57 | index <- which(x != default | is.na(x))
58 |
59 | sparse_integer(
60 | values = x[index],
61 | positions = index,
62 | length = length(x),
63 | default = default
64 | )
65 | }
66 |
67 | #' @rdname coerce-vector
68 | #' @export
69 | as_sparse_character <- function(x, default = "") {
70 | if (is_sparse_character(x)) {
71 | return(x)
72 | }
73 |
74 | check_string(default)
75 |
76 | values <- vctrs::vec_cast(x, character())
77 | default <- vctrs::vec_cast(default, character())
78 |
79 | index <- which(x != default | is.na(x))
80 |
81 | sparse_character(
82 | values = x[index],
83 | positions = index,
84 | length = length(x),
85 | default = default
86 | )
87 | }
88 |
89 | #' @rdname coerce-vector
90 | #' @export
91 | as_sparse_logical <- function(x, default = FALSE) {
92 | if (is_sparse_logical(x)) {
93 | return(x)
94 | }
95 |
96 | check_bool(default)
97 |
98 | index <- which(x != default | is.na(x))
99 |
100 | sparse_logical(
101 | values = x[index],
102 | positions = index,
103 | length = length(x),
104 | default = default
105 | )
106 | }
107 |
--------------------------------------------------------------------------------
/R/coerce.R:
--------------------------------------------------------------------------------
1 | #' Coerce sparse data frame to sparse matrix
2 | #'
3 | #' Turning data frame with sparse columns into sparse matrix using
4 | #' [Matrix::sparseMatrix()].
5 | #'
6 | #' @param x a data frame or tibble with sparse columns.
7 | #' @inheritParams rlang::args_error_context
8 | #'
9 | #' @details
10 | #' No checking is currently do to `x` to determine whether it contains sparse
11 | #' columns or not. Thus it works with any data frame. Needless to say, creating
12 | #' a sparse matrix out of a dense data frame is not ideal.
13 | #'
14 | #' @return sparse matrix
15 | #'
16 | #' @seealso [coerce_to_sparse_data_frame()] [coerce_to_sparse_tibble()]
17 | #' @examplesIf rlang::is_installed("Matrix")
18 | #' sparse_tbl <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
19 | #' names(sparse_tbl) <- letters[1:10]
20 | #' sparse_tbl <- as.data.frame(sparse_tbl)
21 | #' sparse_tbl
22 | #'
23 | #' res <- coerce_to_sparse_matrix(sparse_tbl)
24 | #' res
25 | #' @export
26 | coerce_to_sparse_matrix <- function(x, call = rlang::caller_env(0)) {
27 | rlang::check_installed("Matrix")
28 |
29 | if (!inherits(x, "data.frame")) {
30 | cli::cli_abort(
31 | "{.arg x} must be a {.cls data.frame}, not {.obj_type_friendly {x}}.",
32 | call = call
33 | )
34 | }
35 |
36 | if (!all(vapply(x, is.numeric, logical(1)))) {
37 | offenders <- which(!vapply(x, is.numeric, logical(1)))
38 | offenders <- names(x)[offenders]
39 | cli::cli_abort(
40 | c(
41 | x = "All columns of {.arg x} must be numeric.",
42 | i = "Non-numeric columns: {.field {offenders}}."
43 | ),
44 | call = call
45 | )
46 | }
47 |
48 | if (!any(vapply(x, is_sparse_numeric, logical(1)))) {
49 | res <- as.matrix(x)
50 | res <- Matrix::Matrix(res, sparse = TRUE)
51 | return(res)
52 | }
53 |
54 | if (!all(vapply(x, sparse_default, numeric(1)) == 0, na.rm = TRUE)) {
55 | offenders <- which(vapply(x, sparse_default, numeric(1)) != 0)
56 |
57 | for (i in offenders) {
58 | x[[i]] <- x[[i]][]
59 | }
60 | }
61 |
62 | all_positions <- lapply(x, sparse_positions)
63 | all_values <- lapply(x, sparse_values)
64 |
65 | all_rows <- rep(seq_along(x), times = lengths(all_positions))
66 |
67 | all_positions <- unlist(all_positions, use.names = FALSE)
68 | all_values <- unlist(all_values, use.names = FALSE)
69 |
70 | # TODO: maybe faster to do this above?
71 | non_zero <- all_values != 0 | is.na(all_values)
72 | all_rows <- all_rows[non_zero]
73 | all_positions <- all_positions[non_zero]
74 | all_values <- all_values[non_zero]
75 |
76 | n_row <- nrow(x)
77 | n_col <- ncol(x)
78 |
79 | if (identical(rownames(x), as.character(seq_len(nrow(x))))) {
80 | row_names <- NULL
81 | } else {
82 | row_names <- rownames(x)
83 | }
84 |
85 | res <- Matrix::sparseMatrix(
86 | i = all_positions,
87 | j = all_rows,
88 | x = all_values,
89 | dims = c(n_row, n_col),
90 | dimnames = list(row_names, colnames(x))
91 | )
92 | res
93 | }
94 |
95 | #' Coerce sparse matrix to tibble with sparse columns
96 | #'
97 | #' Turning a sparse matrix into a tibble.
98 | #'
99 | #' @param x sparse matrix.
100 | #' @inheritParams rlang::args_error_context
101 | #'
102 | #' @details
103 | #' The only requirement from the sparse matrix is that it contains column names.
104 | #'
105 | #' @return tibble with sparse columns
106 | #'
107 | #' @seealso [coerce_to_sparse_data_frame()] [coerce_to_sparse_matrix()]
108 | #' @examplesIf rlang::is_installed("tibble")
109 | #' set.seed(1234)
110 | #' mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10)
111 | #' colnames(mat) <- letters[1:10]
112 | #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
113 | #' sparse_mat
114 | #'
115 | #' res <- coerce_to_sparse_tibble(sparse_mat)
116 | #' res
117 | #'
118 | #' # All columns are sparse
119 | #' vapply(res, is_sparse_vector, logical(1))
120 | #' @export
121 | coerce_to_sparse_tibble <- function(x, call = rlang::caller_env(0)) {
122 | rlang::check_installed("tibble")
123 |
124 | if (!any(methods::is(x) == "sparseMatrix")) {
125 | cli::cli_abort(
126 | "{.arg x} must be a {.cls sparseMatrix}, not {.obj_type_friendly {x}}.",
127 | call = call
128 | )
129 | }
130 |
131 | if (!methods::is(x, "dgCMatrix")) {
132 | x <- methods::as(x, "generalMatrix")
133 | x <- methods::as(x, "CsparseMatrix")
134 | }
135 |
136 | if (is.null(colnames(x))) {
137 | cli::cli_abort(
138 | "{.arg x} must have column names.",
139 | call = call
140 | )
141 | }
142 |
143 | res <- .sparse_matrix_to_list(x)
144 | res <- tibble::as_tibble(res)
145 | res
146 | }
147 |
148 | #' Coerce sparse matrix to data frame with sparse columns
149 | #'
150 | #' Turning a sparse matrix into a data frame
151 | #'
152 | #' @param x sparse matrix.
153 | #' @inheritParams rlang::args_error_context
154 | #'
155 | #' @details
156 | #' The only requirement from the sparse matrix is that it contains column names.
157 | #'
158 | #' @return data.frame with sparse columns
159 | #'
160 | #' @seealso [coerce_to_sparse_tibble()] [coerce_to_sparse_matrix()]
161 | #' @examplesIf rlang::is_installed("Matrix")
162 | #' set.seed(1234)
163 | #' mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10)
164 | #' colnames(mat) <- letters[1:10]
165 | #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
166 | #' sparse_mat
167 | #'
168 | #' res <- coerce_to_sparse_data_frame(sparse_mat)
169 | #' res
170 | #'
171 | #' # All columns are sparse
172 | #' vapply(res, is_sparse_vector, logical(1))
173 | #' @export
174 | coerce_to_sparse_data_frame <- function(x, call = rlang::caller_env(0)) {
175 | if (!any(methods::is(x) == "sparseMatrix")) {
176 | cli::cli_abort(
177 | "{.arg x} must be a {.cls sparseMatrix}, not {.obj_type_friendly {x}}.",
178 | call = call
179 | )
180 | }
181 |
182 | if (!methods::is(x, "dgCMatrix")) {
183 | x <- methods::as(x, "generalMatrix")
184 | x <- methods::as(x, "CsparseMatrix")
185 | }
186 |
187 | if (is.null(colnames(x))) {
188 | cli::cli_abort(
189 | "{.arg x} must have column names.",
190 | call = call
191 | )
192 | }
193 |
194 | res <- .sparse_matrix_to_list(x)
195 | res <- as.data.frame(res)
196 | res
197 | }
198 |
199 | .sparse_matrix_to_list <- function(x) {
200 | if (methods::is(x, "ngCMatrix")) {
201 | values <- rep(1, length(x@i))
202 | } else {
203 | values <- x@x
204 | }
205 |
206 | x_positions <- x@i
207 | n_nonzero <- diff(x@p)
208 |
209 | x_length <- nrow(x)
210 |
211 | res <- list()
212 | start <- 1
213 | for (i in seq_along(n_nonzero)) {
214 | if (n_nonzero[i] == 0) {
215 | res[[i]] <- sparse_double(
216 | values = double(),
217 | positions = double(),
218 | length = x_length
219 | )
220 | next
221 | }
222 |
223 | index <- seq(start, start + n_nonzero[i] - 1)
224 |
225 | res[[i]] <- sparse_double(
226 | values = values[index],
227 | positions = x_positions[index] + 1,
228 | length = x_length
229 | )
230 | start <- start + n_nonzero[i]
231 | }
232 |
233 | names(res) <- colnames(x)
234 | res
235 | }
236 |
--------------------------------------------------------------------------------
/R/extractors.R:
--------------------------------------------------------------------------------
1 | #' Information extraction from sparse vectors
2 | #'
3 | #' Extract positions, values, and default from sparse vectors without the need
4 | #' to materialize vector.
5 | #'
6 | #' @details
7 | #'
8 | #' `sparse_default()` returns `NA` when applied to non-sparse vectors. This is
9 | #' done to have an indicator of non-sparsity.
10 | #'
11 | #' @param x vector to be extracted from.
12 | #'
13 | #' @details
14 | #' for ease of use, these functions also works on non-sparse variables.
15 | #'
16 | #' @return vectors of requested attributes
17 | #'
18 | #' @examples
19 | #' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
20 | #' x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1)
21 | #'
22 | #' sparse_positions(x_sparse)
23 | #' sparse_values(x_sparse)
24 | #' sparse_default(x_sparse)
25 | #'
26 | #' sparse_positions(x_dense)
27 | #' sparse_values(x_dense)
28 | #' sparse_default(x_dense)
29 | #'
30 | #' x_sparse_3 <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10, default = 3)
31 | #' sparse_default(x_sparse_3)
32 | #' @name extractors
33 | NULL
34 |
35 | #' @rdname extractors
36 | #' @export
37 | sparse_positions <- function(x) {
38 | if (!is_sparse_vector(x)) {
39 | return(seq_along(x))
40 | }
41 |
42 | .Call(ffi_altrep_sparse_positions, x)
43 | }
44 |
45 | #' @rdname extractors
46 | #' @export
47 | sparse_values <- function(x) {
48 | if (!is_sparse_vector(x)) {
49 | return(x)
50 | }
51 |
52 | .Call(ffi_altrep_sparse_values, x)
53 | }
54 |
55 | #' @rdname extractors
56 | #' @export
57 | sparse_default <- function(x) {
58 | if (!is_sparse_vector(x)) {
59 | return(NA)
60 | }
61 |
62 | .Call(ffi_altrep_sparse_default, x)
63 | }
64 |
--------------------------------------------------------------------------------
/R/has_sparse_elements.R:
--------------------------------------------------------------------------------
1 | #' Check for sparse elements
2 | #'
3 | #' This function checks to see if a data.frame, tibble or list contains one or
4 | #' more sparse vectors.
5 | #'
6 | #' @param x a data frame, tibble, or list.
7 | #'
8 | #' @details
9 | #' The checking in this function is done using [is_sparse_vector()], but is
10 | #' implemented using an early exit pattern to provide fast performance for wide
11 | #' data.frames.
12 | #'
13 | #' This function does not test whether `x` is a data.frame, tibble or list. It
14 | #' simply iterates over the elements and sees if they are sparse vectors.
15 | #'
16 | #' @return A single logical value.
17 | #'
18 | #' @examplesIf rlang::is_installed("Matrix")
19 | #' set.seed(1234)
20 | #' n_cols <- 10000
21 | #' mat <- matrix(sample(0:1, n_cols * 10, TRUE, c(0.9, 0.1)), ncol = n_cols)
22 | #' colnames(mat) <- as.character(seq_len(n_cols))
23 | #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
24 | #'
25 | #' res <- coerce_to_sparse_tibble(sparse_mat)
26 | #' has_sparse_elements(res)
27 | #'
28 | #' has_sparse_elements(mtcars)
29 | #' @export
30 | has_sparse_elements <- function(x) {
31 | res <- FALSE
32 |
33 | for (elt in x) {
34 | if (is_sparse_vector(elt)) {
35 | res <- TRUE
36 | break
37 | }
38 | }
39 | res
40 | }
41 |
--------------------------------------------------------------------------------
/R/options.R:
--------------------------------------------------------------------------------
1 | #' sparsevctrs options
2 | #'
3 | #' These options can be set with `options()`.
4 | #'
5 | #' ## sparsevctrs.verbose_materialize
6 | #'
7 | #' This option is meant to be used as a diagnostic tool. Materialization of
8 | #' sparse vectors are done silently by default. This can make it hard to
9 | #' determine if your code is doing what you want.
10 | #'
11 | #' Setting `sparsevctrs.verbose_materialize` is a way to alert when
12 | #' materialization occurs. Note that only the first materialization is counted
13 | #' for the options below, as the materialized vector is cached.
14 | #'
15 | #' Setting `sparsevctrs.verbose_materialize = 1` or
16 | #' `sparsevctrs.verbose_materialize = TRUE` will result in a message being
17 | #' emitted each time a sparse vector is materialized.
18 | #'
19 | #' Setting `sparsevctrs.verbose_materialize = 2` will result in a warning being
20 | #' thrown each time a sparse vector is materialized.
21 | #'
22 | #' Setting `sparsevctrs.verbose_materialize = 3` will result in an error being
23 | #' thrown each time a sparse vector is materialized.
24 | #'
25 | #' @name sparsevctrs_options
26 | NULL
27 |
--------------------------------------------------------------------------------
/R/sparse_character.R:
--------------------------------------------------------------------------------
1 | #' Create sparse character vector
2 | #'
3 | #' Construction of vectors where only values and positions are recorded. The
4 | #' Length and default values determine all other information.
5 | #'
6 | #' @param values integer vector, values of non-zero entries.
7 | #' @param positions integer vector, indices of non-zero entries.
8 | #' @param length integer value, Length of vector.
9 | #' @param default integer value, value at indices not specified by `positions`.
10 | #' Defaults to `""`. Cannot be `NA`.
11 | #'
12 | #' @details
13 | #'
14 | #' `values` and `positions` are expected to be the same length, and are allowed
15 | #' to both have zero length.
16 | #'
17 | #' Allowed values for `value` are character values. Missing values such as `NA`
18 | #' and `NA_real_` are allowed as they are turned into `NA_character_`.
19 | #' Everything else is disallowed. The values are also not allowed to take the
20 | #' same value as `default`.
21 | #'
22 | #' `positions` should be integers or integer-like doubles. Everything else is
23 | #' not allowed. Positions should furthermore be positive (`0` not allowed),
24 | #' unique, and in increasing order. Lastly they should all be smaller that
25 | #' `length`.
26 | #'
27 | #' For developers:
28 | #'
29 | #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a
30 | #' message each time a sparse vector has been forced to materialize.
31 | #'
32 | #' @return sparse character vector
33 | #'
34 | #' @seealso [sparse_double()] [sparse_integer()]
35 | #'
36 | #' @examples
37 | #' sparse_character(character(), integer(), 10)
38 | #'
39 | #' sparse_character(c("A", "C", "E"), c(2, 5, 10), 10)
40 | #'
41 | #' str(
42 | #' sparse_character(c("A", "C", "E"), c(2, 5, 10), 1000000000)
43 | #' )
44 | #' @export
45 | sparse_character <- function(values, positions, length, default = "") {
46 | check_string(default)
47 | validate_length(length)
48 |
49 | if (!is.integer(length)) {
50 | length <- as.integer(length)
51 | }
52 |
53 | values <- vctrs::vec_cast(values, character())
54 | default <- vctrs::vec_cast(default, character())
55 |
56 | validate_positions(positions, length, len_values = length(values))
57 | positions <- as.integer(positions)
58 |
59 | if (any(values == default, na.rm = TRUE)) {
60 | offenders <- which(values == default)
61 | cli::cli_abort(
62 | c(
63 | x = "{.arg values} value must not be equal to the default {default}.",
64 | i = "{default} values at index: {offenders}."
65 | )
66 | )
67 | }
68 |
69 | new_sparse_character(values, positions, length, default)
70 | }
71 |
72 | new_sparse_character <- function(values, positions, length, default) {
73 | x <- list(
74 | values,
75 | positions,
76 | length,
77 | default
78 | )
79 |
80 | .Call(ffi_altrep_new_sparse_string, x)
81 | }
82 |
--------------------------------------------------------------------------------
/R/sparse_double.R:
--------------------------------------------------------------------------------
1 | #' Create sparse double vector
2 | #'
3 | #' Construction of vectors where only values and positions are recorded. The
4 | #' Length and default values determine all other information.
5 | #'
6 | #' @param values double vector, values of non-zero entries.
7 | #' @param positions integer vector, indices of non-zero entries.
8 | #' @param length integer value, Length of vector.
9 | #' @param default double value, value at indices not specified by `positions`.
10 | #' Defaults to `0`. Cannot be `NA`.
11 | #'
12 | #' @details
13 | #'
14 | #' `values` and `positions` are expected to be the same length, and are allowed
15 | #' to both have zero length.
16 | #'
17 | #' Allowed values for `value` is double and integer values. integer values will
18 | #' be coerced to doubles. Missing values such as `NA` and `NA_real_` are
19 | #' allowed. Everything else is disallowed, This includes `Inf` and `NaN`. The
20 | #' values are also not allowed to take the same value as `default`.
21 | #'
22 | #' `positions` should be integers or integer-like doubles. Everything else is
23 | #' not allowed. Positions should furthermore be positive (`0` not allowed),
24 | #' unique, and in increasing order. Lastly they should all be smaller that
25 | #' `length`.
26 | #'
27 | #' For developers:
28 | #'
29 | #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a
30 | #' message each time a sparse vector has been forced to materialize.
31 | #'
32 | #' @return sparse double vector
33 | #'
34 | #' @seealso [sparse_integer()] [sparse_character()]
35 | #'
36 | #' @examples
37 | #' sparse_double(numeric(), integer(), 10)
38 | #'
39 | #' sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
40 | #'
41 | #' str(
42 | #' sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 1000000000)
43 | #' )
44 | #' @export
45 | sparse_double <- function(values, positions, length, default = 0) {
46 | check_number_decimal(default)
47 | validate_length(length)
48 |
49 | if (!is.integer(length)) {
50 | length <- as.integer(length)
51 | }
52 |
53 | if (is.integer(default)) {
54 | default <- as.numeric(default)
55 | }
56 |
57 | if (identical(values, NA)) {
58 | values <- NA_real_
59 | }
60 |
61 | validate_values_double(values)
62 |
63 | if (is.integer(values)) {
64 | values <- as.double(values)
65 | }
66 |
67 | validate_positions(positions, length, len_values = length(values))
68 | positions <- as.integer(positions)
69 |
70 | if (any(values == default, na.rm = TRUE)) {
71 | offenders <- which(values == default)
72 | cli::cli_abort(
73 | c(
74 | x = "{.arg values} value must not be equal to the default {default}.",
75 | i = "{default} values at index: {offenders}."
76 | )
77 | )
78 | }
79 |
80 | new_sparse_double(values, positions, length, default)
81 | }
82 |
83 | new_sparse_double <- function(values, positions, length, default) {
84 | x <- list(
85 | values,
86 | positions,
87 | length,
88 | default
89 | )
90 |
91 | .Call(ffi_altrep_new_sparse_double, x)
92 | }
93 |
--------------------------------------------------------------------------------
/R/sparse_dummy.R:
--------------------------------------------------------------------------------
1 | #' Generate sparse dummy variables
2 | #'
3 | #' @param x A factor.
4 | #' @param one_hot A single logical value. Should the first factor level be
5 | #' included or not. Defaults to `FALSE`.
6 | #'
7 | #' @details
8 | #' Only factor variables can be used with [sparse_dummy()]. A call to
9 | #' `as.factor()` would be required for any other type of data.
10 | #'
11 | #' If only a single level is present after `one_hot` takes effect. Then the
12 | #' vector produced won't be sparse.
13 | #'
14 | #' A missing value at the `i`th element will produce missing values for all
15 | #' dummy variables at thr `i`th position.
16 | #'
17 | #' @return A list of sparse integer dummy variables.
18 | #'
19 | #' @examples
20 | #' x <- factor(c("a", "a", "b", "c", "d", "b"))
21 | #'
22 | #' sparse_dummy(x, one_hot = FALSE)
23 | #'
24 | #' x <- factor(c("a", "a", "b", "c", "d", "b"))
25 | #'
26 | #' sparse_dummy(x, one_hot = TRUE)
27 | #'
28 | #' x <- factor(c("a", NA, "b", "c", "d", NA))
29 | #'
30 | #' sparse_dummy(x, one_hot = FALSE)
31 | #'
32 | #' x <- factor(c("a", NA, "b", "c", "d", NA))
33 | #'
34 | #' sparse_dummy(x, one_hot = TRUE)
35 | #' @export
36 | sparse_dummy <- function(x, one_hot = TRUE) {
37 | if (!is.factor(x)) {
38 | cli::cli_abort("{.arg x} must be a factor, not {.obj_type_friendly {x}}.")
39 | }
40 |
41 | lvls <- levels(x)
42 |
43 | x <- as.integer(x)
44 |
45 | if (!one_hot) {
46 | lvls <- lvls[-1]
47 | x <- x - 1L
48 | }
49 |
50 | n_lvls <- length(lvls)
51 |
52 | if (n_lvls == 1 && one_hot) {
53 | res <- list(rep(1L, length(x)))
54 | names(res) <- lvls
55 | return(res)
56 | }
57 |
58 | counts <- tabulate(x, nbins = n_lvls)
59 |
60 | if (anyNA(x)) {
61 | n_missing <- sum(is.na(x))
62 | counts <- counts + n_missing
63 | res <- .Call(ffi_sparse_dummy_na, x, lvls, counts, one_hot)
64 | } else {
65 | res <- .Call(ffi_sparse_dummy, x, lvls, counts, one_hot)
66 | }
67 |
68 | names(res) <- lvls
69 | res
70 | }
71 |
--------------------------------------------------------------------------------
/R/sparse_integer.R:
--------------------------------------------------------------------------------
1 | #' Create sparse integer vector
2 | #'
3 | #' Construction of vectors where only values and positions are recorded. The
4 | #' Length and default values determine all other information.
5 | #'
6 | #' @param values integer vector, values of non-zero entries.
7 | #' @param positions integer vector, indices of non-zero entries.
8 | #' @param length integer value, Length of vector.
9 | #' @param default integer value, value at indices not specified by `positions`.
10 | #' Defaults to `0L`. Cannot be `NA`.
11 | #'
12 | #' @details
13 | #'
14 | #' `values` and `positions` are expected to be the same length, and are allowed
15 | #' to both have zero length.
16 | #'
17 | #' Allowed values for `value` is integer values. This means that the double
18 | #' vector `c(1, 5, 4)` is accepted as it can be losslessly converted to the
19 | #' integer vector `c(1L, 5L, 4L)`. Missing values such as `NA` and `NA_real_`
20 | #' are allowed. Everything else is disallowed, This includes `Inf` and `NaN`.
21 | #' The values are also not allowed to take the same value as `default`.
22 | #'
23 | #' `positions` should be integers or integer-like doubles. Everything else is
24 | #' not allowed. Positions should furthermore be positive (`0` not allowed),
25 | #' unique, and in increasing order. Lastly they should all be smaller that
26 | #' `length`.
27 | #'
28 | #' For developers:
29 | #'
30 | #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a
31 | #' message each time a sparse vector has been forced to materialize.
32 | #'
33 | #' @return sparse integer vector
34 | #'
35 | #' @seealso [sparse_double()] [sparse_character()]
36 | #'
37 | #' @examples
38 | #' sparse_integer(integer(), integer(), 10)
39 | #'
40 | #' sparse_integer(c(4, 5, 7), c(2, 5, 10), 10)
41 | #'
42 | #' str(
43 | #' sparse_integer(c(4, 5, 7), c(2, 5, 10), 1000000000)
44 | #' )
45 | #' @export
46 | sparse_integer <- function(values, positions, length, default = 0L) {
47 | check_number_whole(default)
48 | validate_length(length)
49 |
50 | if (!is.integer(length)) {
51 | length <- as.integer(length)
52 | }
53 |
54 | if (any(is.nan(values))) {
55 | offenders <- which(is.nan(values))
56 | cli::cli_abort(
57 | c(
58 | x = "{.arg values} must not contain NaN values.",
59 | i = "NaN values at index: {offenders}."
60 | )
61 | )
62 | }
63 |
64 | values <- vctrs::vec_cast(values, integer())
65 | default <- vctrs::vec_cast(default, integer())
66 |
67 | validate_positions(positions, length, len_values = length(values))
68 | positions <- as.integer(positions)
69 |
70 | if (any(values == default, na.rm = TRUE)) {
71 | offenders <- which(values == default)
72 | cli::cli_abort(
73 | c(
74 | x = "{.arg values} value must not be equal to the default {default}.",
75 | i = "{default} values at index: {offenders}."
76 | )
77 | )
78 | }
79 |
80 | new_sparse_integer(values, positions, length, default)
81 | }
82 |
83 | new_sparse_integer <- function(values, positions, length, default) {
84 | x <- list(
85 | values,
86 | positions,
87 | length,
88 | default
89 | )
90 |
91 | .Call(ffi_altrep_new_sparse_integer, x)
92 | }
93 |
--------------------------------------------------------------------------------
/R/sparse_is_na.R:
--------------------------------------------------------------------------------
1 | #' Detect Pressence of Missing Values
2 | #'
3 | #' @param x A sparse vector.
4 | #' @param type A single string. Most be one of `logical` or `integer`.
5 | #' Determines the resulting vector. If `type = integer` then a sparse vector is
6 | #' returned.
7 | #'
8 | #' @details
9 | #' This function, as with any of the other helper functions assumes that the
10 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
11 | #' and it is thus the users responsibility to perform input checking.
12 | #'
13 | #' Note that the resulting vector will be not be a sparse vector.
14 | #'
15 | #' @seealso [sparse_which_na()]
16 | #'
17 | #' @return A logical vector or sparse integer vector.
18 | #'
19 | #' @examples
20 | #' sparse_is_na(
21 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
22 | #' )
23 | #'
24 | #' sparse_is_na(
25 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
26 | #' type = "integer"
27 | #' )
28 | #' @export
29 | sparse_is_na <- function(x, type = "logical") {
30 | values <- sparse_values(x)
31 |
32 | na_values <- is.na(values)
33 | positions <- sparse_positions(x)
34 | positions <- positions[na_values]
35 |
36 | if (type == "logical") {
37 | res <- logical(length(x))
38 |
39 | if (any(na_values)) {
40 | res[positions] <- TRUE
41 | }
42 | } else {
43 | res <- sparse_integer(
44 | rep(1, length(positions)),
45 | positions,
46 | length(x),
47 | 0L
48 | )
49 | }
50 |
51 | res
52 | }
53 |
--------------------------------------------------------------------------------
/R/sparse_lag.R:
--------------------------------------------------------------------------------
1 | #' Compute lagged values for sparse vectors
2 | #'
3 | #' @param x A sparse vector.
4 | #' @param n Positive integer of length 1, giving the number of positions to lag
5 | #' by.
6 | #' @param default The value used to pad `x`` back to its original size after
7 | #' the lag has been applied. The default, `NULL``, pads with a missing value.
8 | #'
9 | #' @details
10 | #' This function, as with any of the other helper functions assumes that the
11 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
12 | #' and it is thus the users responsibility to perform input checking.
13 | #'
14 | #' @return sparse vector.
15 | #'
16 | #' @examples
17 | #' vec_dbl <- sparse_double(c(pi, 4, 5/2), c(1, 5, 7), 10)
18 | #'
19 | #' sparse_lag(vec_dbl)
20 | #' sparse_lag(vec_dbl, n = 3)
21 | #' sparse_lag(vec_dbl, n = 3, default = 0)
22 | #'
23 | #' vec_int <- sparse_integer(c(1, 2, 3), c(1, 5, 7), 10)
24 | #'
25 | #' sparse_lag(vec_int)
26 | #' sparse_lag(vec_int, n = 3)
27 | #' sparse_lag(vec_int, n = 3, default = 0L)
28 | #'
29 | #' vec_chr <- sparse_character(c("A", "B", "C"), c(1, 5, 7), 10)
30 | #'
31 | #' sparse_lag(vec_chr)
32 | #' sparse_lag(vec_chr, n = 3)
33 | #' sparse_lag(vec_chr, n = 3, default = "before")
34 | #'
35 | #' vec_lgl <- sparse_logical(c(TRUE, TRUE, TRUE), c(1, 5, 7), 10)
36 | #'
37 | #' sparse_lag(vec_lgl)
38 | #' sparse_lag(vec_lgl, n = 3)
39 | #' sparse_lag(vec_lgl, n = 3, default = FALSE)
40 | #' @export
41 | sparse_lag <- function(x, n = 1L, default = NULL) {
42 | values <- sparse_values(x)
43 | positions <- sparse_positions(x)
44 | len <- length(x)
45 | x_default <- sparse_default(x)
46 |
47 | n <- pmin(n, len)
48 |
49 | if (n < 1) {
50 | cli::cli_abort("{.arg n} must be at least 1, not {n}.")
51 | }
52 |
53 | positions <- positions + n
54 |
55 | overflow <- positions > len
56 | if (any(overflow)) {
57 | positions <- positions[!overflow]
58 | values <- values[!overflow]
59 | }
60 |
61 | if (!identical(x_default, default)) {
62 | if (is.null(default)) {
63 | default <- NA
64 | }
65 |
66 | values <- c(rep(default, n), values)
67 | positions <- c(seq_len(n), positions)
68 | }
69 |
70 | generator <- switch(
71 | class(x),
72 | integer = sparse_integer,
73 | numeric = sparse_double,
74 | character = sparse_character,
75 | logical = sparse_logical
76 | )
77 |
78 | generator(
79 | values = values,
80 | positions = positions,
81 | length = len,
82 | default = x_default
83 | )
84 | }
85 |
--------------------------------------------------------------------------------
/R/sparse_logical.R:
--------------------------------------------------------------------------------
1 | #' Create sparse logical vector
2 | #'
3 | #' Construction of vectors where only values and positions are recorded. The
4 | #' Length and default values determine all other information.
5 | #'
6 | #' @param values logical vector, values of non-zero entries.
7 | #' @param positions integer vector, indices of non-zero entries.
8 | #' @param length integer value, Length of vector.
9 | #' @param default logical value, value at indices not specified by `positions`.
10 | #' Defaults to `FALSE`. Cannot be `NA`.
11 | #'
12 | #' @details
13 | #'
14 | #' `values` and `positions` are expected to be the same length, and are allowed
15 | #' to both have zero length.
16 | #'
17 | #' Allowed values for `value` are logical values. Missing values such as `NA`
18 | #' and `NA_real_` are allowed. Everything else is disallowed, The values are
19 | #' also not allowed to take the same value as `default`.
20 | #'
21 | #' `positions` should be integers or integer-like doubles. Everything else is
22 | #' not allowed. Positions should furthermore be positive (`0` not allowed),
23 | #' unique, and in increasing order. Lastly they should all be smaller that
24 | #' `length`.
25 | #'
26 | #' For developers:
27 | #'
28 | #' setting `options("sparsevctrs.verbose_materialize" = TRUE)` will print a
29 | #' message each time a sparse vector has been forced to materialize.
30 | #'
31 | #' @return sparse logical vector
32 | #'
33 | #' @seealso [sparse_double()] [sparse_integer()] [sparse_character()]
34 | #'
35 | #' @examples
36 | #' sparse_logical(logical(), integer(), 10)
37 | #'
38 | #' sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 10)
39 | #'
40 | #' str(
41 | #' sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 1000000000)
42 | #' )
43 | #' @export
44 | sparse_logical <- function(values, positions, length, default = FALSE) {
45 | check_bool(default)
46 | validate_length(length)
47 |
48 | if (!is.integer(length)) {
49 | length <- as.integer(length)
50 | }
51 |
52 | validate_values_logical(values)
53 |
54 | validate_positions(positions, length, len_values = length(values))
55 | positions <- as.integer(positions)
56 |
57 | if (any(values == default, na.rm = TRUE)) {
58 | offenders <- which(values == default)
59 | cli::cli_abort(
60 | c(
61 | x = "{.arg values} value must not be equal to the default {default}.",
62 | i = "{default} values at index: {offenders}."
63 | )
64 | )
65 | }
66 |
67 | new_sparse_logical(values, positions, length, default)
68 | }
69 |
70 | new_sparse_logical <- function(values, positions, length, default) {
71 | x <- list(
72 | values,
73 | positions,
74 | length,
75 | default
76 | )
77 |
78 | .Call(ffi_altrep_new_sparse_logical, x)
79 | }
80 |
--------------------------------------------------------------------------------
/R/sparse_mean.R:
--------------------------------------------------------------------------------
1 | #' Calculate mean from sparse vectors
2 | #'
3 | #' @param x A sparse numeric vector.
4 | #' @param wts A numeric vector, should be same length as `x`.
5 | #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`.
6 | #'
7 | #'
8 | #' @details
9 | #' This function, as with any of the other helper functions assumes that the
10 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
11 | #' and it is thus the users responsibility to perform input checking.
12 | #'
13 | #' @return single numeric value.
14 | #'
15 | #' @examples
16 | #' sparse_mean(
17 | #' sparse_double(1000, 1, 1000)
18 | #' )
19 | #'
20 | #' sparse_mean(
21 | #' sparse_double(1000, 1, 1000, default = 1)
22 | #' )
23 | #'
24 | #' sparse_mean(
25 | #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
26 | #' )
27 | #'
28 | #' sparse_mean(
29 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
30 | #' )
31 | #'
32 | #' sparse_mean(
33 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
34 | #' na_rm = TRUE
35 | #' )
36 | #'
37 | #' @export
38 | sparse_mean <- function(x, wts = NULL, na_rm = FALSE) {
39 | if (!is.null(wts)) {
40 | x <- sparse_multiplication(x, wts)
41 | }
42 |
43 | default <- sparse_default(x)
44 | values <- sparse_values(x)
45 | len_values <- length(values)
46 |
47 | if (len_values == 0) {
48 | return(default)
49 | }
50 |
51 | x_len <- length(x)
52 |
53 | res <- sum(values, na.rm = na_rm)
54 |
55 | if (!is.na(default) && default != 0) {
56 | res <- res + (x_len - len_values) * default
57 | }
58 |
59 | if (na_rm) {
60 | x_len <- x_len - sum(is.na(values))
61 | }
62 |
63 | if (is.null(wts)) {
64 | res <- res / x_len
65 | } else {
66 | na_loc <- sparse_which_na(x)
67 | if (length(na_loc) > 0) {
68 | wts <- wts[-na_loc]
69 | }
70 | res <- res / sum(wts)
71 | }
72 |
73 | res
74 | }
75 |
--------------------------------------------------------------------------------
/R/sparse_median.R:
--------------------------------------------------------------------------------
1 | #' Calculate median from sparse vectors
2 | #'
3 | #' @param x A sparse numeric vector.
4 | #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`.
5 | #'
6 | #' @details
7 | #' This function, as with any of the other helper functions assumes that the
8 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
9 | #' and it is thus the users responsibility to perform input checking.
10 | #'
11 | #' @return single numeric value.
12 | #'
13 | #' @examples
14 | #' sparse_median(
15 | #' sparse_double(1000, 1, 1000)
16 | #' )
17 | #'
18 | #' sparse_median(
19 | #' sparse_double(1000, 1, 1000, default = 1)
20 | #' )
21 | #'
22 | #' sparse_median(
23 | #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
24 | #' )
25 | #'
26 | #' sparse_median(
27 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
28 | #' )
29 | #'
30 | #' sparse_median(
31 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
32 | #' na_rm = TRUE
33 | #' )
34 | #'
35 | #' @export
36 | sparse_median <- function(x, na_rm = FALSE) {
37 | default <- sparse_default(x)
38 | values <- sparse_values(x)
39 | values_len <- length(values)
40 |
41 | if (values_len == 0) {
42 | return(default)
43 | }
44 |
45 | x_len <- length(x)
46 |
47 | if ((x_len / 2) > values_len) {
48 | if (na_rm) {
49 | return(default)
50 | } else {
51 | if (any(is.na(values))) {
52 | return(NA_real_)
53 | } else {
54 | return(default)
55 | }
56 | }
57 | }
58 |
59 | stats::median(x, na.rm = na_rm)
60 | }
61 |
--------------------------------------------------------------------------------
/R/sparse_replace_na.R:
--------------------------------------------------------------------------------
1 | #' Replace NAs with specified values in sparse vectors
2 | #'
3 | #' @param x A sparse vector.
4 | #' @param replace A single value.
5 | #'
6 | #' @details
7 | #' This function, as with any of the other helper functions assumes that the
8 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
9 | #' and it is thus the users responsibility to perform input checking.
10 | #' The `replace` is likewise not type or length checked.
11 | #'
12 | #' The output type will match the values after coercion happens during
13 | #' replacement.
14 | #'
15 | #' @return A sparse vector.
16 | #'
17 | #' @examples
18 | #' sparse_replace_na(
19 | #' sparse_double(c(10, NA, 11), c(1, 5, 10), 10),
20 | #' 5
21 | #' )
22 | #'
23 | #' sparse_replace_na(
24 | #' sparse_integer(c(10L, NA, 11L), c(1, 5, 10), 10),
25 | #' 5L
26 | #' )
27 | #'
28 | #' sparse_replace_na(
29 | #' sparse_character(c("A", NA, "E"), c(2, 5, 10), 10),
30 | #' "missing"
31 | #' )
32 | #' @export
33 | sparse_replace_na <- function(x, replace) {
34 | default <- sparse_default(x)
35 | values <- sparse_values(x)
36 | positions <- sparse_positions(x)
37 | len_values <- length(values)
38 |
39 | if (len_values == 0) {
40 | return(x)
41 | }
42 |
43 | if (replace == default) {
44 | remove <- is.na(values)
45 | values <- values[!remove]
46 | positions <- positions[!remove]
47 | } else {
48 | values[is.na(values)] <- replace
49 | }
50 |
51 | if (is.integer(values)) {
52 | res <- sparse_integer(
53 | values = values,
54 | positions = positions,
55 | length = length(x),
56 | default = default
57 | )
58 | } else if (is.double(values)) {
59 | res <- sparse_double(
60 | values = values,
61 | positions = positions,
62 | length = length(x),
63 | default = default
64 | )
65 | } else if (is.character(values)) {
66 | res <- sparse_character(
67 | values = values,
68 | positions = positions,
69 | length = length(x),
70 | default = default
71 | )
72 | }
73 |
74 | res
75 | }
76 |
--------------------------------------------------------------------------------
/R/sparse_sd.R:
--------------------------------------------------------------------------------
1 | #' Calculate standard diviation from sparse vectors
2 | #'
3 | #' @param x A sparse numeric vector.
4 | #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`.
5 | #'
6 | #' @details
7 | #' This function, as with any of the other helper functions assumes that the
8 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
9 | #' and it is thus the users responsibility to perform input checking.
10 | #'
11 | #' Much like [sd()] it uses the denominator `n-1`.
12 | #'
13 | #' @return single numeric value.
14 | #'
15 | #' @examples
16 | #' sparse_sd(
17 | #' sparse_double(1000, 1, 1000)
18 | #' )
19 | #'
20 | #' sparse_sd(
21 | #' sparse_double(1000, 1, 1000, default = 1)
22 | #' )
23 | #'
24 | #' sparse_sd(
25 | #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
26 | #' )
27 | #'
28 | #' sparse_sd(
29 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
30 | #' )
31 | #'
32 | #' sparse_sd(
33 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
34 | #' na_rm = TRUE
35 | #' )
36 | #'
37 | #' @export
38 | sparse_sd <- function(x, na_rm = FALSE) {
39 | sqrt(sparse_var(x, na_rm = na_rm))
40 | }
41 |
--------------------------------------------------------------------------------
/R/sparse_sqrt.R:
--------------------------------------------------------------------------------
1 | #' Calculate sqrt of sparse vectors
2 | #'
3 | #' @param x A sparse numeric vector.
4 | #'
5 | #' @details
6 | #' This function, as with any of the other helper functions assumes that the
7 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
8 | #' and it is thus the users responsibility to perform input checking.
9 | #'
10 | #' The output will be a double vector regardless of the input type.
11 | #'
12 | #' @return A sparse double vector.
13 | #'
14 | #' @examples
15 | #' sparse_sqrt(
16 | #' sparse_double(1000, 1, 10)
17 | #' )
18 | #'
19 | #' sparse_sqrt(
20 | #' sparse_integer(1000, 3, 10, default = 2)
21 | #' )
22 | #'
23 | #' sparse_sqrt(
24 | #' sparse_double(c(10, NA, 11), c(1, 5, 10), 10)
25 | #' )
26 | #' @export
27 | sparse_sqrt <- function(x) {
28 | default <- sparse_default(x)
29 | values <- sparse_values(x)
30 | positions <- sparse_positions(x)
31 | len_values <- length(values)
32 |
33 | if (len_values == 0 && default == 0) {
34 | return(x)
35 | }
36 |
37 | res <- sparse_double(
38 | values = sqrt(values),
39 | positions = positions,
40 | length = length(x),
41 | default = sqrt(default)
42 | )
43 |
44 | res
45 | }
46 |
--------------------------------------------------------------------------------
/R/sparse_var.R:
--------------------------------------------------------------------------------
1 | #' Calculate variance from sparse vectors
2 | #'
3 | #' @param x A sparse numeric vector.
4 | #' @param na_rm Logical, whether to remove missing values. Defaults to `FALSE`.
5 | #'
6 | #' @details
7 | #' This function, as with any of the other helper functions assumes that the
8 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
9 | #' and it is thus the users responsibility to perform input checking.
10 | #'
11 | #' Much like [var()] it uses the denominator `n-1`.
12 | #'
13 | #' @return single numeric value.
14 | #'
15 | #' @examples
16 | #' sparse_var(
17 | #' sparse_double(1000, 1, 1000)
18 | #' )
19 | #'
20 | #' sparse_var(
21 | #' sparse_double(1000, 1, 1000, default = 1)
22 | #' )
23 | #'
24 | #' sparse_var(
25 | #' sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
26 | #' )
27 | #'
28 | #' sparse_var(
29 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
30 | #' )
31 | #'
32 | #' sparse_var(
33 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
34 | #' na_rm = TRUE
35 | #' )
36 | #'
37 | #' @export
38 | sparse_var <- function(x, na_rm = FALSE) {
39 | values <- sparse_values(x)
40 | len_values <- length(values)
41 |
42 | if (len_values == 0) {
43 | return(0)
44 | }
45 |
46 | default <- sparse_default(x)
47 | x_len <- length(x)
48 |
49 | mean <- sparse_mean(x, na_rm = na_rm)
50 |
51 | res <- sum((values - mean)^2, na.rm = na_rm)
52 |
53 | res <- res + (default - mean)^2 * (x_len - len_values)
54 |
55 | denominator <- x_len - 1
56 |
57 | if (na_rm) {
58 | denominator <- denominator - sum(is.na(values))
59 | }
60 |
61 | res <- res / denominator
62 | res
63 | }
64 |
--------------------------------------------------------------------------------
/R/sparse_which_na.R:
--------------------------------------------------------------------------------
1 | #' Which indices are Missing Values
2 | #'
3 | #' @param x A sparse vector.
4 | #'
5 | #' @details
6 | #' This function, as with any of the other helper functions assumes that the
7 | #' input `x` is a sparse numeric vector. This is done for performance reasons,
8 | #' and it is thus the users responsibility to perform input checking.
9 | #'
10 | #' @return A logical vector.
11 | #'
12 | #' @seealso [sparse_is_na()]
13 | #'
14 | #' @examples
15 | #' sparse_which_na(
16 | #' sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
17 | #' )
18 | #' @export
19 | sparse_which_na <- function(x) {
20 | values <- sparse_values(x)
21 |
22 | res <- integer()
23 |
24 | na_values <- is.na(values)
25 | if (any(na_values)) {
26 | positions <- sparse_positions(x)
27 | res <- positions[na_values]
28 | }
29 |
30 | res
31 | }
32 |
--------------------------------------------------------------------------------
/R/sparsevctrs-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
4 | #' @import rlang
5 | #' @keywords internal
6 | NULL
7 |
8 | ## usethis namespace: start
9 | #' @useDynLib sparsevctrs, .registration = TRUE
10 | ## usethis namespace: end
11 | NULL
12 |
--------------------------------------------------------------------------------
/R/sparsity.R:
--------------------------------------------------------------------------------
1 | #' Calculate sparsity of data frames, matrices, and sparse matrices
2 | #'
3 | #' Turning data frame with sparse columns into sparse matrix using
4 | #' [Matrix::sparseMatrix()].
5 | #'
6 | #' @param x a data frame, matrix of sparse matrix.
7 | #' @param sample a integer or `NULL`. Number of rows to sample to estimate
8 | #' sparsity. If `NULL` then no sampling is performed. Will not be used when
9 | #' `x` is a sparse matrix. Defaults to `NULL`.
10 | #'
11 | #' @details
12 | #' Only numeric 0s are considered zeroes in this calculations. Missing values,
13 | #' logical vectors and then string `"0"` aren't counted.
14 | #'
15 | #' @return a single number, between 0 and 1.
16 | #'
17 | #' @examples
18 | #'
19 | #' # data frame
20 | #' sparsity(mtcars)
21 | #'
22 | #' # Matrix
23 | #' set.seed(1234)
24 | #' mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10)
25 | #' colnames(mat) <- letters[1:10]
26 | #'
27 | #' sparsity(mat)
28 | #'
29 | #' # Sparse matrix
30 | #' sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
31 | #'
32 | #' sparsity(sparse_mat)
33 | #' @export
34 | sparsity <- function(x, sample = NULL) {
35 | check_number_whole(sample, min = 1, allow_null = TRUE)
36 |
37 | x_type <- input_type(x)
38 |
39 | if (x_type != "sparse_matrix") {
40 | nrows <- nrow(x)
41 | if (!is.null(sample)) {
42 | if (nrows < sample) {
43 | sample <- nrows
44 | }
45 | x <- x[sample(nrows, sample), ]
46 | }
47 | }
48 |
49 | res <- switch(
50 | x_type,
51 | data.frame = sparsity_df(x),
52 | matrix = sparsity_mat(x),
53 | sparse_matrix = sparsity_sparse_mat(x)
54 | )
55 |
56 | res
57 | }
58 |
59 | input_type <- function(x, call = rlang::caller_env()) {
60 | if (is.data.frame(x)) {
61 | return("data.frame")
62 | } else if (is.matrix(x)) {
63 | return("matrix")
64 | } else if (any(methods::is(x) == "sparseMatrix")) {
65 | return("sparse_matrix")
66 | } else {
67 | cli::cli_abort(
68 | "{.arg x} must be a data frame, matrix, or sparse matrix,
69 | Not {.obj_type_friendly {x}}.",
70 | call = call
71 | )
72 | }
73 | }
74 |
75 | count_zeroes <- function(x) {
76 | if (!inherits(x, c("numeric", "integer"))) {
77 | return(0)
78 | }
79 |
80 | if (is_sparse_vector(x)) {
81 | default <- sparse_default(x)
82 | values <- sparse_values(x)
83 | len <- length(x)
84 |
85 | if (default == 0) {
86 | res <- len - length(values)
87 | } else {
88 | res <- length(values)
89 | }
90 | } else {
91 | res <- sum(x == 0, na.rm = TRUE)
92 | }
93 | res
94 | }
95 |
96 | sparsity_df <- function(x) {
97 | res <- vapply(x, count_zeroes, double(1))
98 | res <- sum(res) / (nrow(x) * ncol(x))
99 | res
100 | }
101 |
102 | sparsity_mat <- function(x) {
103 | if (!is.numeric(x)) {
104 | return(0)
105 | }
106 | sum(x == 0, na.rm = TRUE) / length(x)
107 | }
108 |
109 | sparsity_sparse_mat <- function(x) {
110 | 1 - (length(x@x) / length(x))
111 | }
112 |
--------------------------------------------------------------------------------
/R/type-predicates.R:
--------------------------------------------------------------------------------
1 | #' Sparse vector type checkers
2 | #'
3 | #' Helper functions to determine whether an vector is a sparse vector or not.
4 | #'
5 | #' @param x value to be checked.
6 | #'
7 | #' @details
8 | #' `is_sparse_vector()` is a general function that detects any type of sparse
9 | #' vector created with this package. `is_sparse_double()`,
10 | #' `is_sparse_integer()`, `is_sparse_character()`, and `is_sparse_logical()` are
11 | #' more specific functions that only detects the type. `is_sparse_numeric()`
12 | #' matches both sparse integers and doubles.
13 | #'
14 | #' @return single logical value
15 | #'
16 | #' @examples
17 | #' x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
18 | #' x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1)
19 | #'
20 | #' is_sparse_vector(x_sparse)
21 | #' is_sparse_vector(x_dense)
22 | #'
23 | #' is_sparse_double(x_sparse)
24 | #' is_sparse_double(x_dense)
25 | #'
26 | #' is_sparse_character(x_sparse)
27 | #' is_sparse_character(x_dense)
28 | #'
29 | #' # Forced materialization
30 | #' is_sparse_vector(x_sparse[])
31 | #' @name type-predicates
32 | NULL
33 |
34 | #' @rdname type-predicates
35 | #' @export
36 | is_sparse_vector <- function(x) {
37 | .Call(ffi_is_sparse_vector, x)
38 | }
39 |
40 | is_altrep_non_sparse_vector <- function(x) {
41 | .Call(ffi_is_altrep_non_sparse_vector, x)
42 | }
43 |
44 | #' @rdname type-predicates
45 | #' @export
46 | is_sparse_numeric <- function(x) {
47 | res <- .Call(ffi_extract_altrep_class, x)
48 | if (is.null(res)) {
49 | return(FALSE)
50 | }
51 |
52 | res <- as.character(res[[1]])
53 |
54 | res == "altrep_sparse_double" || res == "altrep_sparse_integer"
55 | }
56 |
57 | #' @rdname type-predicates
58 | #' @export
59 | is_sparse_double <- function(x) {
60 | res <- .Call(ffi_extract_altrep_class, x)
61 | if (is.null(res)) {
62 | return(FALSE)
63 | }
64 |
65 | res <- as.character(res[[1]])
66 |
67 | res == "altrep_sparse_double"
68 | }
69 |
70 | #' @rdname type-predicates
71 | #' @export
72 | is_sparse_integer <- function(x) {
73 | res <- .Call(ffi_extract_altrep_class, x)
74 | if (is.null(res)) {
75 | return(FALSE)
76 | }
77 |
78 | res <- as.character(res[[1]])
79 |
80 | res == "altrep_sparse_integer"
81 | }
82 |
83 | #' @rdname type-predicates
84 | #' @export
85 | is_sparse_character <- function(x) {
86 | res <- .Call(ffi_extract_altrep_class, x)
87 | if (is.null(res)) {
88 | return(FALSE)
89 | }
90 |
91 | res <- as.character(res[[1]])
92 |
93 | res == "altrep_sparse_string"
94 | }
95 |
96 | #' @rdname type-predicates
97 | #' @export
98 | is_sparse_logical <- function(x) {
99 | res <- .Call(ffi_extract_altrep_class, x)
100 | if (is.null(res)) {
101 | return(FALSE)
102 | }
103 |
104 | res <- as.character(res[[1]])
105 |
106 | res == "altrep_sparse_logical"
107 | }
108 |
--------------------------------------------------------------------------------
/R/validate-input.R:
--------------------------------------------------------------------------------
1 | validate_positions <- function(
2 | positions,
3 | length,
4 | len_values,
5 | call = rlang::caller_env()
6 | ) {
7 | if (!is.numeric(positions)) {
8 | cli::cli_abort(
9 | "{.arg positions} must be a integer vector, \\
10 | not {.obj_type_friendly {positions}}.",
11 | call = call
12 | )
13 | }
14 |
15 | if (any(is.infinite(positions))) {
16 | offenders <- which(is.infinite(positions))
17 | cli::cli_abort(
18 | c(
19 | x = "{.arg positions} must not contain infinite values.",
20 | i = "Infinite values at index: {offenders}."
21 | ),
22 | call = call
23 | )
24 | }
25 |
26 | if (any(is.nan(positions))) {
27 | offenders <- which(is.nan(positions))
28 | cli::cli_abort(
29 | c(
30 | x = "{.arg positions} must not contain NaN values.",
31 | i = "NaN values at index: {offenders}."
32 | ),
33 | call = call
34 | )
35 | }
36 |
37 | if (!is.integer(positions)) {
38 | if (any(round(positions) != positions, na.rm = TRUE)) {
39 | offenders <- which(round(positions) != positions)
40 |
41 | cli::cli_abort(
42 | c(
43 | x = "{.arg positions} must contain integer values.",
44 | i = "Non-integer values at index: {offenders}."
45 | ),
46 | call = call
47 | )
48 | }
49 | }
50 |
51 | len_positions <- length(positions)
52 |
53 | if (len_values != len_positions) {
54 | cli::cli_abort(
55 | "{.arg value} ({len_values}) and {.arg positions} ({len_positions}) \\
56 | must have the same length.",
57 | call = call
58 | )
59 | }
60 |
61 | if (anyDuplicated(positions) > 0) {
62 | offenders <- which(duplicated(positions))
63 |
64 | cli::cli_abort(
65 | c(
66 | x = "{.arg positions} must not contain any duplicate values.",
67 | i = "Duplicate values at index: {offenders}."
68 | ),
69 | call = call
70 | )
71 | }
72 |
73 | if (is.unsorted(positions)) {
74 | cli::cli_abort(
75 | "{.arg positions} must be sorted in increasing order.",
76 | call = call
77 | )
78 | }
79 |
80 | if (len_positions > 0 && max(positions) > length) {
81 | offenders <- which(positions > length)
82 | cli::cli_abort(
83 | c(
84 | x = "{.arg positions} value must not be larger than {.arg length}.",
85 | i = "Offending values at index: {offenders}."
86 | ),
87 | call = call
88 | )
89 | }
90 |
91 | if (len_positions > 0 && min(positions) < 1) {
92 | offenders <- which(positions < 1)
93 | cli::cli_abort(
94 | c(
95 | x = "{.arg positions} value must positive.",
96 | i = "Non-positive values at index: {offenders}."
97 | ),
98 | call = call
99 | )
100 | }
101 | }
102 |
103 | validate_values_double <- function(values, call = rlang::caller_env()) {
104 | if (!is.numeric(values)) {
105 | cli::cli_abort(
106 | "{.arg values} must be a numeric vector, \\
107 | not {.obj_type_friendly {values}}.",
108 | call = call
109 | )
110 | }
111 |
112 | if (any(is.infinite(values))) {
113 | offenders <- which(is.infinite(values))
114 | cli::cli_abort(
115 | c(
116 | x = "{.arg values} must not contain infinite values.",
117 | i = "Infinite values at index: {offenders}."
118 | ),
119 | call = call
120 | )
121 | }
122 |
123 | if (any(is.nan(values))) {
124 | offenders <- which(is.nan(values))
125 | cli::cli_abort(
126 | c(
127 | x = "{.arg values} must not contain NaN values.",
128 | i = "NaN values at index: {offenders}."
129 | ),
130 | call = call
131 | )
132 | }
133 | }
134 |
135 | validate_values_integer <- function(values, call = rlang::caller_env()) {
136 | values <- vctrs::vec_cast(values, integer())
137 |
138 | if (!is.integer(values)) {
139 | cli::cli_abort(
140 | "{.arg values} must be a integer vector, \\
141 | not {.obj_type_friendly {values}}.",
142 | call = call
143 | )
144 | }
145 | }
146 |
147 | validate_values_logical <- function(values, call = rlang::caller_env()) {
148 | if (!is.logical(values)) {
149 | cli::cli_abort(
150 | "{.arg values} must be a logical vector, \\
151 | not {.obj_type_friendly {values}}.",
152 | call = call
153 | )
154 | }
155 | }
156 |
157 | validate_length <- function(length, call = rlang::caller_env()) {
158 | check_number_whole(length, min = 0, call = call)
159 | if (length > .Machine$integer.max) {
160 | cli::cli_abort(
161 | "{.arg length} must be less than {(.Machine$integer.max)}, not {length}.",
162 | call = call
163 | )
164 | }
165 | }
166 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r}
8 | #| include: false
9 | knitr::opts_chunk$set(
10 | collapse = TRUE,
11 | comment = "#>",
12 | fig.path = "man/figures/README-",
13 | out.width = "100%"
14 | )
15 | set.seed(1234)
16 | ```
17 |
18 | # sparsevctrs
19 |
20 |
21 | [](https://github.com/r-lib/sparsevctrs/actions/workflows/R-CMD-check.yaml)
22 | [](https://app.codecov.io/gh/r-lib/sparsevctrs)
23 |
24 |
25 | The goal of sparsevctrs is to provide a sparse vector [ALTREP](https://svn.r-project.org/R/branches/ALTREP/ALTREP.html) class. With this, you can have sparse data in the form of sparse columns in `data.frame` or [tibble](https://tibble.tidyverse.org/). Due to the nature of how ALTREP vectors work, these sparse vectors will behave like the normal dense vectors you are used you. The vectors will contain their sparseness as much as they can, and only materialize when they have to.
26 |
27 | ## Installation
28 |
29 | You can install the development version of sparsevctrs like so:
30 |
31 | ``` r
32 | remotes::install_github("r-lib/sparsevctrs")
33 | ```
34 |
35 | ## Examples
36 |
37 | A sparse vector, here specifically a sparse double vector, will be identical to its dense counterpart, often with a smaller memory footprint.
38 |
39 | ```{r}
40 | library(sparsevctrs)
41 | library(lobstr)
42 |
43 | x_sparse <- sparse_double(value = c(3, 1, 10), position = c(2, 7, 15), length = 1000)
44 | x_dense <- numeric(1000)
45 | x_dense[2] <- 3
46 | x_dense[7] <- 1
47 | x_dense[15] <- 10
48 |
49 | obj_size(x_sparse)
50 | obj_size(x_dense)
51 |
52 | identical(x_sparse, x_dense)
53 | ```
54 |
55 | The memory of a sparse vector is proportional to the number of elements plus a constant. This means that increasing the length of a sparse vector doesn't increase how much memory it uses. Unlike dense vectors who has a much smaller constant, but increases according to the length of the values.
56 |
57 | ```{r}
58 | x_sparse_0 <- sparse_double(numeric(), integer(), length = 0)
59 | x_sparse_1000 <- sparse_double(numeric(), integer(), length = 1000)
60 | x_sparse_1000000 <- sparse_double(numeric(), integer(), length = 10000000)
61 |
62 | obj_size(x_sparse_0)
63 | obj_size(x_sparse_1000)
64 | obj_size(x_sparse_1000000)
65 |
66 | x_dense_0 <- numeric(0)
67 | x_dense_1000 <- numeric(1000)
68 | x_dense_1000000 <- numeric(10000000)
69 |
70 | obj_size(x_dense_0)
71 | obj_size(x_dense_1000)
72 | obj_size(x_dense_1000000)
73 | ```
74 |
75 | These sparse vectors are compatible with tibbles and data frames.
76 |
77 | ```{r}
78 | library(tibble)
79 | set.seed(1234)
80 |
81 | tibble(
82 | x = sample(1:1000),
83 | y = sparse_double(1, 7, 1000)
84 | )
85 | ```
86 |
87 | ## Motivation
88 |
89 | Sparse data happens from ingestion and preprocessing calculations. text to counts, dummy variables etc etc
90 |
91 | There are computational tools for calculations using sparse matrices, specifically the Matrix package and some modeling packages (e.g., xgboost, glmnet, etc.). We want to utilize these tools as best we can without making redundant implementations.
92 |
93 | However, sparse matrices are not great for data in general, or at least not until the very end, when mathematical calculations occur. Converting everything to “numeric” is problematic for dates, factors, etc. There are good reasons why data frames were created in the first place. Matrices are efficient but primitive.
94 |
95 | The problem is that many tools, especially the tidyverse, rely on data frames since they are more expressive and accommodate different variable types. We need to merge and filter rows/columns, etc, in a flexible and user-friendly way. (joins, pivoting)
96 |
97 | Having a sparse representation of data that allows us to use modern data manipulation interfaces, keeps memory overhead low, and can be efficiently converted to a more primitive matrix format so that we can let Matrix and other packages do what they do best.
98 |
99 | This is achieved with this package, by providing sparse vectors that fit into a data frame. Along with converting tools between sparse matrices and data frames.
100 |
101 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | # sparsevctrs
5 |
6 |
7 |
8 | [](https://github.com/r-lib/sparsevctrs/actions/workflows/R-CMD-check.yaml)
9 | [](https://app.codecov.io/gh/r-lib/sparsevctrs)
11 |
12 |
13 | The goal of sparsevctrs is to provide a sparse vector
14 | [ALTREP](https://svn.r-project.org/R/branches/ALTREP/ALTREP.html) class.
15 | With this, you can have sparse data in the form of sparse columns in
16 | `data.frame` or [tibble](https://tibble.tidyverse.org/). Due to the
17 | nature of how ALTREP vectors work, these sparse vectors will behave like
18 | the normal dense vectors you are used you. The vectors will contain
19 | their sparseness as much as they can, and only materialize when they
20 | have to.
21 |
22 | ## Installation
23 |
24 | You can install the development version of sparsevctrs like so:
25 |
26 | ``` r
27 | remotes::install_github("r-lib/sparsevctrs")
28 | ```
29 |
30 | ## Examples
31 |
32 | A sparse vector, here specifically a sparse double vector, will be
33 | identical to its dense counterpart, often with a smaller memory
34 | footprint.
35 |
36 | ``` r
37 | library(sparsevctrs)
38 | library(lobstr)
39 |
40 | x_sparse <- sparse_double(value = c(3, 1, 10), position = c(2, 7, 15), length = 1000)
41 | x_dense <- numeric(1000)
42 | x_dense[2] <- 3
43 | x_dense[7] <- 1
44 | x_dense[15] <- 10
45 |
46 | obj_size(x_sparse)
47 | #> 936 B
48 | obj_size(x_dense)
49 | #> 8.05 kB
50 |
51 | identical(x_sparse, x_dense)
52 | #> [1] TRUE
53 | ```
54 |
55 | The memory of a sparse vector is proportional to the number of elements
56 | plus a constant. This means that increasing the length of a sparse
57 | vector doesn’t increase how much memory it uses. Unlike dense vectors
58 | who has a much smaller constant, but increases according to the length
59 | of the values.
60 |
61 | ``` r
62 | x_sparse_0 <- sparse_double(numeric(), integer(), length = 0)
63 | x_sparse_1000 <- sparse_double(numeric(), integer(), length = 1000)
64 | x_sparse_1000000 <- sparse_double(numeric(), integer(), length = 10000000)
65 |
66 | obj_size(x_sparse_0)
67 | #> 888 B
68 | obj_size(x_sparse_1000)
69 | #> 888 B
70 | obj_size(x_sparse_1000000)
71 | #> 888 B
72 |
73 | x_dense_0 <- numeric(0)
74 | x_dense_1000 <- numeric(1000)
75 | x_dense_1000000 <- numeric(10000000)
76 |
77 | obj_size(x_dense_0)
78 | #> 48 B
79 | obj_size(x_dense_1000)
80 | #> 8.05 kB
81 | obj_size(x_dense_1000000)
82 | #> 80.00 MB
83 | ```
84 |
85 | These sparse vectors are compatible with tibbles and data frames.
86 |
87 | ``` r
88 | library(tibble)
89 | set.seed(1234)
90 |
91 | tibble(
92 | x = sample(1:1000),
93 | y = sparse_double(1, 7, 1000)
94 | )
95 | #> # A tibble: 1,000 × 2
96 | #> x y
97 | #>
98 | #> 1 284 0
99 | #> 2 848 0
100 | #> 3 918 0
101 | #> 4 101 0
102 | #> 5 623 0
103 | #> 6 905 0
104 | #> 7 645 1
105 | #> 8 934 0
106 | #> 9 400 0
107 | #> 10 900 0
108 | #> # ℹ 990 more rows
109 | ```
110 |
111 | ## Motivation
112 |
113 | Sparse data happens from ingestion and preprocessing calculations. text
114 | to counts, dummy variables etc etc
115 |
116 | There are computational tools for calculations using sparse matrices,
117 | specifically the Matrix package and some modeling packages (e.g.,
118 | xgboost, glmnet, etc.). We want to utilize these tools as best we can
119 | without making redundant implementations.
120 |
121 | However, sparse matrices are not great for data in general, or at least
122 | not until the very end, when mathematical calculations occur. Converting
123 | everything to “numeric” is problematic for dates, factors, etc. There
124 | are good reasons why data frames were created in the first place.
125 | Matrices are efficient but primitive.
126 |
127 | The problem is that many tools, especially the tidyverse, rely on data
128 | frames since they are more expressive and accommodate different variable
129 | types. We need to merge and filter rows/columns, etc, in a flexible and
130 | user-friendly way. (joins, pivoting)
131 |
132 | Having a sparse representation of data that allows us to use modern data
133 | manipulation interfaces, keeps memory overhead low, and can be
134 | efficiently converted to a more primitive matrix format so that we can
135 | let Matrix and other packages do what they do best.
136 |
137 | This is achieved with this package, by providing sparse vectors that fit
138 | into a data frame. Along with converting tools between sparse matrices
139 | and data frames.
140 |
--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
1 | url: https://r-lib.github.io/sparsevctrs/
2 |
3 | development:
4 | mode: auto
5 |
6 | template:
7 | package: tidytemplate
8 | bootstrap: 5
9 |
10 | includes:
11 | in_header: |
12 |
13 |
14 | reference:
15 | - title: Create Sparse Vectors
16 | contents:
17 | - sparse_double
18 | - sparse_integer
19 | - sparse_character
20 | - sparse_logical
21 |
22 | - title: Convertion functions
23 | contents:
24 | - coerce_to_sparse_data_frame
25 | - coerce_to_sparse_matrix
26 | - coerce_to_sparse_tibble
27 | - coerce-vector
28 |
29 | - title: Helper Functions
30 | contents:
31 | - sparse_mean
32 | - sparse_var
33 | - sparse_sd
34 | - sparse_median
35 | - sparse_sqrt
36 | - sparse_replace_na
37 | - sparse_is_na
38 | - sparse_which_na
39 | - sparse_lag
40 | - sparse_dummy
41 | - sparsity
42 |
43 | - title: Utility Functions
44 | contents:
45 | - type-predicates
46 | - sparse-arithmatic-scalar
47 | - sparse-arithmatic
48 | - extractors
49 | - has_sparse_elements
50 | - sparsevctrs_options
51 |
--------------------------------------------------------------------------------
/air.toml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-lib/sparsevctrs/08efac80710bf1f2d038f576ca0d093148300c7a/air.toml
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | comment: false
2 |
3 | coverage:
4 | status:
5 | project:
6 | default:
7 | target: auto
8 | threshold: 1%
9 | informational: true
10 | patch:
11 | default:
12 | target: auto
13 | threshold: 1%
14 | informational: true
15 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## R CMD check results
2 |
3 | 0 errors | 0 warnings | 0 note
4 |
5 | ## revdepcheck results
6 |
7 | We checked 5 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
8 |
9 | * We saw 0 new problems
10 | * We failed to check 0 packages
11 |
12 |
--------------------------------------------------------------------------------
/man/coerce-vector.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/coerce-vector.R
3 | \name{coerce-vector}
4 | \alias{coerce-vector}
5 | \alias{as_sparse_double}
6 | \alias{as_sparse_integer}
7 | \alias{as_sparse_character}
8 | \alias{as_sparse_logical}
9 | \title{Coerce numeric vector to sparse double}
10 | \usage{
11 | as_sparse_double(x, default = 0)
12 |
13 | as_sparse_integer(x, default = 0L)
14 |
15 | as_sparse_character(x, default = "")
16 |
17 | as_sparse_logical(x, default = FALSE)
18 | }
19 | \arguments{
20 | \item{x}{a numeric vector.}
21 |
22 | \item{default}{default value to use. Defaults to \code{0}.
23 |
24 | The values of \code{x} must be double or integer. It must not contain any \code{Inf} or
25 | \code{NaN} values.}
26 | }
27 | \value{
28 | sparse vectors
29 | }
30 | \description{
31 | Takes a numeric vector, integer or double, and turn it into a sparse double
32 | vector.
33 | }
34 | \examples{
35 | x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, 0)
36 | x_sparse <- as_sparse_double(x_dense)
37 | x_sparse
38 |
39 | is_sparse_double(x_sparse)
40 | }
41 |
--------------------------------------------------------------------------------
/man/coerce_to_sparse_data_frame.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/coerce.R
3 | \name{coerce_to_sparse_data_frame}
4 | \alias{coerce_to_sparse_data_frame}
5 | \title{Coerce sparse matrix to data frame with sparse columns}
6 | \usage{
7 | coerce_to_sparse_data_frame(x, call = rlang::caller_env(0))
8 | }
9 | \arguments{
10 | \item{x}{sparse matrix.}
11 |
12 | \item{call}{The execution environment of a currently
13 | running function, e.g. \code{caller_env()}. The function will be
14 | mentioned in error messages as the source of the error. See the
15 | \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.}
16 | }
17 | \value{
18 | data.frame with sparse columns
19 | }
20 | \description{
21 | Turning a sparse matrix into a data frame
22 | }
23 | \details{
24 | The only requirement from the sparse matrix is that it contains column names.
25 | }
26 | \examples{
27 | \dontshow{if (rlang::is_installed("Matrix")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
28 | set.seed(1234)
29 | mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10)
30 | colnames(mat) <- letters[1:10]
31 | sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
32 | sparse_mat
33 |
34 | res <- coerce_to_sparse_data_frame(sparse_mat)
35 | res
36 |
37 | # All columns are sparse
38 | vapply(res, is_sparse_vector, logical(1))
39 | \dontshow{\}) # examplesIf}
40 | }
41 | \seealso{
42 | \code{\link[=coerce_to_sparse_tibble]{coerce_to_sparse_tibble()}} \code{\link[=coerce_to_sparse_matrix]{coerce_to_sparse_matrix()}}
43 | }
44 |
--------------------------------------------------------------------------------
/man/coerce_to_sparse_matrix.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/coerce.R
3 | \name{coerce_to_sparse_matrix}
4 | \alias{coerce_to_sparse_matrix}
5 | \title{Coerce sparse data frame to sparse matrix}
6 | \usage{
7 | coerce_to_sparse_matrix(x, call = rlang::caller_env(0))
8 | }
9 | \arguments{
10 | \item{x}{a data frame or tibble with sparse columns.}
11 |
12 | \item{call}{The execution environment of a currently
13 | running function, e.g. \code{caller_env()}. The function will be
14 | mentioned in error messages as the source of the error. See the
15 | \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.}
16 | }
17 | \value{
18 | sparse matrix
19 | }
20 | \description{
21 | Turning data frame with sparse columns into sparse matrix using
22 | \code{\link[Matrix:sparseMatrix]{Matrix::sparseMatrix()}}.
23 | }
24 | \details{
25 | No checking is currently do to \code{x} to determine whether it contains sparse
26 | columns or not. Thus it works with any data frame. Needless to say, creating
27 | a sparse matrix out of a dense data frame is not ideal.
28 | }
29 | \examples{
30 | \dontshow{if (rlang::is_installed("Matrix")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
31 | sparse_tbl <- lapply(1:10, function(x) sparse_double(x, x, length = 10))
32 | names(sparse_tbl) <- letters[1:10]
33 | sparse_tbl <- as.data.frame(sparse_tbl)
34 | sparse_tbl
35 |
36 | res <- coerce_to_sparse_matrix(sparse_tbl)
37 | res
38 | \dontshow{\}) # examplesIf}
39 | }
40 | \seealso{
41 | \code{\link[=coerce_to_sparse_data_frame]{coerce_to_sparse_data_frame()}} \code{\link[=coerce_to_sparse_tibble]{coerce_to_sparse_tibble()}}
42 | }
43 |
--------------------------------------------------------------------------------
/man/coerce_to_sparse_tibble.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/coerce.R
3 | \name{coerce_to_sparse_tibble}
4 | \alias{coerce_to_sparse_tibble}
5 | \title{Coerce sparse matrix to tibble with sparse columns}
6 | \usage{
7 | coerce_to_sparse_tibble(x, call = rlang::caller_env(0))
8 | }
9 | \arguments{
10 | \item{x}{sparse matrix.}
11 |
12 | \item{call}{The execution environment of a currently
13 | running function, e.g. \code{caller_env()}. The function will be
14 | mentioned in error messages as the source of the error. See the
15 | \code{call} argument of \code{\link[rlang:abort]{abort()}} for more information.}
16 | }
17 | \value{
18 | tibble with sparse columns
19 | }
20 | \description{
21 | Turning a sparse matrix into a tibble.
22 | }
23 | \details{
24 | The only requirement from the sparse matrix is that it contains column names.
25 | }
26 | \examples{
27 | \dontshow{if (rlang::is_installed("tibble")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
28 | set.seed(1234)
29 | mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10)
30 | colnames(mat) <- letters[1:10]
31 | sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
32 | sparse_mat
33 |
34 | res <- coerce_to_sparse_tibble(sparse_mat)
35 | res
36 |
37 | # All columns are sparse
38 | vapply(res, is_sparse_vector, logical(1))
39 | \dontshow{\}) # examplesIf}
40 | }
41 | \seealso{
42 | \code{\link[=coerce_to_sparse_data_frame]{coerce_to_sparse_data_frame()}} \code{\link[=coerce_to_sparse_matrix]{coerce_to_sparse_matrix()}}
43 | }
44 |
--------------------------------------------------------------------------------
/man/extractors.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/extractors.R
3 | \name{extractors}
4 | \alias{extractors}
5 | \alias{sparse_positions}
6 | \alias{sparse_values}
7 | \alias{sparse_default}
8 | \title{Information extraction from sparse vectors}
9 | \usage{
10 | sparse_positions(x)
11 |
12 | sparse_values(x)
13 |
14 | sparse_default(x)
15 | }
16 | \arguments{
17 | \item{x}{vector to be extracted from.}
18 | }
19 | \value{
20 | vectors of requested attributes
21 | }
22 | \description{
23 | Extract positions, values, and default from sparse vectors without the need
24 | to materialize vector.
25 | }
26 | \details{
27 | \code{sparse_default()} returns \code{NA} when applied to non-sparse vectors. This is
28 | done to have an indicator of non-sparsity.
29 |
30 | for ease of use, these functions also works on non-sparse variables.
31 | }
32 | \examples{
33 | x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
34 | x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1)
35 |
36 | sparse_positions(x_sparse)
37 | sparse_values(x_sparse)
38 | sparse_default(x_sparse)
39 |
40 | sparse_positions(x_dense)
41 | sparse_values(x_dense)
42 | sparse_default(x_dense)
43 |
44 | x_sparse_3 <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10, default = 3)
45 | sparse_default(x_sparse_3)
46 | }
47 |
--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/r-lib/sparsevctrs/08efac80710bf1f2d038f576ca0d093148300c7a/man/figures/logo.png
--------------------------------------------------------------------------------
/man/has_sparse_elements.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/has_sparse_elements.R
3 | \name{has_sparse_elements}
4 | \alias{has_sparse_elements}
5 | \title{Check for sparse elements}
6 | \usage{
7 | has_sparse_elements(x)
8 | }
9 | \arguments{
10 | \item{x}{a data frame, tibble, or list.}
11 | }
12 | \value{
13 | A single logical value.
14 | }
15 | \description{
16 | This function checks to see if a data.frame, tibble or list contains one or
17 | more sparse vectors.
18 | }
19 | \details{
20 | The checking in this function is done using \code{\link[=is_sparse_vector]{is_sparse_vector()}}, but is
21 | implemented using an early exit pattern to provide fast performance for wide
22 | data.frames.
23 |
24 | This function does not test whether \code{x} is a data.frame, tibble or list. It
25 | simply iterates over the elements and sees if they are sparse vectors.
26 | }
27 | \examples{
28 | \dontshow{if (rlang::is_installed("Matrix")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
29 | set.seed(1234)
30 | n_cols <- 10000
31 | mat <- matrix(sample(0:1, n_cols * 10, TRUE, c(0.9, 0.1)), ncol = n_cols)
32 | colnames(mat) <- as.character(seq_len(n_cols))
33 | sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
34 |
35 | res <- coerce_to_sparse_tibble(sparse_mat)
36 | has_sparse_elements(res)
37 |
38 | has_sparse_elements(mtcars)
39 | \dontshow{\}) # examplesIf}
40 | }
41 |
--------------------------------------------------------------------------------
/man/sparse-arithmatic-scalar.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/arithmatic.R
3 | \name{sparse-arithmatic-scalar}
4 | \alias{sparse-arithmatic-scalar}
5 | \alias{sparse_division_scalar}
6 | \alias{sparse_multiplication_scalar}
7 | \alias{sparse_addition_scalar}
8 | \alias{sparse_subtraction_scalar}
9 | \title{Scalar arithmatic with sparse vectors}
10 | \usage{
11 | sparse_division_scalar(x, val)
12 |
13 | sparse_multiplication_scalar(x, val)
14 |
15 | sparse_addition_scalar(x, val)
16 |
17 | sparse_subtraction_scalar(x, val)
18 | }
19 | \arguments{
20 | \item{x}{A sparse vector.}
21 |
22 | \item{val}{A single numeric value.}
23 | }
24 | \value{
25 | A sparse vector of same type.
26 | }
27 | \description{
28 | Do Arithmatic on sparse vectors without destroying the sparsity. Note that
29 | only multiplication and division preserves the default value.
30 | }
31 | \details{
32 | No checking of the inputs are being done.
33 |
34 | \code{sparse_division_scalar()} and \code{sparse_multiplication_scalar()} are the most
35 | used ones, as they preserve the default, which is often what you want to do.
36 |
37 | \code{sparse_division_scalar()} always produces double vectors, regardless of
38 | whether they could be represented as integers or not. Expect when \code{val = 1}
39 | as the input is returned unchanged, or \code{val = NA} as the input returned will
40 | be \code{NA} or the appropiate type.
41 | }
42 | \examples{
43 | x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
44 |
45 | sparse_division_scalar(x_sparse, 2)
46 | sparse_multiplication_scalar(x_sparse, 2)
47 | sparse_addition_scalar(x_sparse, 2)
48 | sparse_subtraction_scalar(x_sparse, 2)
49 | }
50 |
--------------------------------------------------------------------------------
/man/sparse-arithmatic.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/arithmatic.R
3 | \name{sparse-arithmatic}
4 | \alias{sparse-arithmatic}
5 | \alias{sparse_multiplication}
6 | \title{Vector arithmatic with sparse vectors}
7 | \usage{
8 | sparse_multiplication(x, y)
9 | }
10 | \arguments{
11 | \item{x}{A numeric vector.}
12 |
13 | \item{y}{A numeric vector.}
14 | }
15 | \value{
16 | A sparse vector of same type.
17 | }
18 | \description{
19 | Do arithmatic operations on sparse vectors while trying to void destroying
20 | the sparsity.
21 | }
22 | \details{
23 | Note that this function works with both sparse and dense vectors for both \code{x}
24 | and \code{y}, returning a sparse or dense vector according to the input.
25 |
26 | For \code{sparse_multiplication()} the class of the resulting vector depends on
27 | the classes of \code{x} and \code{y}. If both \code{x} and \code{y} are integer vectors then an
28 | integer vector is returned, otherwise a double vector is returned.
29 |
30 | \code{sparse_multiplication()} will return a non-sparse vector if both \code{x} and \code{y}
31 | is non-sparse. Otherwise a sparse vector is returned.
32 |
33 | \code{sparse_multiplication()} will destroy sparsity of sparse vectors with non-0
34 | \code{default} values.
35 | }
36 | \examples{
37 | x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
38 |
39 | sparse_multiplication(x_sparse, x_sparse)
40 | }
41 |
--------------------------------------------------------------------------------
/man/sparse_character.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_character.R
3 | \name{sparse_character}
4 | \alias{sparse_character}
5 | \title{Create sparse character vector}
6 | \usage{
7 | sparse_character(values, positions, length, default = "")
8 | }
9 | \arguments{
10 | \item{values}{integer vector, values of non-zero entries.}
11 |
12 | \item{positions}{integer vector, indices of non-zero entries.}
13 |
14 | \item{length}{integer value, Length of vector.}
15 |
16 | \item{default}{integer value, value at indices not specified by \code{positions}.
17 | Defaults to \code{""}. Cannot be \code{NA}.}
18 | }
19 | \value{
20 | sparse character vector
21 | }
22 | \description{
23 | Construction of vectors where only values and positions are recorded. The
24 | Length and default values determine all other information.
25 | }
26 | \details{
27 | \code{values} and \code{positions} are expected to be the same length, and are allowed
28 | to both have zero length.
29 |
30 | Allowed values for \code{value} are character values. Missing values such as \code{NA}
31 | and \code{NA_real_} are allowed as they are turned into \code{NA_character_}.
32 | Everything else is disallowed. The values are also not allowed to take the
33 | same value as \code{default}.
34 |
35 | \code{positions} should be integers or integer-like doubles. Everything else is
36 | not allowed. Positions should furthermore be positive (\code{0} not allowed),
37 | unique, and in increasing order. Lastly they should all be smaller that
38 | \code{length}.
39 |
40 | For developers:
41 |
42 | setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a
43 | message each time a sparse vector has been forced to materialize.
44 | }
45 | \examples{
46 | sparse_character(character(), integer(), 10)
47 |
48 | sparse_character(c("A", "C", "E"), c(2, 5, 10), 10)
49 |
50 | str(
51 | sparse_character(c("A", "C", "E"), c(2, 5, 10), 1000000000)
52 | )
53 | }
54 | \seealso{
55 | \code{\link[=sparse_double]{sparse_double()}} \code{\link[=sparse_integer]{sparse_integer()}}
56 | }
57 |
--------------------------------------------------------------------------------
/man/sparse_double.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_double.R
3 | \name{sparse_double}
4 | \alias{sparse_double}
5 | \title{Create sparse double vector}
6 | \usage{
7 | sparse_double(values, positions, length, default = 0)
8 | }
9 | \arguments{
10 | \item{values}{double vector, values of non-zero entries.}
11 |
12 | \item{positions}{integer vector, indices of non-zero entries.}
13 |
14 | \item{length}{integer value, Length of vector.}
15 |
16 | \item{default}{double value, value at indices not specified by \code{positions}.
17 | Defaults to \code{0}. Cannot be \code{NA}.}
18 | }
19 | \value{
20 | sparse double vector
21 | }
22 | \description{
23 | Construction of vectors where only values and positions are recorded. The
24 | Length and default values determine all other information.
25 | }
26 | \details{
27 | \code{values} and \code{positions} are expected to be the same length, and are allowed
28 | to both have zero length.
29 |
30 | Allowed values for \code{value} is double and integer values. integer values will
31 | be coerced to doubles. Missing values such as \code{NA} and \code{NA_real_} are
32 | allowed. Everything else is disallowed, This includes \code{Inf} and \code{NaN}. The
33 | values are also not allowed to take the same value as \code{default}.
34 |
35 | \code{positions} should be integers or integer-like doubles. Everything else is
36 | not allowed. Positions should furthermore be positive (\code{0} not allowed),
37 | unique, and in increasing order. Lastly they should all be smaller that
38 | \code{length}.
39 |
40 | For developers:
41 |
42 | setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a
43 | message each time a sparse vector has been forced to materialize.
44 | }
45 | \examples{
46 | sparse_double(numeric(), integer(), 10)
47 |
48 | sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
49 |
50 | str(
51 | sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 1000000000)
52 | )
53 | }
54 | \seealso{
55 | \code{\link[=sparse_integer]{sparse_integer()}} \code{\link[=sparse_character]{sparse_character()}}
56 | }
57 |
--------------------------------------------------------------------------------
/man/sparse_dummy.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_dummy.R
3 | \name{sparse_dummy}
4 | \alias{sparse_dummy}
5 | \title{Generate sparse dummy variables}
6 | \usage{
7 | sparse_dummy(x, one_hot = TRUE)
8 | }
9 | \arguments{
10 | \item{x}{A factor.}
11 |
12 | \item{one_hot}{A single logical value. Should the first factor level be
13 | included or not. Defaults to \code{FALSE}.}
14 | }
15 | \value{
16 | A list of sparse integer dummy variables.
17 | }
18 | \description{
19 | Generate sparse dummy variables
20 | }
21 | \details{
22 | Only factor variables can be used with \code{\link[=sparse_dummy]{sparse_dummy()}}. A call to
23 | \code{as.factor()} would be required for any other type of data.
24 |
25 | If only a single level is present after \code{one_hot} takes effect. Then the
26 | vector produced won't be sparse.
27 |
28 | A missing value at the \code{i}th element will produce missing values for all
29 | dummy variables at thr \code{i}th position.
30 | }
31 | \examples{
32 | x <- factor(c("a", "a", "b", "c", "d", "b"))
33 |
34 | sparse_dummy(x, one_hot = FALSE)
35 |
36 | x <- factor(c("a", "a", "b", "c", "d", "b"))
37 |
38 | sparse_dummy(x, one_hot = TRUE)
39 |
40 | x <- factor(c("a", NA, "b", "c", "d", NA))
41 |
42 | sparse_dummy(x, one_hot = FALSE)
43 |
44 | x <- factor(c("a", NA, "b", "c", "d", NA))
45 |
46 | sparse_dummy(x, one_hot = TRUE)
47 | }
48 |
--------------------------------------------------------------------------------
/man/sparse_integer.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_integer.R
3 | \name{sparse_integer}
4 | \alias{sparse_integer}
5 | \title{Create sparse integer vector}
6 | \usage{
7 | sparse_integer(values, positions, length, default = 0L)
8 | }
9 | \arguments{
10 | \item{values}{integer vector, values of non-zero entries.}
11 |
12 | \item{positions}{integer vector, indices of non-zero entries.}
13 |
14 | \item{length}{integer value, Length of vector.}
15 |
16 | \item{default}{integer value, value at indices not specified by \code{positions}.
17 | Defaults to \code{0L}. Cannot be \code{NA}.}
18 | }
19 | \value{
20 | sparse integer vector
21 | }
22 | \description{
23 | Construction of vectors where only values and positions are recorded. The
24 | Length and default values determine all other information.
25 | }
26 | \details{
27 | \code{values} and \code{positions} are expected to be the same length, and are allowed
28 | to both have zero length.
29 |
30 | Allowed values for \code{value} is integer values. This means that the double
31 | vector \code{c(1, 5, 4)} is accepted as it can be losslessly converted to the
32 | integer vector \code{c(1L, 5L, 4L)}. Missing values such as \code{NA} and \code{NA_real_}
33 | are allowed. Everything else is disallowed, This includes \code{Inf} and \code{NaN}.
34 | The values are also not allowed to take the same value as \code{default}.
35 |
36 | \code{positions} should be integers or integer-like doubles. Everything else is
37 | not allowed. Positions should furthermore be positive (\code{0} not allowed),
38 | unique, and in increasing order. Lastly they should all be smaller that
39 | \code{length}.
40 |
41 | For developers:
42 |
43 | setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a
44 | message each time a sparse vector has been forced to materialize.
45 | }
46 | \examples{
47 | sparse_integer(integer(), integer(), 10)
48 |
49 | sparse_integer(c(4, 5, 7), c(2, 5, 10), 10)
50 |
51 | str(
52 | sparse_integer(c(4, 5, 7), c(2, 5, 10), 1000000000)
53 | )
54 | }
55 | \seealso{
56 | \code{\link[=sparse_double]{sparse_double()}} \code{\link[=sparse_character]{sparse_character()}}
57 | }
58 |
--------------------------------------------------------------------------------
/man/sparse_is_na.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_is_na.R
3 | \name{sparse_is_na}
4 | \alias{sparse_is_na}
5 | \title{Detect Pressence of Missing Values}
6 | \usage{
7 | sparse_is_na(x, type = "logical")
8 | }
9 | \arguments{
10 | \item{x}{A sparse vector.}
11 |
12 | \item{type}{A single string. Most be one of \code{logical} or \code{integer}.
13 | Determines the resulting vector. If \code{type = integer} then a sparse vector is
14 | returned.}
15 | }
16 | \value{
17 | A logical vector or sparse integer vector.
18 | }
19 | \description{
20 | Detect Pressence of Missing Values
21 | }
22 | \details{
23 | This function, as with any of the other helper functions assumes that the
24 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
25 | and it is thus the users responsibility to perform input checking.
26 |
27 | Note that the resulting vector will be not be a sparse vector.
28 | }
29 | \examples{
30 | sparse_is_na(
31 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
32 | )
33 |
34 | sparse_is_na(
35 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
36 | type = "integer"
37 | )
38 | }
39 | \seealso{
40 | \code{\link[=sparse_which_na]{sparse_which_na()}}
41 | }
42 |
--------------------------------------------------------------------------------
/man/sparse_lag.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_lag.R
3 | \name{sparse_lag}
4 | \alias{sparse_lag}
5 | \title{Compute lagged values for sparse vectors}
6 | \usage{
7 | sparse_lag(x, n = 1L, default = NULL)
8 | }
9 | \arguments{
10 | \item{x}{A sparse vector.}
11 |
12 | \item{n}{Positive integer of length 1, giving the number of positions to lag
13 | by.}
14 |
15 | \item{default}{The value used to pad \verb{x`` back to its original size after the lag has been applied. The default, }NULL``, pads with a missing value.}
16 | }
17 | \value{
18 | sparse vector.
19 | }
20 | \description{
21 | Compute lagged values for sparse vectors
22 | }
23 | \details{
24 | This function, as with any of the other helper functions assumes that the
25 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
26 | and it is thus the users responsibility to perform input checking.
27 | }
28 | \examples{
29 | vec_dbl <- sparse_double(c(pi, 4, 5/2), c(1, 5, 7), 10)
30 |
31 | sparse_lag(vec_dbl)
32 | sparse_lag(vec_dbl, n = 3)
33 | sparse_lag(vec_dbl, n = 3, default = 0)
34 |
35 | vec_int <- sparse_integer(c(1, 2, 3), c(1, 5, 7), 10)
36 |
37 | sparse_lag(vec_int)
38 | sparse_lag(vec_int, n = 3)
39 | sparse_lag(vec_int, n = 3, default = 0L)
40 |
41 | vec_chr <- sparse_character(c("A", "B", "C"), c(1, 5, 7), 10)
42 |
43 | sparse_lag(vec_chr)
44 | sparse_lag(vec_chr, n = 3)
45 | sparse_lag(vec_chr, n = 3, default = "before")
46 |
47 | vec_lgl <- sparse_logical(c(TRUE, TRUE, TRUE), c(1, 5, 7), 10)
48 |
49 | sparse_lag(vec_lgl)
50 | sparse_lag(vec_lgl, n = 3)
51 | sparse_lag(vec_lgl, n = 3, default = FALSE)
52 | }
53 |
--------------------------------------------------------------------------------
/man/sparse_logical.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_logical.R
3 | \name{sparse_logical}
4 | \alias{sparse_logical}
5 | \title{Create sparse logical vector}
6 | \usage{
7 | sparse_logical(values, positions, length, default = FALSE)
8 | }
9 | \arguments{
10 | \item{values}{logical vector, values of non-zero entries.}
11 |
12 | \item{positions}{integer vector, indices of non-zero entries.}
13 |
14 | \item{length}{integer value, Length of vector.}
15 |
16 | \item{default}{logical value, value at indices not specified by \code{positions}.
17 | Defaults to \code{FALSE}. Cannot be \code{NA}.}
18 | }
19 | \value{
20 | sparse logical vector
21 | }
22 | \description{
23 | Construction of vectors where only values and positions are recorded. The
24 | Length and default values determine all other information.
25 | }
26 | \details{
27 | \code{values} and \code{positions} are expected to be the same length, and are allowed
28 | to both have zero length.
29 |
30 | Allowed values for \code{value} are logical values. Missing values such as \code{NA}
31 | and \code{NA_real_} are allowed. Everything else is disallowed, The values are
32 | also not allowed to take the same value as \code{default}.
33 |
34 | \code{positions} should be integers or integer-like doubles. Everything else is
35 | not allowed. Positions should furthermore be positive (\code{0} not allowed),
36 | unique, and in increasing order. Lastly they should all be smaller that
37 | \code{length}.
38 |
39 | For developers:
40 |
41 | setting \code{options("sparsevctrs.verbose_materialize" = TRUE)} will print a
42 | message each time a sparse vector has been forced to materialize.
43 | }
44 | \examples{
45 | sparse_logical(logical(), integer(), 10)
46 |
47 | sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 10)
48 |
49 | str(
50 | sparse_logical(c(TRUE, NA, TRUE), c(2, 5, 10), 1000000000)
51 | )
52 | }
53 | \seealso{
54 | \code{\link[=sparse_double]{sparse_double()}} \code{\link[=sparse_integer]{sparse_integer()}} \code{\link[=sparse_character]{sparse_character()}}
55 | }
56 |
--------------------------------------------------------------------------------
/man/sparse_mean.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_mean.R
3 | \name{sparse_mean}
4 | \alias{sparse_mean}
5 | \title{Calculate mean from sparse vectors}
6 | \usage{
7 | sparse_mean(x, wts = NULL, na_rm = FALSE)
8 | }
9 | \arguments{
10 | \item{x}{A sparse numeric vector.}
11 |
12 | \item{wts}{A numeric vector, should be same length as \code{x}.}
13 |
14 | \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.}
15 | }
16 | \value{
17 | single numeric value.
18 | }
19 | \description{
20 | Calculate mean from sparse vectors
21 | }
22 | \details{
23 | This function, as with any of the other helper functions assumes that the
24 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
25 | and it is thus the users responsibility to perform input checking.
26 | }
27 | \examples{
28 | sparse_mean(
29 | sparse_double(1000, 1, 1000)
30 | )
31 |
32 | sparse_mean(
33 | sparse_double(1000, 1, 1000, default = 1)
34 | )
35 |
36 | sparse_mean(
37 | sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
38 | )
39 |
40 | sparse_mean(
41 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
42 | )
43 |
44 | sparse_mean(
45 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
46 | na_rm = TRUE
47 | )
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/man/sparse_median.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_median.R
3 | \name{sparse_median}
4 | \alias{sparse_median}
5 | \title{Calculate median from sparse vectors}
6 | \usage{
7 | sparse_median(x, na_rm = FALSE)
8 | }
9 | \arguments{
10 | \item{x}{A sparse numeric vector.}
11 |
12 | \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.}
13 | }
14 | \value{
15 | single numeric value.
16 | }
17 | \description{
18 | Calculate median from sparse vectors
19 | }
20 | \details{
21 | This function, as with any of the other helper functions assumes that the
22 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
23 | and it is thus the users responsibility to perform input checking.
24 | }
25 | \examples{
26 | sparse_median(
27 | sparse_double(1000, 1, 1000)
28 | )
29 |
30 | sparse_median(
31 | sparse_double(1000, 1, 1000, default = 1)
32 | )
33 |
34 | sparse_median(
35 | sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
36 | )
37 |
38 | sparse_median(
39 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
40 | )
41 |
42 | sparse_median(
43 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
44 | na_rm = TRUE
45 | )
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/man/sparse_replace_na.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_replace_na.R
3 | \name{sparse_replace_na}
4 | \alias{sparse_replace_na}
5 | \title{Replace NAs with specified values in sparse vectors}
6 | \usage{
7 | sparse_replace_na(x, replace)
8 | }
9 | \arguments{
10 | \item{x}{A sparse vector.}
11 |
12 | \item{replace}{A single value.}
13 | }
14 | \value{
15 | A sparse vector.
16 | }
17 | \description{
18 | Replace NAs with specified values in sparse vectors
19 | }
20 | \details{
21 | This function, as with any of the other helper functions assumes that the
22 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
23 | and it is thus the users responsibility to perform input checking.
24 | The \code{replace} is likewise not type or length checked.
25 |
26 | The output type will match the values after coercion happens during
27 | replacement.
28 | }
29 | \examples{
30 | sparse_replace_na(
31 | sparse_double(c(10, NA, 11), c(1, 5, 10), 10),
32 | 5
33 | )
34 |
35 | sparse_replace_na(
36 | sparse_integer(c(10L, NA, 11L), c(1, 5, 10), 10),
37 | 5L
38 | )
39 |
40 | sparse_replace_na(
41 | sparse_character(c("A", NA, "E"), c(2, 5, 10), 10),
42 | "missing"
43 | )
44 | }
45 |
--------------------------------------------------------------------------------
/man/sparse_sd.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_sd.R
3 | \name{sparse_sd}
4 | \alias{sparse_sd}
5 | \title{Calculate standard diviation from sparse vectors}
6 | \usage{
7 | sparse_sd(x, na_rm = FALSE)
8 | }
9 | \arguments{
10 | \item{x}{A sparse numeric vector.}
11 |
12 | \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.}
13 | }
14 | \value{
15 | single numeric value.
16 | }
17 | \description{
18 | Calculate standard diviation from sparse vectors
19 | }
20 | \details{
21 | This function, as with any of the other helper functions assumes that the
22 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
23 | and it is thus the users responsibility to perform input checking.
24 |
25 | Much like \code{\link[=sd]{sd()}} it uses the denominator \code{n-1}.
26 | }
27 | \examples{
28 | sparse_sd(
29 | sparse_double(1000, 1, 1000)
30 | )
31 |
32 | sparse_sd(
33 | sparse_double(1000, 1, 1000, default = 1)
34 | )
35 |
36 | sparse_sd(
37 | sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
38 | )
39 |
40 | sparse_sd(
41 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
42 | )
43 |
44 | sparse_sd(
45 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
46 | na_rm = TRUE
47 | )
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/man/sparse_sqrt.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_sqrt.R
3 | \name{sparse_sqrt}
4 | \alias{sparse_sqrt}
5 | \title{Calculate sqrt of sparse vectors}
6 | \usage{
7 | sparse_sqrt(x)
8 | }
9 | \arguments{
10 | \item{x}{A sparse numeric vector.}
11 | }
12 | \value{
13 | A sparse double vector.
14 | }
15 | \description{
16 | Calculate sqrt of sparse vectors
17 | }
18 | \details{
19 | This function, as with any of the other helper functions assumes that the
20 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
21 | and it is thus the users responsibility to perform input checking.
22 |
23 | The output will be a double vector regardless of the input type.
24 | }
25 | \examples{
26 | sparse_sqrt(
27 | sparse_double(1000, 1, 10)
28 | )
29 |
30 | sparse_sqrt(
31 | sparse_integer(1000, 3, 10, default = 2)
32 | )
33 |
34 | sparse_sqrt(
35 | sparse_double(c(10, NA, 11), c(1, 5, 10), 10)
36 | )
37 | }
38 |
--------------------------------------------------------------------------------
/man/sparse_var.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_var.R
3 | \name{sparse_var}
4 | \alias{sparse_var}
5 | \title{Calculate variance from sparse vectors}
6 | \usage{
7 | sparse_var(x, na_rm = FALSE)
8 | }
9 | \arguments{
10 | \item{x}{A sparse numeric vector.}
11 |
12 | \item{na_rm}{Logical, whether to remove missing values. Defaults to \code{FALSE}.}
13 | }
14 | \value{
15 | single numeric value.
16 | }
17 | \description{
18 | Calculate variance from sparse vectors
19 | }
20 | \details{
21 | This function, as with any of the other helper functions assumes that the
22 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
23 | and it is thus the users responsibility to perform input checking.
24 |
25 | Much like \code{\link[=var]{var()}} it uses the denominator \code{n-1}.
26 | }
27 | \examples{
28 | sparse_var(
29 | sparse_double(1000, 1, 1000)
30 | )
31 |
32 | sparse_var(
33 | sparse_double(1000, 1, 1000, default = 1)
34 | )
35 |
36 | sparse_var(
37 | sparse_double(c(10, 50, 11), c(1, 50, 111), 1000)
38 | )
39 |
40 | sparse_var(
41 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
42 | )
43 |
44 | sparse_var(
45 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000),
46 | na_rm = TRUE
47 | )
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/man/sparse_which_na.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparse_which_na.R
3 | \name{sparse_which_na}
4 | \alias{sparse_which_na}
5 | \title{Which indices are Missing Values}
6 | \usage{
7 | sparse_which_na(x)
8 | }
9 | \arguments{
10 | \item{x}{A sparse vector.}
11 | }
12 | \value{
13 | A logical vector.
14 | }
15 | \description{
16 | Which indices are Missing Values
17 | }
18 | \details{
19 | This function, as with any of the other helper functions assumes that the
20 | input \code{x} is a sparse numeric vector. This is done for performance reasons,
21 | and it is thus the users responsibility to perform input checking.
22 | }
23 | \examples{
24 | sparse_which_na(
25 | sparse_double(c(10, NA, 11), c(1, 50, 111), 1000)
26 | )
27 | }
28 | \seealso{
29 | \code{\link[=sparse_is_na]{sparse_is_na()}}
30 | }
31 |
--------------------------------------------------------------------------------
/man/sparsevctrs-package.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparsevctrs-package.R
3 | \docType{package}
4 | \name{sparsevctrs-package}
5 | \alias{sparsevctrs}
6 | \alias{sparsevctrs-package}
7 | \title{sparsevctrs: Sparse Vectors for Use in Data Frames}
8 | \description{
9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
10 |
11 | Provides sparse vectors powered by ALTREP (Alternative Representations for R Objects) that behave like regular vectors, and can thus be used in data frames. Also provides tools to convert between sparse matrices and data frames with sparse columns and functions to interact with sparse vectors.
12 | }
13 | \seealso{
14 | Useful links:
15 | \itemize{
16 | \item \url{https://github.com/r-lib/sparsevctrs}
17 | \item \url{https://r-lib.github.io/sparsevctrs/}
18 | \item Report bugs at \url{https://github.com/r-lib/sparsevctrs/issues}
19 | }
20 |
21 | }
22 | \author{
23 | \strong{Maintainer}: Emil Hvitfeldt \email{emil.hvitfeldt@posit.co} (\href{https://orcid.org/0000-0002-0679-1945}{ORCID})
24 |
25 | Other contributors:
26 | \itemize{
27 | \item Davis Vaughan \email{davis@posit.co} [contributor]
28 | \item Posit Software, PBC (03wc8by49) [copyright holder, funder]
29 | }
30 |
31 | }
32 | \keyword{internal}
33 |
--------------------------------------------------------------------------------
/man/sparsevctrs_options.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/options.R
3 | \name{sparsevctrs_options}
4 | \alias{sparsevctrs_options}
5 | \title{sparsevctrs options}
6 | \description{
7 | These options can be set with \code{options()}.
8 | }
9 | \details{
10 | \subsection{sparsevctrs.verbose_materialize}{
11 |
12 | This option is meant to be used as a diagnostic tool. Materialization of
13 | sparse vectors are done silently by default. This can make it hard to
14 | determine if your code is doing what you want.
15 |
16 | Setting \code{sparsevctrs.verbose_materialize} is a way to alert when
17 | materialization occurs. Note that only the first materialization is counted
18 | for the options below, as the materialized vector is cached.
19 |
20 | Setting \code{sparsevctrs.verbose_materialize = 1} or
21 | \code{sparsevctrs.verbose_materialize = TRUE} will result in a message being
22 | emitted each time a sparse vector is materialized.
23 |
24 | Setting \code{sparsevctrs.verbose_materialize = 2} will result in a warning being
25 | thrown each time a sparse vector is materialized.
26 |
27 | Setting \code{sparsevctrs.verbose_materialize = 3} will result in an error being
28 | thrown each time a sparse vector is materialized.
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/man/sparsity.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sparsity.R
3 | \name{sparsity}
4 | \alias{sparsity}
5 | \title{Calculate sparsity of data frames, matrices, and sparse matrices}
6 | \usage{
7 | sparsity(x, sample = NULL)
8 | }
9 | \arguments{
10 | \item{x}{a data frame, matrix of sparse matrix.}
11 |
12 | \item{sample}{a integer or \code{NULL}. Number of rows to sample to estimate
13 | sparsity. If \code{NULL} then no sampling is performed. Will not be used when
14 | \code{x} is a sparse matrix. Defaults to \code{NULL}.}
15 | }
16 | \value{
17 | a single number, between 0 and 1.
18 | }
19 | \description{
20 | Turning data frame with sparse columns into sparse matrix using
21 | \code{\link[Matrix:sparseMatrix]{Matrix::sparseMatrix()}}.
22 | }
23 | \details{
24 | Only numeric 0s are considered zeroes in this calculations. Missing values,
25 | logical vectors and then string \code{"0"} aren't counted.
26 | }
27 | \examples{
28 |
29 | # data frame
30 | sparsity(mtcars)
31 |
32 | # Matrix
33 | set.seed(1234)
34 | mat <- matrix(sample(0:1, 100, TRUE, c(0.9, 0.1)), nrow = 10)
35 | colnames(mat) <- letters[1:10]
36 |
37 | sparsity(mat)
38 |
39 | # Sparse matrix
40 | sparse_mat <- Matrix::Matrix(mat, sparse = TRUE)
41 |
42 | sparsity(sparse_mat)
43 | }
44 |
--------------------------------------------------------------------------------
/man/type-predicates.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/type-predicates.R
3 | \name{type-predicates}
4 | \alias{type-predicates}
5 | \alias{is_sparse_vector}
6 | \alias{is_sparse_numeric}
7 | \alias{is_sparse_double}
8 | \alias{is_sparse_integer}
9 | \alias{is_sparse_character}
10 | \alias{is_sparse_logical}
11 | \title{Sparse vector type checkers}
12 | \usage{
13 | is_sparse_vector(x)
14 |
15 | is_sparse_numeric(x)
16 |
17 | is_sparse_double(x)
18 |
19 | is_sparse_integer(x)
20 |
21 | is_sparse_character(x)
22 |
23 | is_sparse_logical(x)
24 | }
25 | \arguments{
26 | \item{x}{value to be checked.}
27 | }
28 | \value{
29 | single logical value
30 | }
31 | \description{
32 | Helper functions to determine whether an vector is a sparse vector or not.
33 | }
34 | \details{
35 | \code{is_sparse_vector()} is a general function that detects any type of sparse
36 | vector created with this package. \code{is_sparse_double()},
37 | \code{is_sparse_integer()}, \code{is_sparse_character()}, and \code{is_sparse_logical()} are
38 | more specific functions that only detects the type. \code{is_sparse_numeric()}
39 | matches both sparse integers and doubles.
40 | }
41 | \examples{
42 | x_sparse <- sparse_double(c(pi, 5, 0.1), c(2, 5, 10), 10)
43 | x_dense <- c(0, pi, 0, 0, 0.5, 0, 0, 0, 0, 0.1)
44 |
45 | is_sparse_vector(x_sparse)
46 | is_sparse_vector(x_dense)
47 |
48 | is_sparse_double(x_sparse)
49 | is_sparse_double(x_dense)
50 |
51 | is_sparse_character(x_sparse)
52 | is_sparse_character(x_dense)
53 |
54 | # Forced materialization
55 | is_sparse_vector(x_sparse[])
56 | }
57 |
--------------------------------------------------------------------------------
/revdep/README.md:
--------------------------------------------------------------------------------
1 | # Platform
2 |
3 | |field |value |
4 | |:--------|:----------------------------------------|
5 | |version |R version 4.5.0 (2025-04-11) |
6 | |os |macOS Sequoia 15.5 |
7 | |system |aarch64, darwin20 |
8 | |ui |X11 |
9 | |language |(EN) |
10 | |collate |en_US.UTF-8 |
11 | |ctype |en_US.UTF-8 |
12 | |tz |Europe/Copenhagen |
13 | |date |2025-05-25 |
14 | |pandoc |3.6.1 @ /usr/local/bin/ (via rmarkdown) |
15 | |quarto |1.6.42 @ /Applications/quarto/bin/quarto |
16 |
17 | # Dependencies
18 |
19 | |package |old |new |Δ |
20 | |:-----------|:-----|:----------|:--|
21 | |sparsevctrs |0.3.3 |0.3.3.9000 |* |
22 | |cli |3.6.5 |3.6.5 | |
23 | |glue |1.8.0 |1.8.0 | |
24 | |lifecycle |1.0.4 |1.0.4 | |
25 | |rlang |1.1.6 |1.1.6 | |
26 | |vctrs |0.6.5 |0.6.5 | |
27 |
28 | # Revdeps
29 |
30 |
--------------------------------------------------------------------------------
/revdep/cran.md:
--------------------------------------------------------------------------------
1 | ## revdepcheck results
2 |
3 | We checked 5 reverse dependencies, comparing R CMD check results across CRAN and dev versions of this package.
4 |
5 | * We saw 0 new problems
6 | * We failed to check 0 packages
7 |
8 |
--------------------------------------------------------------------------------
/revdep/failures.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*
--------------------------------------------------------------------------------
/revdep/problems.md:
--------------------------------------------------------------------------------
1 | *Wow, no problems at all. :)*
--------------------------------------------------------------------------------
/sparsevctrs.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: No
4 | SaveWorkspace: No
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | LineEndingConversion: Posix
18 |
19 | BuildType: Package
20 | PackageUseDevtools: Yes
21 | PackageInstallArgs: --no-multiarch --with-keep.source
22 | PackageRoxygenize: rd,collate,namespace
23 |
--------------------------------------------------------------------------------
/src/.gitignore:
--------------------------------------------------------------------------------
1 | *.o
2 | *.so
3 | *.dll
4 |
--------------------------------------------------------------------------------
/src/altrep-sparse-double.h:
--------------------------------------------------------------------------------
1 | #ifndef SPARSEVCTRS_ALTREP_SPARSE_DOUBLE
2 | #define SPARSEVCTRS_ALTREP_SPARSE_DOUBLE
3 |
4 | #define R_NO_REMAP
5 | #include
6 | #include "sparse-utils.h"
7 |
8 | SEXP new_sparse_double(SEXP val, SEXP pos, SEXP len, SEXP def);
9 |
10 | #endif
11 |
--------------------------------------------------------------------------------
/src/altrep-sparse-integer.h:
--------------------------------------------------------------------------------
1 | #ifndef SPARSEVCTRS_ALTREP_SPARSE_INTEGER
2 | #define SPARSEVCTRS_ALTREP_SPARSE_INTEGER
3 |
4 | #define R_NO_REMAP
5 | #include
6 | #include "sparse-utils.h"
7 |
8 | SEXP new_sparse_integer(SEXP val, SEXP pos, SEXP len, SEXP def);
9 |
10 | #endif
11 |
--------------------------------------------------------------------------------
/src/init.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "sparse-extractors.h"
3 | #include "sparse-utils.h"
4 | #include "sparse-dummy.h"
5 | #include "sparse-arithmatic.h"
6 |
7 | // Defined in altrep-sparse-double.c
8 | extern SEXP ffi_altrep_new_sparse_double(SEXP);
9 | extern void sparsevctrs_init_altrep_sparse_double(DllInfo*);
10 |
11 | // Defined in altrep-sparse-integer.c
12 | extern SEXP ffi_altrep_new_sparse_integer(SEXP);
13 | extern void sparsevctrs_init_altrep_sparse_integer(DllInfo*);
14 |
15 | // Defined in altrep-sparse-string.c
16 | extern SEXP ffi_altrep_new_sparse_string(SEXP);
17 | extern void sparsevctrs_init_altrep_sparse_string(DllInfo*);
18 |
19 | // Defined in altrep-sparse-logical.c
20 | extern SEXP ffi_altrep_new_sparse_logical(SEXP);
21 | extern void sparsevctrs_init_altrep_sparse_logical(DllInfo*);
22 |
23 | static const R_CallMethodDef CallEntries[] = {
24 | {"ffi_altrep_new_sparse_double", (DL_FUNC) &ffi_altrep_new_sparse_double, 1
25 | },
26 | {"ffi_altrep_new_sparse_integer",
27 | (DL_FUNC) &ffi_altrep_new_sparse_integer,
28 | 1},
29 | {"ffi_altrep_new_sparse_string", (DL_FUNC) &ffi_altrep_new_sparse_string, 1
30 | },
31 | {"ffi_altrep_new_sparse_logical",
32 | (DL_FUNC) &ffi_altrep_new_sparse_logical,
33 | 1},
34 | {"ffi_altrep_sparse_positions", (DL_FUNC) &ffi_altrep_sparse_positions, 1},
35 | {"ffi_altrep_sparse_values", (DL_FUNC) &ffi_altrep_sparse_values, 1},
36 | {"ffi_altrep_sparse_default", (DL_FUNC) &ffi_altrep_sparse_default, 1},
37 | {"ffi_extract_altrep_class", (DL_FUNC) &ffi_extract_altrep_class, 1},
38 | {"ffi_is_sparse_vector", (DL_FUNC) &ffi_is_sparse_vector, 1},
39 | {"ffi_is_altrep_non_sparse_vector",
40 | (DL_FUNC) &ffi_is_altrep_non_sparse_vector,
41 | 1},
42 | {"ffi_sparse_dummy", (DL_FUNC) &ffi_sparse_dummy, 4},
43 | {"ffi_sparse_dummy_na", (DL_FUNC) &ffi_sparse_dummy_na, 4},
44 | {"ffi_sparse_multiplication", (DL_FUNC) &ffi_sparse_multiplication, 2},
45 | {NULL, NULL, 0}
46 | };
47 |
48 | void R_init_sparsevctrs(DllInfo* dll) {
49 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
50 | R_useDynamicSymbols(dll, FALSE);
51 |
52 | // altrep classes
53 | sparsevctrs_init_altrep_sparse_double(dll);
54 | sparsevctrs_init_altrep_sparse_integer(dll);
55 | sparsevctrs_init_altrep_sparse_string(dll);
56 | sparsevctrs_init_altrep_sparse_logical(dll);
57 | }
58 |
--------------------------------------------------------------------------------
/src/sparse-arithmatic.h:
--------------------------------------------------------------------------------
1 | #ifndef SPARSEVCTRS_SPARSE_ARITHMATIC_H
2 | #define SPARSEVCTRS_SPARSE_ARITHMATIC_H
3 |
4 | #define R_NO_REMAP
5 | #include
6 | #include "sparse-utils.h"
7 |
8 | SEXP ffi_sparse_multiplication(SEXP x, SEXP y);
9 |
10 | #endif
11 |
--------------------------------------------------------------------------------
/src/sparse-dummy.c:
--------------------------------------------------------------------------------
1 | #include "sparse-dummy.h"
2 |
3 | // Defined in altrep-sparse-integer.c
4 | extern SEXP ffi_altrep_new_sparse_integer(SEXP);
5 | extern void sparsevctrs_init_altrep_sparse_integer(DllInfo*);
6 |
7 | SEXP create_dummy(SEXP pos, R_xlen_t length) {
8 | SEXP out = PROTECT(Rf_allocVector(VECSXP, 4));
9 |
10 | const R_xlen_t pos_len = Rf_length(pos);
11 |
12 | // values
13 | SEXP out_val = Rf_allocVector(INTSXP, pos_len);
14 | SET_VECTOR_ELT(out, 0, out_val);
15 | int* v_out_val = INTEGER(out_val);
16 |
17 | for (R_xlen_t i = 0; i < pos_len; ++i) {
18 | v_out_val[i] = 1;
19 | }
20 |
21 | // positions
22 | SET_VECTOR_ELT(out, 1, pos);
23 |
24 | // length
25 | const SEXP out_length = Rf_ScalarInteger((int) length);
26 | SET_VECTOR_ELT(out, 2, out_length);
27 |
28 | // default
29 | const SEXP out_default = Rf_ScalarInteger(0);
30 | SET_VECTOR_ELT(out, 3, out_default);
31 |
32 | UNPROTECT(1);
33 |
34 | return ffi_altrep_new_sparse_integer(out);
35 | }
36 |
37 | SEXP create_dummy_na(SEXP values, SEXP pos, R_xlen_t length) {
38 | SEXP out = PROTECT(Rf_allocVector(VECSXP, 4));
39 |
40 | // values
41 | SET_VECTOR_ELT(out, 0, values);
42 |
43 | // positions
44 | SET_VECTOR_ELT(out, 1, pos);
45 |
46 | // length
47 | const SEXP out_length = Rf_ScalarInteger((int) length);
48 | SET_VECTOR_ELT(out, 2, out_length);
49 |
50 | // default
51 | const SEXP out_default = Rf_ScalarInteger(0);
52 | SET_VECTOR_ELT(out, 3, out_default);
53 |
54 | UNPROTECT(1);
55 |
56 | return ffi_altrep_new_sparse_integer(out);
57 | }
58 |
59 | SEXP ffi_sparse_dummy(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot) {
60 | const R_xlen_t n_lvls = Rf_length(lvls);
61 | const R_xlen_t len = Rf_length(x);
62 |
63 | const int* v_x = INTEGER_RO(x);
64 |
65 | // Generate list of integer vectors. One vector for each level, with its
66 | // length equal to the number of occurances of that level.
67 | SEXP out = PROTECT(Rf_allocVector(VECSXP, n_lvls));
68 |
69 | for (R_xlen_t i = 0; i < n_lvls; ++i) {
70 | R_xlen_t n_val = INTEGER_ELT(counts, i);
71 | SET_VECTOR_ELT(out, i, Rf_allocVector(INTSXP, n_val));
72 | }
73 |
74 | // Vector of positions to keep track of how far into each position vector we
75 | // are. Initialize to 0 to indicate first position.
76 | SEXP pos_index = PROTECT(Rf_allocVector(INTSXP, n_lvls));
77 | int* v_pos_index = INTEGER(pos_index);
78 | for (R_xlen_t i = 0; i < n_lvls; ++i) {
79 | SET_INTEGER_ELT(pos_index, i, 0);
80 | }
81 |
82 | // Itterate over input, find its position index, and place the position value
83 | // at the index. Increment specific index.
84 | if ((Rboolean) LOGICAL_ELT(one_hot, 0) == TRUE) {
85 | for (R_xlen_t i = 0; i < len; ++i) {
86 | int current_val = v_x[i] - 1;
87 |
88 | int index = v_pos_index[current_val];
89 |
90 | SEXP pos_vec = VECTOR_ELT(out, current_val);
91 | int* v_pos_vec = INTEGER(pos_vec);
92 |
93 | // we need the result to be 1-indexed
94 | v_pos_vec[index] = i + 1;
95 | v_pos_index[current_val]++;
96 | }
97 | } else {
98 | for (R_xlen_t i = 0; i < len; ++i) {
99 | int current_val = v_x[i] - 1;
100 |
101 | if (current_val == -1) {
102 | continue;
103 | }
104 |
105 | int index = v_pos_index[current_val];
106 |
107 | SEXP pos_vec = VECTOR_ELT(out, current_val);
108 | int* v_pos_vec = INTEGER(pos_vec);
109 |
110 | // we need the result to be 1-indexed
111 | v_pos_vec[index] = i + 1;
112 | v_pos_index[current_val]++;
113 | }
114 | }
115 |
116 | // Turn list of integer vectors with positions, into list of sparse integer
117 | // vectors.
118 | for (R_xlen_t i = 0; i < n_lvls; ++i) {
119 | SEXP positions = VECTOR_ELT(out, i);
120 | SEXP dummy = create_dummy(positions, len);
121 | SET_VECTOR_ELT(out, i, dummy);
122 | }
123 |
124 | UNPROTECT(2);
125 |
126 | return out;
127 | }
128 |
129 | SEXP ffi_sparse_dummy_na(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot) {
130 | const R_xlen_t n_lvls = Rf_length(lvls);
131 | const R_xlen_t len = Rf_length(x);
132 |
133 | const int* v_x = INTEGER_RO(x);
134 |
135 | // Generate lists of integer vectors. One vector for each level, with its
136 | // length equal to the number of occurances of that level.
137 | SEXP out_positions = PROTECT(Rf_allocVector(VECSXP, n_lvls));
138 | SEXP out_values = PROTECT(Rf_allocVector(VECSXP, n_lvls));
139 |
140 | for (R_xlen_t i = 0; i < n_lvls; ++i) {
141 | R_xlen_t n_val = INTEGER_ELT(counts, i);
142 | SET_VECTOR_ELT(out_values, i, Rf_allocVector(INTSXP, n_val));
143 | SET_VECTOR_ELT(out_positions, i, Rf_allocVector(INTSXP, n_val));
144 | }
145 |
146 | // Vector of positions to keep track of how far into each position vector we
147 | // are. Initialize to 0 to indicate first position.
148 | SEXP pos_index = PROTECT(Rf_allocVector(INTSXP, n_lvls));
149 | int* v_pos_index = INTEGER(pos_index);
150 | for (R_xlen_t i = 0; i < n_lvls; ++i) {
151 | SET_INTEGER_ELT(pos_index, i, 0);
152 | }
153 |
154 | // Itterate over input, find its position index, and place the position value
155 | // at the index. Increment specific index.
156 |
157 | if ((Rboolean) LOGICAL_ELT(one_hot, 0) == TRUE) {
158 | for (R_xlen_t i = 0; i < len; ++i) {
159 | int current_val = v_x[i];
160 |
161 | if (current_val == R_NaInt) {
162 | for (R_xlen_t j = 0; j < n_lvls; ++j) {
163 | int index = v_pos_index[j];
164 |
165 | SEXP pos_vec = VECTOR_ELT(out_positions, j);
166 | int* v_pos_vec = INTEGER(pos_vec);
167 | SEXP val_vec = VECTOR_ELT(out_values, j);
168 | int* v_val_vec = INTEGER(val_vec);
169 |
170 | v_pos_vec[index] = i + 1;
171 | v_val_vec[index] = R_NaInt;
172 | v_pos_index[j]++;
173 | }
174 | } else {
175 | --current_val;
176 | int index = v_pos_index[current_val];
177 |
178 | SEXP pos_vec = VECTOR_ELT(out_positions, current_val);
179 | int* v_pos_vec = INTEGER(pos_vec);
180 | SEXP val_vec = VECTOR_ELT(out_values, current_val);
181 | int* v_val_vec = INTEGER(val_vec);
182 |
183 | // we need the result to be 1-indexed
184 | v_pos_vec[index] = i + 1;
185 | v_val_vec[index] = 1;
186 | v_pos_index[current_val]++;
187 | }
188 | }
189 | } else {
190 | for (R_xlen_t i = 0; i < len; ++i) {
191 | int current_val = v_x[i];
192 |
193 | if (current_val == R_NaInt) {
194 | for (R_xlen_t j = 0; j < n_lvls; ++j) {
195 | int index = v_pos_index[j];
196 |
197 | SEXP pos_vec = VECTOR_ELT(out_positions, j);
198 | int* v_pos_vec = INTEGER(pos_vec);
199 | SEXP val_vec = VECTOR_ELT(out_values, j);
200 | int* v_val_vec = INTEGER(val_vec);
201 |
202 | v_pos_vec[index] = i + 1;
203 | v_val_vec[index] = R_NaInt;
204 | v_pos_index[j]++;
205 | }
206 | } else {
207 | --current_val;
208 | if (current_val == -1) {
209 | continue;
210 | }
211 |
212 | int index = v_pos_index[current_val];
213 |
214 | SEXP pos_vec = VECTOR_ELT(out_positions, current_val);
215 | int* v_pos_vec = INTEGER(pos_vec);
216 | SEXP val_vec = VECTOR_ELT(out_values, current_val);
217 | int* v_val_vec = INTEGER(val_vec);
218 |
219 | // we need the result to be 1-indexed
220 | v_pos_vec[index] = i + 1;
221 | v_val_vec[index] = 1;
222 | v_pos_index[current_val]++;
223 | }
224 | }
225 | }
226 |
227 | // Turn list of integer vectors with positions, into list of sparse integer
228 | // vectors.
229 | for (R_xlen_t i = 0; i < n_lvls; ++i) {
230 | SEXP positions = VECTOR_ELT(out_positions, i);
231 | SEXP values = VECTOR_ELT(out_values, i);
232 | SEXP dummy = create_dummy_na(values, positions, len);
233 | SET_VECTOR_ELT(out_positions, i, dummy);
234 | }
235 |
236 | UNPROTECT(3);
237 |
238 | return out_positions;
239 | }
240 |
--------------------------------------------------------------------------------
/src/sparse-dummy.h:
--------------------------------------------------------------------------------
1 | #ifndef SPARSEVCTRS_SPARSE_DUMMY_H
2 | #define SPARSEVCTRS_SPARSE_DUMMY_H
3 |
4 | #define R_NO_REMAP
5 | #include
6 | #include "sparse-utils.h"
7 |
8 | SEXP ffi_sparse_dummy(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot);
9 |
10 | SEXP ffi_sparse_dummy_na(SEXP x, SEXP lvls, SEXP counts, SEXP one_hot);
11 |
12 | #endif
13 |
--------------------------------------------------------------------------------
/src/sparse-extractors.c:
--------------------------------------------------------------------------------
1 | #include "sparse-extractors.h"
2 |
3 | SEXP ffi_altrep_sparse_positions(SEXP x) {
4 | SEXP out = extract_pos(x);
5 | return out;
6 | }
7 |
8 | SEXP ffi_altrep_sparse_values(SEXP x) {
9 | SEXP out = extract_val(x);
10 | return out;
11 | }
12 |
13 | SEXP ffi_altrep_sparse_default(SEXP x) {
14 | SEXP out = extract_default(x);
15 | return out;
16 | }
17 |
--------------------------------------------------------------------------------
/src/sparse-extractors.h:
--------------------------------------------------------------------------------
1 | #ifndef SPARSEVCTRS_SPARSE_EXTRACTORS_H
2 | #define SPARSEVCTRS_SPARSE_EXTRACTORS_H
3 |
4 | #define R_NO_REMAP
5 | #include
6 | #include "sparse-utils.h"
7 |
8 | SEXP ffi_altrep_sparse_positions(SEXP x);
9 |
10 | SEXP ffi_altrep_sparse_values(SEXP x);
11 |
12 | SEXP ffi_altrep_sparse_default(SEXP x);
13 |
14 | #endif
15 |
--------------------------------------------------------------------------------
/src/sparse-utils.c:
--------------------------------------------------------------------------------
1 | #include "sparse-utils.h"
2 |
3 | SEXP extract_val(SEXP x) {
4 | SEXP data1 = R_altrep_data1(x);
5 | SEXP out = VECTOR_ELT(data1, 0);
6 | return out;
7 | }
8 |
9 | SEXP extract_pos(SEXP x) {
10 | SEXP data1 = R_altrep_data1(x);
11 | SEXP out = VECTOR_ELT(data1, 1);
12 | return out;
13 | }
14 |
15 | R_xlen_t extract_len(SEXP x) {
16 | SEXP data1 = R_altrep_data1(x);
17 | SEXP len = VECTOR_ELT(data1, 2);
18 |
19 | R_xlen_t out = (R_xlen_t) INTEGER_ELT(len, 0);
20 |
21 | return out;
22 | }
23 |
24 | SEXP extract_default(SEXP x) {
25 | SEXP data1 = R_altrep_data1(x);
26 | SEXP out = VECTOR_ELT(data1, 3);
27 |
28 | return out;
29 | }
30 |
31 | double extract_default_double(SEXP x) {
32 | SEXP default_val = extract_default(x);
33 | double out = REAL_ELT(default_val, 0);
34 |
35 | return out;
36 | }
37 |
38 | int extract_default_integer(SEXP x) {
39 | SEXP default_val = extract_default(x);
40 | int out = INTEGER_ELT(default_val, 0);
41 |
42 | return out;
43 | }
44 |
45 | SEXP extract_default_string(SEXP x) {
46 | SEXP default_val = extract_default(x);
47 | SEXP out = STRING_ELT(default_val, 0);
48 |
49 | return out;
50 | }
51 |
52 | Rboolean extract_default_logical(SEXP x) {
53 | SEXP default_val = extract_default(x);
54 | Rboolean out = LOGICAL_ELT(default_val, 0);
55 |
56 | return out;
57 | }
58 |
59 | bool is_altrep(SEXP x) {
60 | return (bool) ALTREP(x);
61 | }
62 |
63 | SEXP ffi_extract_altrep_class(SEXP x) {
64 | if (!is_altrep(x)) {
65 | return (R_NilValue);
66 | }
67 |
68 | return ATTRIB(ALTREP_CLASS(x));
69 | }
70 |
71 | static inline SEXP altrep_package(SEXP x) {
72 | return VECTOR_ELT(Rf_PairToVectorList(ATTRIB(ALTREP_CLASS(x))), 1);
73 | }
74 |
75 | SEXP ffi_is_sparse_vector(SEXP x) {
76 | if (!is_altrep(x)) {
77 | return (Rf_ScalarLogical(FALSE));
78 | }
79 |
80 | return Rf_ScalarLogical(altrep_package(x) == Rf_install("sparsevctrs"));
81 | }
82 |
83 | SEXP ffi_is_altrep_non_sparse_vector(SEXP x) {
84 | if (!is_altrep(x)) {
85 | return (Rf_ScalarLogical(FALSE));
86 | }
87 |
88 | return Rf_ScalarLogical(altrep_package(x) != Rf_install("sparsevctrs"));
89 | }
90 |
91 | static inline R_xlen_t midpoint(R_xlen_t lhs, R_xlen_t rhs) {
92 | return lhs + (rhs - lhs) / 2;
93 | }
94 |
95 | R_xlen_t binary_search(int needle, const int* v_haystack, R_xlen_t size) {
96 | R_xlen_t loc_lower_bound = 0;
97 | R_xlen_t loc_upper_bound = size - 1;
98 |
99 | while (loc_lower_bound <= loc_upper_bound) {
100 | const R_xlen_t loc_middle_bound =
101 | midpoint(loc_lower_bound, loc_upper_bound);
102 | const R_xlen_t haystack = v_haystack[loc_middle_bound];
103 |
104 | if (needle == haystack) {
105 | return loc_middle_bound;
106 | } else if (needle < haystack) {
107 | loc_upper_bound = loc_middle_bound - 1;
108 | } else {
109 | // needle > haystack
110 | loc_lower_bound = loc_middle_bound + 1;
111 | }
112 | }
113 |
114 | return size;
115 | }
116 |
117 | bool is_index_handleable(SEXP x) {
118 | if (TYPEOF(x) != INTSXP) {
119 | // i.e. can't handle indexing for long vectors
120 | return false;
121 | }
122 |
123 | R_xlen_t size = Rf_xlength(x);
124 | const int* v_x = INTEGER_RO(x);
125 |
126 | for (R_xlen_t i = 0; i < size; ++i) {
127 | const int elt = v_x[i];
128 |
129 | if (elt == NA_INTEGER) {
130 | continue;
131 | }
132 | if (elt == 0) {
133 | // `0` indices would create a result with a size `< length(indx)`, and we
134 | // can't easily handle that right now
135 | return false;
136 | }
137 | if (elt < 0) {
138 | // Pathological case, should never happen
139 | return false;
140 | }
141 | }
142 |
143 | return true;
144 | }
145 |
146 | void verbose_materialize(void) {
147 | SEXP option = Rf_GetOption1(Rf_install("sparsevctrs.verbose_materialize"));
148 |
149 | if (!Rf_isNull(option)) {
150 | if (TYPEOF(option) == LGLSXP) {
151 | Rprintf("sparsevctrs: Sparse vector materialized\n");
152 | }
153 | if (TYPEOF(option) == REALSXP) {
154 | if (*REAL_RO(option) == 3) {
155 | Rf_error("sparsevctrs: Sparse vector materialized");
156 | } else if (*REAL_RO(option) == 2) {
157 | Rf_warning("sparsevctrs: Sparse vector materialized");
158 | } else {
159 | Rprintf("sparsevctrs: Sparse vector materialized\n");
160 | }
161 | }
162 | if (TYPEOF(option) == INTSXP) {
163 | if (*INTEGER_RO(option) == 3) {
164 | Rf_error("sparsevctrs: Sparse vector materialized");
165 | } else if (*INTEGER_RO(option) == 2) {
166 | Rf_warning("sparsevctrs: Sparse vector materialized");
167 | } else {
168 | Rprintf("sparsevctrs: Sparse vector materialized\n");
169 | }
170 | }
171 | }
172 | }
173 |
174 | void sort_pos_and_val(SEXP pos, SEXP val) {
175 | R_xlen_t len = Rf_length(pos);
176 |
177 | // nothing to sort -> stop early
178 | if (len < 2) {
179 | return;
180 | }
181 |
182 | SEXP index = PROTECT(Rf_allocVector(INTSXP, len));
183 | SEXP sorted_pos = PROTECT(Rf_allocVector(INTSXP, len));
184 |
185 | // Initialize pairs array
186 | for (R_xlen_t i = 0; i < len; i++) {
187 | SET_INTEGER_ELT(sorted_pos, i, INTEGER_ELT(pos, i));
188 | SET_INTEGER_ELT(index, i, i);
189 | }
190 |
191 | // Sort pairs based on values
192 | for (int i = 0; i < len - 1; i++) {
193 | for (int j = 0; j < len - i - 1; j++) {
194 | if (INTEGER_ELT(sorted_pos, j) > INTEGER_ELT(sorted_pos, j + 1)) {
195 | // Swap pairs
196 | int temp_pos = INTEGER_ELT(sorted_pos, j);
197 | int temp_index = INTEGER_ELT(index, j);
198 |
199 | SET_INTEGER_ELT(sorted_pos, j, INTEGER_ELT(sorted_pos, j + 1));
200 | SET_INTEGER_ELT(sorted_pos, j + 1, temp_pos);
201 |
202 | SET_INTEGER_ELT(index, j, INTEGER_ELT(index, j + 1));
203 | SET_INTEGER_ELT(index, j + 1, temp_index);
204 | }
205 | }
206 | }
207 |
208 | for (R_xlen_t i = 0; i < len; i++) {
209 | SET_INTEGER_ELT(pos, i, INTEGER_ELT(sorted_pos, i));
210 | }
211 |
212 | if (Rf_isInteger(val)) {
213 | SEXP sorted_val = PROTECT(Rf_allocVector(INTSXP, len));
214 |
215 | for (R_xlen_t i = 0; i < len; i++) {
216 | int cur_index = INTEGER_ELT(index, i);
217 |
218 | SET_INTEGER_ELT(sorted_val, i, INTEGER_ELT(val, cur_index));
219 | }
220 |
221 | for (R_xlen_t i = 0; i < len; i++) {
222 | SET_INTEGER_ELT(val, i, INTEGER_ELT(sorted_val, i));
223 | }
224 | } else {
225 | SEXP sorted_val = PROTECT(Rf_allocVector(REALSXP, len));
226 |
227 | for (R_xlen_t i = 0; i < len; i++) {
228 | int cur_index = INTEGER_ELT(index, i);
229 |
230 | SET_REAL_ELT(sorted_val, i, REAL_ELT(val, cur_index));
231 | }
232 |
233 | for (R_xlen_t i = 0; i < len; i++) {
234 | SET_REAL_ELT(val, i, REAL_ELT(sorted_val, i));
235 | }
236 | }
237 | UNPROTECT(3);
238 | }
239 |
--------------------------------------------------------------------------------
/src/sparse-utils.h:
--------------------------------------------------------------------------------
1 | #ifndef SPARSEVCTRS_SPARSE_UTILS_H
2 | #define SPARSEVCTRS_SPARSE_UTILS_H
3 |
4 | #define R_NO_REMAP
5 | #include
6 | #include
7 |
8 | #define SVCTRS_DATAPTR(x) (void*) DATAPTR_RO(x);
9 |
10 | SEXP extract_val(SEXP x);
11 |
12 | SEXP extract_pos(SEXP x);
13 |
14 | R_xlen_t extract_len(SEXP x);
15 |
16 | SEXP extract_default(SEXP x);
17 |
18 | double extract_default_double(SEXP x);
19 |
20 | int extract_default_integer(SEXP x);
21 |
22 | SEXP extract_default_string(SEXP x);
23 |
24 | Rboolean extract_default_logical(SEXP x);
25 |
26 | bool is_altrep(SEXP x);
27 |
28 | SEXP ffi_extract_altrep_class(SEXP x);
29 |
30 | SEXP ffi_is_sparse_vector(SEXP x);
31 |
32 | SEXP ffi_is_altrep_non_sparse_vector(SEXP x);
33 |
34 | R_xlen_t binary_search(int needle, const int* v_haystack, R_xlen_t size);
35 |
36 | bool is_index_handleable(SEXP x);
37 |
38 | void verbose_materialize(void);
39 |
40 | void sort_pos_and_val(SEXP pos, SEXP val);
41 |
42 | #endif
43 |
--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # This file is part of the standard setup for testthat.
2 | # It is recommended that you do not modify it.
3 | #
4 | # Where should you do additional test configuration?
5 | # Learn more about the roles of various files in:
6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
7 | # * https://testthat.r-lib.org/articles/special-files.html
8 |
9 | library(testthat)
10 | library(sparsevctrs)
11 |
12 | test_check("sparsevctrs")
13 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/coerce.md:
--------------------------------------------------------------------------------
1 | # coerce_to_sparse_matrix() errors on wrong input
2 |
3 | Code
4 | coerce_to_sparse_matrix(1:10)
5 | Condition
6 | Error in `coerce_to_sparse_matrix()`:
7 | ! `x` must be a , not an integer vector.
8 |
9 | ---
10 |
11 | Code
12 | coerce_to_sparse_matrix(matrix(0, nrow = 10, ncol = 10))
13 | Condition
14 | Error in `coerce_to_sparse_matrix()`:
15 | ! `x` must be a , not a double matrix.
16 |
17 | ---
18 |
19 | Code
20 | coerce_to_sparse_matrix(iris)
21 | Condition
22 | Error in `coerce_to_sparse_matrix()`:
23 | x All columns of `x` must be numeric.
24 | i Non-numeric columns: Species.
25 |
26 | # coerce_to_sparse_matrix() materializes non-zero defaulted columns
27 |
28 | Code
29 | res <- coerce_to_sparse_matrix(sparse_df)
30 | Output
31 | sparsevctrs: Sparse vector materialized
32 | sparsevctrs: Sparse vector materialized
33 |
34 | # coerce_to_sparse_matrix() can pass through error call
35 |
36 | Code
37 | tmp_fun(1)
38 | Condition
39 | Error in `tmp_fun()`:
40 | ! `x` must be a , not a number.
41 |
42 | ---
43 |
44 | Code
45 | tmp_fun(1)
46 | Condition
47 | Error in `tmp_fun()`:
48 | ! `x` must be a , not a number.
49 |
50 | # coerce_to_sparse_data_frame() errors with no column names
51 |
52 | Code
53 | coerce_to_sparse_data_frame(sparse_mat)
54 | Condition
55 | Error in `coerce_to_sparse_data_frame()`:
56 | ! `x` must have column names.
57 |
58 | # coerce_to_sparse_data_frame() errors with wrong input
59 |
60 | Code
61 | coerce_to_sparse_data_frame(mtcars)
62 | Condition
63 | Error in `coerce_to_sparse_data_frame()`:
64 | ! `x` must be a , not a data frame.
65 |
66 | ---
67 |
68 | Code
69 | coerce_to_sparse_data_frame(1:10)
70 | Condition
71 | Error in `coerce_to_sparse_data_frame()`:
72 | ! `x` must be a , not an integer vector.
73 |
74 | # coerce_to_sparse_data_frame() can pass through error call
75 |
76 | Code
77 | tmp_fun(1)
78 | Condition
79 | Error in `tmp_fun()`:
80 | ! `x` must be a , not a number.
81 |
82 | # coerce_to_sparse_tibble() errors with no column names
83 |
84 | Code
85 | coerce_to_sparse_tibble(sparse_mat)
86 | Condition
87 | Error in `coerce_to_sparse_tibble()`:
88 | ! `x` must have column names.
89 |
90 | # coerce_to_sparse_tibble() errors with wrong input
91 |
92 | Code
93 | coerce_to_sparse_tibble(mtcars)
94 | Condition
95 | Error in `coerce_to_sparse_tibble()`:
96 | ! `x` must be a , not a data frame.
97 |
98 | ---
99 |
100 | Code
101 | coerce_to_sparse_tibble(1:10)
102 | Condition
103 | Error in `coerce_to_sparse_tibble()`:
104 | ! `x` must be a , not an integer vector.
105 |
106 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/sparse_dummy.md:
--------------------------------------------------------------------------------
1 | # sparse_dummy() errors with wrong input
2 |
3 | Code
4 | sparse_dummy(letters)
5 | Condition
6 | Error in `sparse_dummy()`:
7 | ! `x` must be a factor, not a character vector.
8 |
9 | ---
10 |
11 | Code
12 | sparse_dummy(mtcars)
13 | Condition
14 | Error in `sparse_dummy()`:
15 | ! `x` must be a factor, not a data frame.
16 |
17 | ---
18 |
19 | Code
20 | sparse_dummy(1:5)
21 | Condition
22 | Error in `sparse_dummy()`:
23 | ! `x` must be a factor, not an integer vector.
24 |
25 | ---
26 |
27 | Code
28 | sparse_dummy(NULL)
29 | Condition
30 | Error in `sparse_dummy()`:
31 | ! `x` must be a factor, not NULL.
32 |
33 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/sparse_logical.md:
--------------------------------------------------------------------------------
1 | # input checking is done correctly
2 |
3 | Code
4 | sparse_logical("1", 1, 1)
5 | Condition
6 | Error in `sparse_logical()`:
7 | ! `values` must be a logical vector, not a string.
8 |
9 | ---
10 |
11 | Code
12 | sparse_logical(1, 1, 1)
13 | Condition
14 | Error in `sparse_logical()`:
15 | ! `values` must be a logical vector, not a number.
16 |
17 | ---
18 |
19 | Code
20 | sparse_logical(NULL, 1, 1)
21 | Condition
22 | Error in `sparse_logical()`:
23 | ! `values` must be a logical vector, not NULL.
24 |
25 | ---
26 |
27 | Code
28 | sparse_logical(Inf, 1, 1)
29 | Condition
30 | Error in `sparse_logical()`:
31 | ! `values` must be a logical vector, not a number.
32 |
33 | ---
34 |
35 | Code
36 | sparse_logical(NaN, 1, 1)
37 | Condition
38 | Error in `sparse_logical()`:
39 | ! `values` must be a logical vector, not a numeric `NA`.
40 |
41 | ---
42 |
43 | Code
44 | sparse_logical(TRUE, 1.5, 1)
45 | Condition
46 | Error in `sparse_logical()`:
47 | x `positions` must contain integer values.
48 | i Non-integer values at index: 1.
49 |
50 | ---
51 |
52 | Code
53 | sparse_logical(TRUE, "1", 1)
54 | Condition
55 | Error in `sparse_logical()`:
56 | ! `positions` must be a integer vector, not a string.
57 |
58 | ---
59 |
60 | Code
61 | sparse_logical(TRUE, NULL, 1)
62 | Condition
63 | Error in `sparse_logical()`:
64 | ! `positions` must be a integer vector, not NULL.
65 |
66 | ---
67 |
68 | Code
69 | sparse_logical(TRUE, NA, 1)
70 | Condition
71 | Error in `sparse_logical()`:
72 | ! `positions` must be a integer vector, not `NA`.
73 |
74 | ---
75 |
76 | Code
77 | sparse_logical(TRUE, Inf, 1)
78 | Condition
79 | Error in `sparse_logical()`:
80 | x `positions` must not contain infinite values.
81 | i Infinite values at index: 1.
82 |
83 | ---
84 |
85 | Code
86 | sparse_logical(TRUE, NaN, 1)
87 | Condition
88 | Error in `sparse_logical()`:
89 | x `positions` must not contain NaN values.
90 | i NaN values at index: 1.
91 |
92 | ---
93 |
94 | Code
95 | sparse_logical(numeric(0), integer(0), -10)
96 | Condition
97 | Error in `sparse_logical()`:
98 | ! `length` must be a whole number larger than or equal to 0, not the number -10.
99 |
100 | ---
101 |
102 | Code
103 | sparse_logical(numeric(0), integer(0), 1e+10)
104 | Condition
105 | Error in `sparse_logical()`:
106 | ! `length` must be less than 2147483647, not 1e+10.
107 |
108 | ---
109 |
110 | Code
111 | sparse_logical(logical(0), integer(0), c(1, 10))
112 | Condition
113 | Error in `sparse_logical()`:
114 | ! `length` must be a whole number, not a double vector.
115 |
116 | ---
117 |
118 | Code
119 | sparse_logical(logical(0), integer(0), 1.5)
120 | Condition
121 | Error in `sparse_logical()`:
122 | ! `length` must be a whole number, not the number 1.5.
123 |
124 | ---
125 |
126 | Code
127 | sparse_logical(logical(0), integer(0), "1")
128 | Condition
129 | Error in `sparse_logical()`:
130 | ! `length` must be a whole number, not the string "1".
131 |
132 | ---
133 |
134 | Code
135 | sparse_logical(logical(0), integer(0), NA)
136 | Condition
137 | Error in `sparse_logical()`:
138 | ! `length` must be a whole number, not `NA`.
139 |
140 | ---
141 |
142 | Code
143 | sparse_logical(logical(0), integer(0), Inf)
144 | Condition
145 | Error in `sparse_logical()`:
146 | ! `length` must be a whole number, not `Inf`.
147 |
148 | ---
149 |
150 | Code
151 | sparse_logical(logical(0), integer(0), NULL)
152 | Condition
153 | Error in `sparse_logical()`:
154 | ! `length` must be a whole number, not `NULL`.
155 |
156 | ---
157 |
158 | Code
159 | sparse_logical(logical(0), integer(0), NaN)
160 | Condition
161 | Error in `sparse_logical()`:
162 | ! `length` must be a whole number, not `NaN`.
163 |
164 | ---
165 |
166 | Code
167 | sparse_logical(c(TRUE, TRUE), 1:6, 10)
168 | Condition
169 | Error in `sparse_logical()`:
170 | ! `value` (2) and `positions` (6) must have the same length.
171 |
172 | ---
173 |
174 | Code
175 | sparse_logical(TRUE, 1:6, 10)
176 | Condition
177 | Error in `sparse_logical()`:
178 | ! `value` (1) and `positions` (6) must have the same length.
179 |
180 | ---
181 |
182 | Code
183 | sparse_logical(c(TRUE, TRUE, TRUE, TRUE), c(1, 1, 5, 6), 10)
184 | Condition
185 | Error in `sparse_logical()`:
186 | x `positions` must not contain any duplicate values.
187 | i Duplicate values at index: 2.
188 |
189 | ---
190 |
191 | Code
192 | sparse_logical(rep(TRUE, 100), rep(1, 100), 100)
193 | Condition
194 | Error in `sparse_logical()`:
195 | x `positions` must not contain any duplicate values.
196 | i Duplicate values at index: 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, ..., 99, and 100.
197 |
198 | ---
199 |
200 | Code
201 | sparse_logical(c(TRUE, TRUE), c(3, 1), 5)
202 | Condition
203 | Error in `sparse_logical()`:
204 | ! `positions` must be sorted in increasing order.
205 |
206 | ---
207 |
208 | Code
209 | sparse_logical(TRUE, 10, 5)
210 | Condition
211 | Error in `sparse_logical()`:
212 | x `positions` value must not be larger than `length`.
213 | i Offending values at index: 1.
214 |
215 | ---
216 |
217 | Code
218 | sparse_logical(rep(TRUE, 50), seq(25, 74), 50)
219 | Condition
220 | Error in `sparse_logical()`:
221 | x `positions` value must not be larger than `length`.
222 | i Offending values at index: 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, ..., 49, and 50.
223 |
224 | ---
225 |
226 | Code
227 | sparse_logical(TRUE, 0, 5)
228 | Condition
229 | Error in `sparse_logical()`:
230 | x `positions` value must positive.
231 | i Non-positive values at index: 1.
232 |
233 | ---
234 |
235 | Code
236 | sparse_logical(rep(TRUE, 101), seq(-50, 50), 100)
237 | Condition
238 | Error in `sparse_logical()`:
239 | x `positions` value must positive.
240 | i Non-positive values at index: 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, ..., 50, and 51.
241 |
242 | # default argument is working
243 |
244 | Code
245 | sparse_logical(TRUE, 1, 10, default = TRUE)
246 | Condition
247 | Error in `sparse_logical()`:
248 | x `values` value must not be equal to the default TRUE.
249 | i TRUE values at index: 1.
250 |
251 | ---
252 |
253 | Code
254 | sparse_logical(c(TRUE, TRUE, NA), c(1, 4, 6), 10, default = TRUE)
255 | Condition
256 | Error in `sparse_logical()`:
257 | x `values` value must not be equal to the default TRUE.
258 | i TRUE values at index: 1 and 2.
259 |
260 | # verbose testing
261 |
262 | Code
263 | tmp <- x[]
264 | Output
265 | sparsevctrs: Sparse vector materialized
266 | Code
267 | tmp <- x[]
268 |
269 | ---
270 |
271 | Code
272 | tmp <- x[]
273 | Condition
274 | Warning:
275 | sparsevctrs: Sparse vector materialized
276 | Code
277 | tmp <- x[]
278 |
279 | ---
280 |
281 | Code
282 | tmp <- x[]
283 | Condition
284 | Error:
285 | ! sparsevctrs: Sparse vector materialized
286 |
287 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/sparsity.md:
--------------------------------------------------------------------------------
1 | # works with data.frames sample arg
2 |
3 | Code
4 | sparsity(mtcars, sample = 0.4)
5 | Condition
6 | Error in `sparsity()`:
7 | ! `sample` must be a whole number or `NULL`, not the number 0.4.
8 |
9 |
--------------------------------------------------------------------------------
/tests/testthat/test-coerce-vector.R:
--------------------------------------------------------------------------------
1 | test_that("as_sparse_double works", {
2 | x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, NA)
3 | x_sparse <- as_sparse_double(x_dense)
4 |
5 | expect_true(is_sparse_double(x_sparse))
6 | expect_identical(x_sparse, x_dense)
7 |
8 | x_dense <- c(3L, 0L, 2L, 0L, 0L, 0L, 4L, 0L, 0L, NA)
9 | x_sparse <- as_sparse_double(x_dense)
10 |
11 | expect_true(is_sparse_double(x_sparse))
12 | expect_identical(x_sparse, as.numeric(x_dense))
13 | })
14 |
15 | test_that("as_sparse_integer works", {
16 | x_dense <- c(3, 0, 2, 0, 0, 0, 4, 0, 0, NA)
17 | x_sparse <- as_sparse_integer(x_dense)
18 |
19 | expect_true(is_sparse_integer(x_sparse))
20 | expect_identical(x_sparse, as.integer(x_dense))
21 |
22 | x_dense <- c(3L, 0L, 2L, 0L, 0L, 0L, 4L, 0L, 0L, NA)
23 | x_sparse <- as_sparse_integer(x_dense)
24 |
25 | expect_true(is_sparse_integer(x_sparse))
26 | expect_identical(x_sparse, x_dense)
27 | })
28 |
29 | test_that("as_sparse_integer works", {
30 | x_dense <- c("A", "", "B", "", "", "", "C", "", "", NA)
31 | x_sparse <- as_sparse_character(x_dense)
32 |
33 | expect_true(is_sparse_character(x_sparse))
34 | })
35 |
36 | test_that("as_sparse_logical works", {
37 | x_dense <- c(
38 | FALSE,
39 | FALSE,
40 | FALSE,
41 | FALSE,
42 | NA,
43 | FALSE,
44 | FALSE,
45 | FALSE,
46 | FALSE,
47 | FALSE
48 | )
49 | x_sparse <- as_sparse_logical(x_dense)
50 |
51 | expect_true(is_sparse_logical(x_sparse))
52 | })
53 |
--------------------------------------------------------------------------------
/tests/testthat/test-extractors.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_positions works with altrep_sparse_double", {
2 | expect_identical(
3 | sparse_positions(sparse_double(1, 5, 10)),
4 | 5L
5 | )
6 |
7 | expect_identical(
8 | sparse_positions(sparse_double(1:3, 5:7, 10)),
9 | 5:7
10 | )
11 | })
12 |
13 | test_that("sparse_positions works with numeric vectors", {
14 | expect_identical(
15 | sparse_positions(c(1, 6, 4, 2)),
16 | seq_len(4)
17 | )
18 |
19 | expect_identical(
20 | sparse_positions(101:200),
21 | 1:100
22 | )
23 | })
24 |
25 | test_that("sparse_values works with altrep_sparse_double", {
26 | expect_identical(
27 | sparse_values(sparse_double(1, 5, 10)),
28 | 1
29 | )
30 |
31 | expect_identical(
32 | sparse_values(sparse_double(1:3, 5:7, 10)),
33 | c(1, 2, 3)
34 | )
35 | })
36 |
37 | test_that("sparse_values works with numeric vectors", {
38 | expect_identical(
39 | sparse_values(c(1, 6, 4, 2)),
40 | c(1, 6, 4, 2)
41 | )
42 |
43 | expect_identical(
44 | sparse_values(101:200),
45 | 101:200
46 | )
47 | })
48 |
49 | test_that("sparse_default works with altrep_sparse_double", {
50 | expect_identical(
51 | sparse_default(sparse_double(1, 5, 10)),
52 | 0
53 | )
54 |
55 | expect_identical(
56 | sparse_default(sparse_double(1, 5, 10, default = 11)),
57 | 11
58 | )
59 | })
60 |
61 | test_that("sparse_values works with numeric vectors", {
62 | expect_identical(
63 | sparse_default(c(1, 6, 4, 2)),
64 | NA
65 | )
66 | })
67 |
--------------------------------------------------------------------------------
/tests/testthat/test-has_sparse_elements.R:
--------------------------------------------------------------------------------
1 | test_that("has_sparse_elements() works", {
2 | expect_false(has_sparse_elements(mtcars))
3 |
4 | mtcars$sparse <- sparse_integer(1, 1, 32)
5 |
6 | expect_true(has_sparse_elements(mtcars))
7 | })
8 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_character.R:
--------------------------------------------------------------------------------
1 | test_that("input checking is done correctly", {
2 | # value
3 | expect_snapshot(
4 | error = TRUE,
5 | sparse_character(1, 1, 1)
6 | )
7 | expect_snapshot(
8 | error = TRUE,
9 | sparse_character(0.5, 1, 1)
10 | )
11 | expect_snapshot(
12 | error = TRUE,
13 | sparse_character(NULL, 1, 1)
14 | )
15 | expect_snapshot(
16 | error = TRUE,
17 | sparse_character(Inf, 1, 1)
18 | )
19 | expect_snapshot(
20 | error = TRUE,
21 | sparse_character(NaN, 1, 1)
22 | )
23 |
24 | # position
25 | expect_snapshot(
26 | error = TRUE,
27 | sparse_character("A", 1.5, 1)
28 | )
29 | expect_snapshot(
30 | error = TRUE,
31 | sparse_character("A", "1", 1)
32 | )
33 | expect_snapshot(
34 | error = TRUE,
35 | sparse_character("A", NULL, 1)
36 | )
37 | expect_snapshot(
38 | error = TRUE,
39 | sparse_character("A", NA, 1)
40 | )
41 | expect_snapshot(
42 | error = TRUE,
43 | sparse_character("A", Inf, 1)
44 | )
45 | expect_snapshot(
46 | error = TRUE,
47 | sparse_character("A", NaN, 1)
48 | )
49 |
50 | # length
51 | expect_no_error(
52 | sparse_character(character(0), integer(0), 0)
53 | )
54 | expect_snapshot(
55 | error = TRUE,
56 | sparse_character(numeric(0), integer(0), -10)
57 | )
58 | expect_snapshot(
59 | error = TRUE,
60 | sparse_character(numeric(0), integer(0), 10000000000)
61 | )
62 | expect_snapshot(
63 | error = TRUE,
64 | sparse_character(character(0), integer(0), c(1, 10))
65 | )
66 | expect_snapshot(
67 | error = TRUE,
68 | sparse_character(character(0), integer(0), 1.5)
69 | )
70 | expect_snapshot(
71 | error = TRUE,
72 | sparse_character(character(0), integer(0), "1")
73 | )
74 | expect_snapshot(
75 | error = TRUE,
76 | sparse_character(character(0), integer(0), NA)
77 | )
78 | expect_snapshot(
79 | error = TRUE,
80 | sparse_character(character(0), integer(0), Inf)
81 | )
82 | expect_snapshot(
83 | error = TRUE,
84 | sparse_character(character(0), integer(0), NULL)
85 | )
86 | expect_snapshot(
87 | error = TRUE,
88 | sparse_character(character(0), integer(0), NaN)
89 | )
90 |
91 | # Length restriction
92 | expect_snapshot(
93 | error = TRUE,
94 | sparse_character(letters[1:4], 1:6, 10)
95 | )
96 | expect_snapshot(
97 | error = TRUE,
98 | sparse_character("A", 1:6, 10)
99 | )
100 |
101 | # duplicates in position
102 | expect_snapshot(
103 | error = TRUE,
104 | sparse_character(letters[1:4], c(1, 1, 5, 6), 10)
105 | )
106 | expect_snapshot(
107 | error = TRUE,
108 | sparse_character(letters, rep(1, 26), 100)
109 | )
110 |
111 | # Ordered position
112 | expect_snapshot(
113 | error = TRUE,
114 | sparse_character(c("A", "B"), c(3, 1), 5)
115 | )
116 |
117 | # Too large position values
118 | expect_snapshot(
119 | error = TRUE,
120 | sparse_character("A", 10, 5)
121 | )
122 | expect_snapshot(
123 | error = TRUE,
124 | sparse_character(rep("A", 50), seq(25, 74), 50)
125 | )
126 |
127 | # Too large position values
128 | expect_snapshot(
129 | error = TRUE,
130 | sparse_character("A", 0, 5)
131 | )
132 | expect_snapshot(
133 | error = TRUE,
134 | sparse_character(rep("A", 101), seq(-50, 50), 100)
135 | )
136 |
137 | # Too large position values
138 | expect_snapshot(
139 | error = TRUE,
140 | sparse_character("", 1, 10)
141 | )
142 | expect_snapshot(
143 | error = TRUE,
144 | sparse_character(rep(c("A", ""), 5), 1:10, 50)
145 | )
146 | })
147 |
148 | test_that("length() works with sparse_character()", {
149 | expect_identical(
150 | length(sparse_character(character(), integer(), 0)),
151 | 0L
152 | )
153 |
154 | expect_identical(
155 | length(sparse_character("A", 1, 10)),
156 | 10L
157 | )
158 |
159 | expect_identical(
160 | length(sparse_character("A", 1, 100)),
161 | 100L
162 | )
163 | })
164 |
165 | test_that("single subsetting works with sparse_character()", {
166 | x_sparse <- sparse_character(
167 | value = c("A", NA, "B"),
168 | position = c(1, 5, 8),
169 | 10
170 | )
171 | x_dense <- c("A", "", "", "", NA, "", "", "B", "", "")
172 |
173 | for (i in seq_len(10)) {
174 | expect_identical(x_sparse[i], x_dense[i])
175 | }
176 |
177 | expect_identical(x_sparse[0], x_dense[0])
178 |
179 | expect_identical(x_sparse[NA_integer_], x_dense[NA_integer_])
180 |
181 | expect_identical(x_sparse[NULL], x_dense[NULL])
182 |
183 | expect_identical(x_sparse[NaN], x_dense[NaN])
184 |
185 | expect_identical(x_sparse[100], x_dense[100])
186 |
187 | expect_identical(x_sparse[Inf], x_dense[Inf])
188 |
189 | expect_identical(x_sparse["not a number"], x_dense["not a number"])
190 |
191 | expect_identical(x_sparse[1.6], x_dense[1.6])
192 | expect_identical(x_sparse[2.6], x_dense[2.6])
193 | })
194 |
195 | test_that("multiple subsetting works with sparse_character()", {
196 | x_sparse <- sparse_character(
197 | value = c("A", NA, "B"),
198 | position = c(1, 5, 8),
199 | 10
200 | )
201 | x_dense <- c("A", "", "", "", NA, "", "", "B", "", "")
202 |
203 | expect_identical(x_sparse[1:2], x_dense[1:2])
204 |
205 | expect_identical(x_sparse[3:7], x_dense[3:7])
206 |
207 | expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)])
208 |
209 | expect_identical(x_sparse[-1], x_dense[-1])
210 |
211 | expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)])
212 |
213 | expect_identical(x_sparse[FALSE], x_dense[FALSE])
214 |
215 | expect_identical(x_sparse[TRUE], x_dense[TRUE])
216 |
217 | expect_identical(x_sparse[NA], x_dense[NA])
218 |
219 | expect_identical(x_sparse[c(1, NA, 4)], x_dense[c(1, NA, 4)])
220 |
221 | expect_identical(x_sparse[c(1, NA, 0, 4, 0)], x_dense[c(1, NA, 0, 4, 0)])
222 |
223 | expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)])
224 |
225 | expect_identical(x_sparse[c(1, Inf)], x_dense[c(1, Inf)])
226 |
227 | expect_identical(x_sparse[c(1, NaN)], x_dense[c(1, NaN)])
228 | })
229 |
230 | test_that("materialization works with sparse_character()", {
231 | x_sparse <- sparse_character(
232 | value = c("A", NA, "B"),
233 | position = c(1, 5, 8),
234 | 10
235 | )
236 | x_dense <- c("A", "", "", "", NA, "", "", "B", "", "")
237 |
238 | expect_identical(x_sparse[], x_dense)
239 | })
240 |
241 | test_that("default argument is working", {
242 | expect_snapshot(
243 | error = TRUE,
244 | sparse_character("A", 1, 10, default = letters)
245 | )
246 |
247 | expect_snapshot(
248 | error = TRUE,
249 | sparse_character("A", 1, 10, default = TRUE)
250 | )
251 |
252 | expect_snapshot(
253 | error = TRUE,
254 | sparse_character(c("A", "B", "C"), c(1, 4, 6), 10, default = "A")
255 | )
256 |
257 | x_sparse <- sparse_character(
258 | value = c("A", NA, "B"),
259 | position = c(1, 5, 8),
260 | length = 10,
261 | default = "H"
262 | )
263 |
264 | x_dense <- c("A", "H", "H", "H", NA, "H", "H", "B", "H", "H")
265 |
266 | for (i in seq_len(10)) {
267 | expect_identical(x_sparse[i], x_dense[i])
268 | }
269 |
270 | expect_identical(x_sparse[1:2], x_dense[1:2])
271 |
272 | expect_identical(x_sparse[3:7], x_dense[3:7])
273 |
274 | expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)])
275 |
276 | expect_identical(x_sparse[], x_dense)
277 | })
278 |
279 | test_that("verbose testing", {
280 | withr::local_options("sparsevctrs.verbose_materialize" = TRUE)
281 |
282 | x <- sparse_character("A", 1, 1)
283 | expect_snapshot({
284 | tmp <- x[]
285 | tmp <- x[]
286 | })
287 |
288 | withr::local_options("sparsevctrs.verbose_materialize" = 2)
289 |
290 | x <- sparse_character("A", 1, 1)
291 | expect_snapshot({
292 | tmp <- x[]
293 | tmp <- x[]
294 | })
295 |
296 | withr::local_options("sparsevctrs.verbose_materialize" = 3)
297 |
298 | x <- sparse_character("A", 1, 1)
299 | expect_snapshot(
300 | error = TRUE,
301 | {
302 | tmp <- x[]
303 | }
304 | )
305 | })
306 |
307 | test_that("printing works #48", {
308 | expect_snapshot(
309 | sparse_character("A", 1, 10)[]
310 | )
311 | })
312 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_dummy.R:
--------------------------------------------------------------------------------
1 | # one_hot = TRUE --------------------------------------------------------------
2 |
3 | test_that("sparse_dummy(one_hot = TRUE) works with single level", {
4 | x <- factor(c("a", "a", "a"))
5 | exp <- list(
6 | a = c(1L, 1L, 1L)
7 | )
8 |
9 | res <- sparse_dummy(x, one_hot = TRUE)
10 | expect_identical(
11 | res,
12 | exp
13 | )
14 |
15 | expect_true(is.integer(res$a))
16 | expect_false(is_sparse_vector(res$a))
17 | })
18 |
19 | test_that("sparse_dummy(one_hot = FALSE) works zero length input", {
20 | x <- factor(character())
21 | exp <- structure(list(), names = character(0))
22 |
23 | res <- sparse_dummy(x, one_hot = FALSE)
24 | expect_identical(
25 | res,
26 | exp
27 | )
28 | })
29 |
30 | ## anyNA = FALSE ---------------------------------------------------------------
31 |
32 | test_that("sparse_dummy(one_hot = TRUE) works with no NAs", {
33 | x <- factor(c("a", "b", "c", "d", "a"))
34 | exp <- list(
35 | a = sparse_integer(c(1, 1), c(1, 5), 5),
36 | b = sparse_integer(1, 2, 5),
37 | c = sparse_integer(1, 3, 5),
38 | d = sparse_integer(1, 4, 5)
39 | )
40 |
41 | res <- sparse_dummy(x, one_hot = TRUE)
42 | expect_identical(
43 | res,
44 | exp
45 | )
46 |
47 | expect_true(
48 | all(vapply(res, is_sparse_integer, logical(1)))
49 | )
50 | })
51 |
52 | test_that("sparse_dummy(one_hot = TRUE) works with no NAs and unseen levels", {
53 | x <- factor(c("a", "b", "c", "d", "a"), levels = letters[1:6])
54 | exp <- list(
55 | a = sparse_integer(c(1, 1), c(1, 5), 5),
56 | b = sparse_integer(1, 2, 5),
57 | c = sparse_integer(1, 3, 5),
58 | d = sparse_integer(1, 4, 5),
59 | e = sparse_integer(integer(), integer(), 5),
60 | f = sparse_integer(integer(), integer(), 5)
61 | )
62 |
63 | res <- sparse_dummy(x, one_hot = TRUE)
64 | expect_identical(
65 | res,
66 | exp
67 | )
68 |
69 | expect_true(
70 | all(vapply(res, is_sparse_integer, logical(1)))
71 | )
72 | })
73 |
74 | ## anyNA = TRUE ----------------------------------------------------------------
75 |
76 | test_that("sparse_dummy(one_hot = TRUE) works with NA", {
77 | x <- factor(c("a", NA, "b", "c", "a", NA))
78 | exp <- list(
79 | a = sparse_integer(c(1, NA, 1, NA), c(1, 2, 5, 6), 6),
80 | b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6),
81 | c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6)
82 | )
83 |
84 | res <- sparse_dummy(x, one_hot = TRUE)
85 | expect_identical(
86 | res,
87 | exp
88 | )
89 |
90 | expect_true(
91 | all(vapply(res, is_sparse_integer, logical(1)))
92 | )
93 | })
94 |
95 | test_that("sparse_dummy(one_hot = TRUE) works with NA and unseen levels", {
96 | x <- factor(c("a", NA, "b", "c", "a", NA), levels = letters[1:5])
97 | exp <- list(
98 | a = sparse_integer(c(1, NA, 1, NA), c(1, 2, 5, 6), 6),
99 | b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6),
100 | c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6),
101 | d = sparse_integer(c(NA, NA), c(2, 6), 6),
102 | e = sparse_integer(c(NA, NA), c(2, 6), 6)
103 | )
104 |
105 | res <- sparse_dummy(x, one_hot = TRUE)
106 | expect_identical(
107 | res,
108 | exp
109 | )
110 |
111 | expect_true(
112 | all(vapply(res, is_sparse_integer, logical(1)))
113 | )
114 | })
115 |
116 | # one_hot = FALSE ---------------------------------------------------------------
117 |
118 | test_that("sparse_dummy(one_hot = FALSE) works with single level", {
119 | x <- factor(c("a", "a", "a"))
120 | exp <- structure(list(), names = character(0))
121 |
122 | res <- sparse_dummy(x, one_hot = FALSE)
123 |
124 | expect_identical(
125 | res,
126 | exp
127 | )
128 | })
129 |
130 | test_that("sparse_dummy(one_hot = FALSE) works with two levels", {
131 | x <- factor(c("a", "b", "a"))
132 | exp <- list(
133 | b = c(0L, 1L, 0L)
134 | )
135 |
136 | res <- sparse_dummy(x, one_hot = FALSE)
137 | expect_identical(
138 | res,
139 | exp
140 | )
141 |
142 | expect_true(is.integer(res$b))
143 | expect_true(is_sparse_vector(res$b))
144 | })
145 |
146 | test_that("sparse_dummy(one_hot = TRUE) works zero length input", {
147 | x <- factor(character())
148 | exp <- structure(list(), names = character(0))
149 |
150 | res <- sparse_dummy(x, one_hot = FALSE)
151 | expect_identical(
152 | res,
153 | exp
154 | )
155 | })
156 |
157 | ## anyNA = FALSE ---------------------------------------------------------------
158 |
159 | test_that("sparse_dummy(one_hot = FALSE) works with no NAs", {
160 | x <- factor(c("a", "b", "c", "d", "a"))
161 | exp <- list(
162 | b = sparse_integer(1, 2, 5),
163 | c = sparse_integer(1, 3, 5),
164 | d = sparse_integer(1, 4, 5)
165 | )
166 |
167 | res <- sparse_dummy(x, one_hot = FALSE)
168 | expect_identical(
169 | res,
170 | exp
171 | )
172 |
173 | expect_true(
174 | all(vapply(res, is_sparse_integer, logical(1)))
175 | )
176 | })
177 |
178 | test_that("sparse_dummy(one_hot = FALSE) works with no NAs and unseen levels", {
179 | x <- factor(c("a", "b", "c", "d", "a"), levels = letters[1:6])
180 | exp <- list(
181 | b = sparse_integer(1, 2, 5),
182 | c = sparse_integer(1, 3, 5),
183 | d = sparse_integer(1, 4, 5),
184 | e = sparse_integer(integer(), integer(), 5),
185 | f = sparse_integer(integer(), integer(), 5)
186 | )
187 |
188 | res <- sparse_dummy(x, one_hot = FALSE)
189 | expect_identical(
190 | res,
191 | exp
192 | )
193 |
194 | expect_true(
195 | all(vapply(res, is_sparse_integer, logical(1)))
196 | )
197 | })
198 |
199 | ## anyNA = TRUE ----------------------------------------------------------------
200 |
201 | test_that("sparse_dummy(one_hot = FALSE) works with NA", {
202 | x <- factor(c("a", NA, "b", "c", "a", NA))
203 | exp <- list(
204 | b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6),
205 | c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6)
206 | )
207 |
208 | res <- sparse_dummy(x, one_hot = FALSE)
209 | expect_identical(
210 | res,
211 | exp
212 | )
213 |
214 | expect_true(
215 | all(vapply(res, is_sparse_integer, logical(1)))
216 | )
217 | })
218 |
219 | test_that("sparse_dummy(one_hot = FALSE) works with NA and unseen levels", {
220 | x <- factor(c("a", NA, "b", "c", "a", NA), levels = letters[1:5])
221 | exp <- list(
222 | b = sparse_integer(c(NA, 1, NA), c(2, 3, 6), 6),
223 | c = sparse_integer(c(NA, 1, NA), c(2, 4, 6), 6),
224 | d = sparse_integer(c(NA, NA), c(2, 6), 6),
225 | e = sparse_integer(c(NA, NA), c(2, 6), 6)
226 | )
227 |
228 | res <- sparse_dummy(x, one_hot = FALSE)
229 | expect_identical(
230 | res,
231 | exp
232 | )
233 |
234 | expect_true(
235 | all(vapply(res, is_sparse_integer, logical(1)))
236 | )
237 | })
238 |
239 | # Other ------------------------------------------------------------------------
240 |
241 | test_that("sparse_dummy() errors with wrong input", {
242 | expect_snapshot(
243 | error = TRUE,
244 | sparse_dummy(letters)
245 | )
246 | expect_snapshot(
247 | error = TRUE,
248 | sparse_dummy(mtcars)
249 | )
250 | expect_snapshot(
251 | error = TRUE,
252 | sparse_dummy(1:5)
253 | )
254 | expect_snapshot(
255 | error = TRUE,
256 | sparse_dummy(NULL)
257 | )
258 | })
259 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_is_na.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_is_na() works - double", {
2 | x <- sparse_double(c(10, -10), c(5, 100), 1000)
3 |
4 | expect_equal(is.na(x), sparse_is_na(x))
5 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
6 |
7 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
8 |
9 | expect_equal(is.na(x), sparse_is_na(x))
10 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
11 | })
12 |
13 | test_that("sparse_is_na() works - integer", {
14 | x <- sparse_integer(c(10, -10), c(5, 100), 1000)
15 |
16 | expect_equal(is.na(x), sparse_is_na(x))
17 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
18 |
19 | x <- sparse_integer(c(NA, 10, 30), 1:3, 1000)
20 |
21 | expect_equal(is.na(x), sparse_is_na(x))
22 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
23 | })
24 |
25 | test_that("sparse_is_na() works - logical", {
26 | x <- sparse_logical(c(TRUE, TRUE), c(5, 100), 1000)
27 |
28 | expect_equal(is.na(x), sparse_is_na(x))
29 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
30 |
31 | x <- sparse_logical(c(NA, TRUE, TRUE), 1:3, 1000)
32 |
33 | expect_equal(is.na(x), sparse_is_na(x))
34 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
35 | })
36 |
37 | test_that("sparse_is_na() works - character", {
38 | x <- sparse_character(c("A", "B"), c(5, 100), 1000)
39 |
40 | expect_equal(is.na(x), sparse_is_na(x))
41 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
42 |
43 | x <- sparse_character(c(NA, "A", "B"), 1:3, 1000)
44 |
45 | expect_equal(is.na(x), sparse_is_na(x))
46 | expect_equal(as.integer(is.na(x)), sparse_is_na(x, type = "integer"))
47 | })
48 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_logical.R:
--------------------------------------------------------------------------------
1 | test_that("input checking is done correctly", {
2 | # value
3 | expect_snapshot(
4 | error = TRUE,
5 | sparse_logical("1", 1, 1)
6 | )
7 | expect_snapshot(
8 | error = TRUE,
9 | sparse_logical(1, 1, 1)
10 | )
11 | expect_snapshot(
12 | error = TRUE,
13 | sparse_logical(NULL, 1, 1)
14 | )
15 | expect_snapshot(
16 | error = TRUE,
17 | sparse_logical(Inf, 1, 1)
18 | )
19 | expect_snapshot(
20 | error = TRUE,
21 | sparse_logical(NaN, 1, 1)
22 | )
23 |
24 | # position
25 | expect_snapshot(
26 | error = TRUE,
27 | sparse_logical(TRUE, 1.5, 1)
28 | )
29 | expect_snapshot(
30 | error = TRUE,
31 | sparse_logical(TRUE, "1", 1)
32 | )
33 | expect_snapshot(
34 | error = TRUE,
35 | sparse_logical(TRUE, NULL, 1)
36 | )
37 | expect_snapshot(
38 | error = TRUE,
39 | sparse_logical(TRUE, NA, 1)
40 | )
41 | expect_snapshot(
42 | error = TRUE,
43 | sparse_logical(TRUE, Inf, 1)
44 | )
45 | expect_snapshot(
46 | error = TRUE,
47 | sparse_logical(TRUE, NaN, 1)
48 | )
49 |
50 | # length
51 | expect_no_error(
52 | sparse_logical(logical(0), integer(0), 0)
53 | )
54 | expect_snapshot(
55 | error = TRUE,
56 | sparse_logical(numeric(0), integer(0), -10)
57 | )
58 | expect_snapshot(
59 | error = TRUE,
60 | sparse_logical(numeric(0), integer(0), 10000000000)
61 | )
62 | expect_snapshot(
63 | error = TRUE,
64 | sparse_logical(logical(0), integer(0), c(1, 10))
65 | )
66 | expect_snapshot(
67 | error = TRUE,
68 | sparse_logical(logical(0), integer(0), 1.5)
69 | )
70 | expect_snapshot(
71 | error = TRUE,
72 | sparse_logical(logical(0), integer(0), "1")
73 | )
74 | expect_snapshot(
75 | error = TRUE,
76 | sparse_logical(logical(0), integer(0), NA)
77 | )
78 | expect_snapshot(
79 | error = TRUE,
80 | sparse_logical(logical(0), integer(0), Inf)
81 | )
82 | expect_snapshot(
83 | error = TRUE,
84 | sparse_logical(logical(0), integer(0), NULL)
85 | )
86 | expect_snapshot(
87 | error = TRUE,
88 | sparse_logical(logical(0), integer(0), NaN)
89 | )
90 |
91 | # Length restriction
92 | expect_snapshot(
93 | error = TRUE,
94 | sparse_logical(c(TRUE, TRUE), 1:6, 10)
95 | )
96 | expect_snapshot(
97 | error = TRUE,
98 | sparse_logical(TRUE, 1:6, 10)
99 | )
100 |
101 | # duplicates in position
102 | expect_snapshot(
103 | error = TRUE,
104 | sparse_logical(c(TRUE, TRUE, TRUE, TRUE), c(1, 1, 5, 6), 10)
105 | )
106 | expect_snapshot(
107 | error = TRUE,
108 | sparse_logical(rep(TRUE, 100), rep(1, 100), 100)
109 | )
110 |
111 | # Ordered position
112 | expect_snapshot(
113 | error = TRUE,
114 | sparse_logical(c(TRUE, TRUE), c(3, 1), 5)
115 | )
116 |
117 | # Too large position values
118 | expect_snapshot(
119 | error = TRUE,
120 | sparse_logical(TRUE, 10, 5)
121 | )
122 | expect_snapshot(
123 | error = TRUE,
124 | sparse_logical(rep(TRUE, 50), seq(25, 74), 50)
125 | )
126 |
127 | # Too large position values
128 | expect_snapshot(
129 | error = TRUE,
130 | sparse_logical(TRUE, 0, 5)
131 | )
132 | expect_snapshot(
133 | error = TRUE,
134 | sparse_logical(rep(TRUE, 101), seq(-50, 50), 100)
135 | )
136 | })
137 |
138 | test_that("length() works with sparse_logical()", {
139 | expect_identical(
140 | length(sparse_logical(logical(), integer(), 0)),
141 | 0L
142 | )
143 |
144 | expect_identical(
145 | length(sparse_logical(TRUE, 1, 10)),
146 | 10L
147 | )
148 |
149 | expect_identical(
150 | length(sparse_logical(TRUE, 1, 100)),
151 | 100L
152 | )
153 | })
154 |
155 | test_that("single subsetting works with sparse_logical()", {
156 | x_sparse <- sparse_logical(
157 | value = c(TRUE, NA, TRUE),
158 | position = c(1, 5, 8),
159 | 10
160 | )
161 | x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE)
162 |
163 | for (i in seq_len(10)) {
164 | expect_identical(x_sparse[i], x_dense[i])
165 | }
166 |
167 | expect_identical(x_sparse[0], x_dense[0])
168 |
169 | expect_identical(x_sparse[NA_integer_], x_dense[NA_integer_])
170 |
171 | expect_identical(x_sparse[NULL], x_dense[NULL])
172 |
173 | expect_identical(x_sparse[NaN], x_dense[NaN])
174 |
175 | expect_identical(x_sparse[100], x_dense[100])
176 |
177 | expect_identical(x_sparse[Inf], x_dense[Inf])
178 |
179 | expect_identical(x_sparse["not a number"], x_dense["not a number"])
180 |
181 | expect_identical(x_sparse[1.6], x_dense[1.6])
182 | expect_identical(x_sparse[2.6], x_dense[2.6])
183 | })
184 |
185 | test_that("multiple subsetting works with sparse_logical()", {
186 | x_sparse <- sparse_logical(
187 | value = c(TRUE, NA, TRUE),
188 | position = c(1, 5, 8),
189 | 10
190 | )
191 | x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE)
192 |
193 | expect_identical(x_sparse[1:2], x_dense[1:2])
194 |
195 | expect_identical(x_sparse[3:7], x_dense[3:7])
196 |
197 | expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)])
198 |
199 | expect_identical(x_sparse[-1], x_dense[-1])
200 |
201 | expect_identical(x_sparse[-c(5:7)], x_dense[-c(5:7)])
202 |
203 | expect_identical(x_sparse[FALSE], x_dense[FALSE])
204 |
205 | expect_identical(x_sparse[TRUE], x_dense[TRUE])
206 |
207 | expect_identical(x_sparse[NA], x_dense[NA])
208 |
209 | expect_identical(x_sparse[c(1, NA, 4)], x_dense[c(1, NA, 4)])
210 |
211 | expect_identical(x_sparse[c(1, NA, 0, 4, 0)], x_dense[c(1, NA, 0, 4, 0)])
212 |
213 | expect_identical(x_sparse[c(1, 11)], x_dense[c(1, 11)])
214 |
215 | expect_identical(x_sparse[c(1, Inf)], x_dense[c(1, Inf)])
216 |
217 | expect_identical(x_sparse[c(1, NaN)], x_dense[c(1, NaN)])
218 | })
219 |
220 | test_that("materialization works with sparse_logical()", {
221 | x_sparse <- sparse_logical(
222 | value = c(TRUE, NA, TRUE),
223 | position = c(1, 5, 8),
224 | 10
225 | )
226 | x_dense <- c(TRUE, FALSE, FALSE, FALSE, NA, FALSE, FALSE, TRUE, FALSE, FALSE)
227 |
228 | expect_identical(x_sparse[], x_dense)
229 | })
230 |
231 | test_that("sorting works with sparse_logical()", {
232 | x_sparse <- sparse_logical(logical(), integer(), 10)
233 |
234 | expect_true(is_sparse_logical(sort(x_sparse)))
235 |
236 | x_sparse <- sparse_logical(NA, 4, 10)
237 |
238 | expect_identical(
239 | sort(x_sparse),
240 | rep(FALSE, 9)
241 | )
242 |
243 | x_sparse <- sparse_logical(logical(), integer(), 10)
244 |
245 | expect_true(is_sparse_logical(sort(x_sparse)))
246 |
247 | x_sparse <- sparse_logical(c(TRUE, TRUE, TRUE), c(1, 4, 7), 7)
248 |
249 | expect_true(is_sparse_logical(sort(x_sparse)))
250 |
251 | x_sparse <- sparse_logical(c(TRUE, TRUE), c(1, 7), 7)
252 |
253 | expect_true(is_sparse_logical(sort(x_sparse)))
254 |
255 | x_sparse <- sparse_logical(c(TRUE, TRUE), c(1, 7), 7)
256 |
257 | expect_true(is_sparse_logical(sort(x_sparse)))
258 | })
259 |
260 | test_that("default argument is working", {
261 | expect_snapshot(
262 | error = TRUE,
263 | sparse_logical(TRUE, 1, 10, default = TRUE)
264 | )
265 |
266 | expect_snapshot(
267 | error = TRUE,
268 | sparse_logical(c(TRUE, TRUE, NA), c(1, 4, 6), 10, default = TRUE)
269 | )
270 |
271 | x_sparse <- sparse_logical(
272 | value = c(FALSE, NA, FALSE),
273 | position = c(1, 5, 8),
274 | length = 10,
275 | default = TRUE
276 | )
277 |
278 | x_dense <- c(FALSE, TRUE, TRUE, TRUE, NA, TRUE, TRUE, FALSE, TRUE, TRUE)
279 |
280 | for (i in seq_len(10)) {
281 | expect_identical(x_sparse[i], x_dense[i])
282 | }
283 |
284 | expect_identical(x_sparse[1:2], x_dense[1:2])
285 |
286 | expect_identical(x_sparse[3:7], x_dense[3:7])
287 |
288 | expect_identical(x_sparse[c(1, 5, 8, 1)], x_dense[c(1, 5, 8, 1)])
289 |
290 | expect_identical(x_sparse[], x_dense)
291 | })
292 |
293 | test_that("verbose testing", {
294 | withr::local_options("sparsevctrs.verbose_materialize" = TRUE)
295 |
296 | x <- sparse_logical(TRUE, 1, 1)
297 | expect_snapshot({
298 | tmp <- x[]
299 | tmp <- x[]
300 | })
301 |
302 | withr::local_options("sparsevctrs.verbose_materialize" = 2)
303 |
304 | x <- sparse_logical(TRUE, 1, 1)
305 | expect_snapshot({
306 | tmp <- x[]
307 | tmp <- x[]
308 | })
309 |
310 | withr::local_options("sparsevctrs.verbose_materialize" = 3)
311 |
312 | x <- sparse_logical(TRUE, 1, 1)
313 | expect_snapshot(
314 | error = TRUE,
315 | {
316 | tmp <- x[]
317 | }
318 | )
319 | })
320 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_mean.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_mean() works", {
2 | x <- sparse_double(10, 5, 1000)
3 |
4 | expect_equal(mean(x), sparse_mean(x))
5 |
6 | x <- sparse_double(c(10, -10), c(5, 100), 1000)
7 |
8 | expect_equal(mean(x), sparse_mean(x))
9 |
10 | x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20)
11 |
12 | expect_equal(mean(x), sparse_mean(x))
13 |
14 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
15 |
16 | expect_equal(mean(x), sparse_mean(x))
17 |
18 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
19 |
20 | expect_equal(mean(x), sparse_mean(x))
21 |
22 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
23 |
24 | expect_equal(mean(x, na.rm = TRUE), sparse_mean(x, na_rm = TRUE))
25 |
26 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
27 |
28 | expect_equal(mean(x, na.rm = TRUE), sparse_mean(x, na_rm = TRUE))
29 |
30 | x <- sparse_double(numeric(), integer(), 1000)
31 |
32 | expect_equal(mean(x), sparse_mean(x))
33 |
34 | x <- sparse_double(numeric(), integer(), 1000, default = 100)
35 |
36 | expect_equal(mean(x), sparse_mean(x))
37 | })
38 |
39 | test_that("sparse_mean() works with wts argument", {
40 | x <- sparse_double(10, 5, 1000)
41 | wts <- (1:1000)[]
42 |
43 | expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts))
44 |
45 | x <- sparse_double(c(10, -10), c(5, 100), 1000)
46 |
47 | expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts))
48 |
49 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
50 |
51 | expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts))
52 |
53 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
54 |
55 | expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts))
56 |
57 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
58 |
59 | expect_equal(
60 | weighted.mean(x, wts, na.rm = TRUE),
61 | sparse_mean(x, wts = wts, na_rm = TRUE)
62 | )
63 |
64 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
65 |
66 | expect_equal(
67 | weighted.mean(x, wts, na.rm = TRUE),
68 | sparse_mean(x, wts = wts, na_rm = TRUE)
69 | )
70 |
71 | x <- sparse_double(numeric(), integer(), 1000)
72 |
73 | expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts))
74 |
75 | x <- sparse_double(numeric(), integer(), 1000, default = 100)
76 |
77 | expect_equal(weighted.mean(x, wts), sparse_mean(x, wts = wts))
78 | })
79 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_median.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_median() works", {
2 | x <- sparse_double(10, 5, 1000)
3 |
4 | expect_equal(median(x), sparse_median(x))
5 |
6 | x <- sparse_double(c(10, -10), c(5, 100), 1000)
7 |
8 | expect_equal(median(x), sparse_median(x))
9 |
10 | x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20)
11 |
12 | expect_equal(median(x), sparse_median(x))
13 |
14 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
15 |
16 | expect_equal(median(x), sparse_median(x))
17 |
18 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
19 |
20 | expect_equal(median(x), sparse_median(x))
21 |
22 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
23 |
24 | expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE))
25 |
26 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
27 |
28 | expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE))
29 |
30 | x <- sparse_double(numeric(), integer(), 1000)
31 |
32 | expect_equal(median(x), sparse_median(x))
33 |
34 | x <- sparse_double(numeric(), integer(), 1000, default = 100)
35 |
36 | expect_equal(median(x), sparse_median(x))
37 | })
38 |
39 | test_that("sparse_median() edge cases", {
40 | x <- sparse_double(c(10, 10), c(1, 2), 4)
41 |
42 | expect_equal(median(x), sparse_median(x))
43 |
44 | x <- sparse_double(c(10, NA), c(1, 2), 4)
45 |
46 | expect_equal(median(x), sparse_median(x))
47 | expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE))
48 |
49 | x <- sparse_double(c(10, 10, NA), c(1, 2, 3), 5)
50 |
51 | expect_equal(median(x), sparse_median(x))
52 | expect_equal(median(x, na.rm = TRUE), sparse_median(x, na_rm = TRUE))
53 | })
54 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_replace_na.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_replace() works - integers", {
2 | x <- sparse_integer(c(NA, 10, 30), 1:3, 1000)
3 |
4 | res <- sparse_replace_na(x, 100L)
5 | exp <- x
6 | exp[is.na(exp)] <- 100L
7 |
8 | expect_identical(res, exp)
9 | expect_true(is_sparse_integer(res))
10 |
11 | # replace == default
12 | res <- sparse_replace_na(x, 0L)
13 | exp <- x
14 | exp[is.na(exp)] <- 0L
15 |
16 | expect_identical(res, exp)
17 | expect_true(is_sparse_integer(res))
18 | })
19 |
20 | test_that("sparse_replace() works - doubles", {
21 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
22 |
23 | res <- sparse_replace_na(x, 100)
24 | exp <- x
25 | exp[is.na(exp)] <- 100
26 |
27 | expect_identical(res, exp)
28 | expect_true(is_sparse_double(res))
29 |
30 | # replace == default
31 | res <- sparse_replace_na(x, 0)
32 | exp <- x
33 | exp[is.na(exp)] <- 0
34 |
35 | expect_identical(res, exp)
36 | expect_true(is_sparse_double(res))
37 | })
38 |
39 | test_that("sparse_replace() works - characters", {
40 | x <- sparse_character(c(NA, "A", "B"), 1:3, 1000)
41 |
42 | res <- sparse_replace_na(x, "M")
43 | exp <- x
44 | exp[is.na(exp)] <- "M"
45 |
46 | expect_identical(res, exp)
47 | expect_true(is_sparse_character(res))
48 |
49 | # replace == default
50 | res <- sparse_replace_na(x, 0)
51 | exp <- x
52 | exp[is.na(exp)] <- 0
53 |
54 | expect_identical(res, exp)
55 | expect_true(is_sparse_character(res))
56 | })
57 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_sd.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_sd() works", {
2 | x <- sparse_double(10, 5, 1000)
3 |
4 | expect_equal(sd(x), sparse_sd(x))
5 |
6 | x <- sparse_double(c(10, -10), c(5, 100), 1000)
7 |
8 | expect_equal(sd(x), sparse_sd(x))
9 |
10 | x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20)
11 |
12 | expect_equal(sd(x), sparse_sd(x))
13 |
14 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
15 |
16 | expect_equal(sd(x), sparse_sd(x))
17 |
18 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
19 |
20 | expect_equal(sd(x), sparse_sd(x))
21 |
22 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
23 |
24 | expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE))
25 |
26 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
27 |
28 | expect_equal(sd(x, na.rm = TRUE), sparse_sd(x, na_rm = TRUE))
29 |
30 | x <- sparse_double(numeric(), integer(), 1000)
31 |
32 | expect_equal(sd(x), sparse_sd(x))
33 |
34 | x <- sparse_double(numeric(), integer(), 1000, default = 100)
35 |
36 | expect_equal(sd(x), sparse_sd(x))
37 | })
38 | test_that("multiplication works", {
39 | expect_equal(2 * 2, 4)
40 | })
41 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_sqrt.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_sqrt() works", {
2 | x <- sparse_integer(10, 5, 1000)
3 |
4 | expect_true(is_sparse_double(sparse_sqrt(x)))
5 | expect_equal(sqrt(x), sparse_sqrt(x))
6 |
7 | x <- sparse_double(c(10, 100), c(5, 100), 1000)
8 |
9 | expect_true(is_sparse_double(sparse_sqrt(x)))
10 | expect_equal(sqrt(x), sparse_sqrt(x))
11 |
12 | x <- sparse_double(c(10, 100), c(5, 100), 1000, default = 20)
13 |
14 | expect_true(is_sparse_double(sparse_sqrt(x)))
15 | expect_equal(sqrt(x), sparse_sqrt(x))
16 |
17 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
18 |
19 | expect_true(is_sparse_double(sparse_sqrt(x)))
20 | expect_equal(sqrt(x), sparse_sqrt(x))
21 |
22 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
23 |
24 | expect_true(is_sparse_double(sparse_sqrt(x)))
25 | expect_equal(sqrt(x), sparse_sqrt(x))
26 |
27 | x <- sparse_double(numeric(), integer(), 1000)
28 |
29 | expect_true(is_sparse_double(sparse_sqrt(x)))
30 | expect_equal(sqrt(x), sparse_sqrt(x))
31 |
32 | x <- sparse_double(numeric(), integer(), 1000, default = 100)
33 |
34 | expect_true(is_sparse_double(sparse_sqrt(x)))
35 | expect_equal(sqrt(x), sparse_sqrt(x))
36 | })
37 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_var.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_var() works", {
2 | x <- sparse_double(10, 5, 1000)
3 |
4 | expect_equal(var(x), sparse_var(x))
5 |
6 | x <- sparse_double(c(10, -10), c(5, 100), 1000)
7 |
8 | expect_equal(var(x), sparse_var(x))
9 |
10 | x <- sparse_double(c(10, -10), c(5, 100), 1000, default = 20)
11 |
12 | expect_equal(var(x), sparse_var(x))
13 |
14 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
15 |
16 | expect_equal(var(x), sparse_var(x))
17 |
18 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
19 |
20 | expect_equal(var(x), sparse_var(x))
21 |
22 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
23 |
24 | expect_equal(var(x, na.rm = TRUE), sparse_var(x, na_rm = TRUE))
25 |
26 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000, default = 100)
27 |
28 | expect_equal(var(x, na.rm = TRUE), sparse_var(x, na_rm = TRUE))
29 |
30 | x <- sparse_double(numeric(), integer(), 1000)
31 |
32 | expect_equal(var(x), sparse_var(x))
33 |
34 | x <- sparse_double(numeric(), integer(), 1000, default = 100)
35 |
36 | expect_equal(var(x), sparse_var(x))
37 | })
38 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparse_which_na.R:
--------------------------------------------------------------------------------
1 | test_that("sparse_which_na() works - double", {
2 | x <- sparse_double(c(10, -10), c(5, 100), 1000)
3 |
4 | expect_equal(which(is.na(x)), sparse_which_na(x))
5 |
6 | x <- sparse_double(c(NA, 10, 30), 1:3, 1000)
7 |
8 | expect_equal(which(is.na(x)), sparse_which_na(x))
9 | })
10 |
11 | test_that("sparse_which_na() works - integer", {
12 | x <- sparse_integer(c(10, -10), c(5, 100), 1000)
13 |
14 | expect_equal(which(is.na(x)), sparse_which_na(x))
15 |
16 | x <- sparse_integer(c(NA, 10, 30), 1:3, 1000)
17 |
18 | expect_equal(which(is.na(x)), sparse_which_na(x))
19 | })
20 |
21 | test_that("sparse_which_na() works - logical", {
22 | x <- sparse_logical(c(TRUE, TRUE), c(5, 100), 1000)
23 |
24 | expect_equal(which(is.na(x)), sparse_which_na(x))
25 |
26 | x <- sparse_logical(c(NA, TRUE, TRUE), 1:3, 1000)
27 |
28 | expect_equal(which(is.na(x)), sparse_which_na(x))
29 | })
30 |
31 | test_that("sparse_which_na() works - character", {
32 | x <- sparse_character(c("A", "B"), c(5, 100), 1000)
33 |
34 | expect_equal(which(is.na(x)), sparse_which_na(x))
35 |
36 | x <- sparse_character(c(NA, "A", "B"), 1:3, 1000)
37 |
38 | expect_equal(which(is.na(x)), sparse_which_na(x))
39 | })
40 |
--------------------------------------------------------------------------------
/tests/testthat/test-sparsity.R:
--------------------------------------------------------------------------------
1 | test_that("works with data.frames", {
2 | mtcars_exp_sparsity <- mean(mtcars == 0)
3 |
4 | expect_identical(
5 | sparsity(mtcars),
6 | mtcars_exp_sparsity
7 | )
8 | })
9 |
10 | test_that("works with non-numeric data.frames", {
11 | vs <- mtcars$vs
12 | mtcars$vs <- 4
13 | mtcars_exp_sparsity <- mean(mtcars == 0)
14 |
15 | mtcars$vs <- as.character(vs)
16 |
17 | expect_identical(
18 | sparsity(mtcars),
19 | mtcars_exp_sparsity
20 | )
21 |
22 | mtcars$vs <- as.logical(vs)
23 |
24 | expect_identical(
25 | sparsity(mtcars),
26 | mtcars_exp_sparsity
27 | )
28 |
29 | mtcars$vs <- ifelse(vs == 1, 1, NA)
30 |
31 | expect_identical(
32 | sparsity(mtcars),
33 | mtcars_exp_sparsity
34 | )
35 | })
36 |
37 | test_that("works with numeric classes in data.frames (#106)", {
38 | vs <- mtcars$vs
39 | mtcars$vs <- 4
40 | mtcars_exp_sparsity <- mean(mtcars == 0)
41 |
42 | class(vs) <- "something"
43 | mtcars$vs <- vs
44 |
45 | expect_identical(
46 | sparsity(mtcars),
47 | mtcars_exp_sparsity
48 | )
49 | })
50 |
51 | structure(
52 | c(
53 | 1,
54 | 1,
55 | 1,
56 | 1,
57 | 1,
58 | 1,
59 | 1,
60 | 1,
61 | 1,
62 | 1,
63 | 1,
64 | 1,
65 | 1,
66 | 1,
67 | 1,
68 | 1,
69 | 1,
70 | 1,
71 | 1,
72 | 1,
73 | 1,
74 | 1,
75 | 1,
76 | 1,
77 | 1,
78 | 1,
79 | 1,
80 | 1,
81 | 1,
82 | 1,
83 | 1,
84 | 1,
85 | 1,
86 | 1,
87 | 1,
88 | 1,
89 | 1,
90 | 1,
91 | 1,
92 | 1,
93 | 1,
94 | 1,
95 | 1,
96 | 1,
97 | 1,
98 | 1,
99 | 1,
100 | 1,
101 | 1,
102 | 1,
103 | 1,
104 | 1,
105 | 5
106 | ),
107 | class = c("hardhat_importance_weights", "hardhat_case_weights", "vctrs_vctr")
108 | )
109 |
110 | test_that("works with data.frames sample arg", {
111 | set.seed(1234)
112 | exp <- mean(mtcars[sample(32, 10), ] == 0)
113 |
114 | set.seed(1234)
115 | expect_identical(
116 | sparsity(mtcars, sample = 10),
117 | exp
118 | )
119 |
120 | set.seed(1234)
121 | exp <- mean(mtcars == 0)
122 |
123 | set.seed(1234)
124 | expect_identical(
125 | sparsity(mtcars, sample = 1000),
126 | exp
127 | )
128 |
129 | expect_snapshot(
130 | error = TRUE,
131 | sparsity(mtcars, sample = 0.4)
132 | )
133 | })
134 |
135 | test_that("works with matrices", {
136 | mtcars_mat <- as.matrix(mtcars)
137 | mtcars_exp_sparsity <- mean(mtcars_mat == 0)
138 |
139 | expect_identical(
140 | sparsity(mtcars_mat),
141 | mtcars_exp_sparsity
142 | )
143 |
144 | mtcars_mat[1, 1] <- NA
145 |
146 | expect_identical(
147 | sparsity(mtcars_mat),
148 | mtcars_exp_sparsity
149 | )
150 |
151 | mtcars_lgl <- apply(mtcars_mat, 2, as.logical)
152 |
153 | expect_identical(
154 | sparsity(mtcars_lgl),
155 | 0
156 | )
157 |
158 | mtcars_chr <- apply(mtcars_mat, 2, as.character)
159 |
160 | expect_identical(
161 | sparsity(mtcars_chr),
162 | 0
163 | )
164 | })
165 |
166 | test_that("works with sparse matrices", {
167 | mtcars_sparse_mat <- coerce_to_sparse_matrix(mtcars)
168 | mtcars_exp_sparsity <- mean(as.logical(mtcars_sparse_mat == 0))
169 |
170 | expect_equal(
171 | sparsity(mtcars_sparse_mat),
172 | mtcars_exp_sparsity
173 | )
174 |
175 | mtcars_sparse_mat[1, 1] <- NA
176 |
177 | expect_equal(
178 | sparsity(mtcars_sparse_mat),
179 | mtcars_exp_sparsity
180 | )
181 | })
182 |
--------------------------------------------------------------------------------
/tests/testthat/test-type-predicates.R:
--------------------------------------------------------------------------------
1 | test_that("is_sparse_vector works", {
2 | expect_true(is_sparse_vector(sparse_double(1, 1, 1)))
3 | expect_true(is_sparse_vector(sparse_integer(1, 1, 1)))
4 |
5 | expect_false(is_sparse_vector(c(1, 1, 1)))
6 | expect_false(is_sparse_vector(1:10))
7 | expect_false(is_sparse_vector(NULL))
8 | })
9 |
10 | test_that("is_sparse_numeric works", {
11 | expect_true(is_sparse_numeric(sparse_double(1, 1, 1)))
12 | expect_true(is_sparse_numeric(sparse_integer(1, 1, 1)))
13 |
14 | expect_false(is_sparse_numeric(c(1, 1, 1)))
15 | expect_false(is_sparse_numeric(1:10))
16 | expect_false(is_sparse_numeric(NULL))
17 | })
18 |
19 | test_that("is_sparse_double works", {
20 | expect_true(is_sparse_double(sparse_double(1, 1, 1)))
21 |
22 | expect_false(is_sparse_double(c(1, 1, 1)))
23 | expect_false(is_sparse_double(1:10))
24 | expect_false(is_sparse_double(NULL))
25 | })
26 |
27 | test_that("is_sparse_integer works", {
28 | expect_true(is_sparse_integer(sparse_integer(1, 1, 1)))
29 |
30 | expect_false(is_sparse_integer(c(1, 1, 1)))
31 | expect_false(is_sparse_integer(1:10))
32 | expect_false(is_sparse_integer(NULL))
33 | })
34 |
35 | test_that("is_sparse_character works", {
36 | expect_true(is_sparse_character(sparse_character("A", 1, 1)))
37 |
38 | expect_false(is_sparse_character(c(1, 1, 1)))
39 | expect_false(is_sparse_character(1:10))
40 | expect_false(is_sparse_character(NULL))
41 | })
42 |
43 | test_that("is_sparse_logical works", {
44 | expect_true(is_sparse_logical(sparse_logical(TRUE, 1, 1)))
45 |
46 | expect_false(is_sparse_logical(c(1, 1, 1)))
47 | expect_false(is_sparse_logical(1:10))
48 | expect_false(is_sparse_logical(NULL))
49 | })
50 |
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/articles/.gitignore:
--------------------------------------------------------------------------------
1 | /.quarto/
2 |
--------------------------------------------------------------------------------
/vignettes/design.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Design behind sparsevctrs"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{Design behind sparsevctrs}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 | ```{r}
11 | #| include: false
12 | knitr::opts_chunk$set(
13 | collapse = TRUE,
14 | comment = "#>"
15 | )
16 | ```
17 |
18 | ```{r}
19 | #| label: setup
20 | library(sparsevctrs)
21 | ```
22 |
23 | The sparsevctrs package produces 3 things; ALTREP classes, matrix/data.frame converting functions, helper functions. This document outlines the rationale behind each of these and the decisions behind them.
24 |
25 | The primary objective of this package is to provide tools to work with sparse data in data.frames/tibbles. The next highest priority is execution speed. This means that algorithms and methods in this package are written to minimize memory allocations whenever possible, once that is done, running the code as fast as we can. These choices are made because this package was written to deal with tasks that were otherwise not possible due to memory constraints.
26 |
27 | ## Altrep Functions
28 |
29 | The functions `sparse_double()` and its relatives are used to construct sparse vectors of the noted type. To work they all need 4 pieces of information:
30 |
31 | - `values`
32 | - `positions`
33 | - `length`
34 | - `default` (defaults to 0)
35 |
36 | The values need to match the type of the function name or be easily coerced into the type (double -> integer). The positions should be integers or doubles that can losslessly be turned into integers. The length should be a single non-negative integer-like value.
37 |
38 | Values and positions are paired, and will thus be expected to be the same length, furthermore, positions are expected to be sorted in increasing order with no duplicates. The ordering is done to let the various extraction methods work as efficiently as possible.
39 |
40 | These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible.
41 |
42 | The input of these functions mirrors the values stored in the ALTREP class that they produce.
43 |
44 | ## Converting Functions
45 |
46 | 3 functions fall into this category:
47 |
48 | - `coerce_to_sparse_data_frame()`
49 | - `coerce_to_sparse_tibble()`
50 | - `coerce_to_sparse_matrix()`
51 |
52 | the first two take a sparse matrix from the Matrix package and produce a data.frame/tibble with sparse columns. The last one takes a data.frame/tibble with sparse columns and produces a sparse matrix using the Matrix package.
53 |
54 | These functions are expected to be inverse of each other, such that `coerce_to_sparse_matrix(coerce_to_sparse_data_frame(x))` returns `x` back. They are made to be highly performant both in terms of speed and memory consumption, Meaning that sparsity is applied when appropriate.
55 |
56 | These functions have quite strict input checking by choice, to allow the inner workings to be as efficient as possible. It is in part why data.frames with sparse vectors with different can't be used with `coerce_to_sparse_matrix()` yet.
57 |
58 | ## Helper Functions
59 |
60 | There are 3 types of helper functions. First, we have the `is_*` family of functions. The specific `is_sparse_double()` and more general `is_sparse_vector()` can be used as a way to determine whether a vector is an ALTREP sparse vector. This is otherwise hard to tell as `as.numeric()` can't tell the difference.
61 |
62 | Secondly, we have the extraction functions. They are `sparse_values()` and `sparse_positions()`. These extract the values and positions respectively, without materializing the whole dense vector. These functions are made to work with non-sparse vectors as well to make them more ergonomic for the user. Internally they call `is_sparse_vector()`, so the choice to return something useful as the alternative wasn't hard. There is no `sparse_length()` function as `length()` works with these types of
63 |
64 | The last type of helper function is less clearly defined and is expanded as needed. The functions provide alternatives to functions that don't have ALTREP support. Such as `mean()`. Calling `mean()` on a sparse vector will force materialization, and then calculate the mean. This is memory inefficient as it could have been calculated like so.
65 |
66 | ```r
67 | sum(sparse_values(x)) / length(x)
68 | ```
69 |
70 | These functions, all starting with the name prefix `sparse_*`, are made to work with non-sparse vectors for the same reasons listed above regarding ergonomic use.
71 |
72 | ## FAQ
73 |
74 | > Why aren't the results returned as {vctrs} classes?
75 |
76 | As it stands right now, it is viewed to be beneficial to have the users not be alerted to these vectors as they are expected to be used internally in packages and rarely by the end user. Furthermore having these sparse vectors produce the same result as dense vectors with `class()` is a big plus.
77 |
78 | > Will this package try to replace the {Matrix} package?
79 |
80 | Not at all. The sparse vector types provided in this package mimic those created with `Matrix::sparseVector()`. They work with different types and allow for different defaults. None of the matrix operations will be reimplemented here.
81 |
--------------------------------------------------------------------------------