├── .github
    ├── .gitignore
    ├── ISSUE_TEMPLATE
    │   ├── feature_request.md
    │   └── bug_report.md
    ├── pull_request_template.md
    └── workflows
    │   ├── R-CMD-check.yaml
    │   ├── pkgdown.yaml
    │   ├── test-coverage.yaml
    │   └── update-citation-cff.yaml
├── data
    ├── GGDC10S.rda
    └── wlddev.rda
├── man
    ├── figures
    │   └── logo.png
    ├── t_list.Rd
    ├── is_unlistable.Rd
    ├── ldepth.Rd
    ├── rapply2d.Rd
    ├── groupid.Rd
    ├── collapse-renamed.Rd
    ├── fdroplevels.Rd
    ├── group.Rd
    ├── wlddev.Rd
    ├── colorder.Rd
    ├── fdist.Rd
    ├── rowbind.Rd
    ├── fslice.Rd
    ├── fnobs.Rd
    ├── summary-statistics.Rd
    ├── fcount.Rd
    ├── rsplit.Rd
    ├── timeid.Rd
    ├── pad.Rd
    ├── pwcor_pwcov_pwnobs.Rd
    ├── frename.Rd
    ├── radixorder.Rd
    ├── GGDC10S.Rd
    ├── ffirst_flast.Rd
    ├── time-series-panel-series.Rd
    ├── list-processing.Rd
    ├── roworder.Rd
    └── fndistinct.Rd
├── misc
    ├── figures
    │   ├── Thumbs.db
    │   └── collapse_logo_small.png
    ├── JSS article
    │   ├── R&R
    │   │   └── article.pdf
    │   └── final
    │   │   ├── article.pdf
    │   │   └── jss5278
    │   │       └── jss5278.pdf
    ├── collapse cheat sheet
    │   ├── preview
    │   │   ├── page1.png
    │   │   └── page2.png
    │   ├── background_cropped.png
    │   ├── collapse_cheat_sheet.pdf
    │   ├── collapse_logo_vsmall.png
    │   └── old PPT cheatsheet
    │   │   ├── collapse.pdf
    │   │   ├── collapse.pptx
    │   │   └── collapse_need_fonts.pdf
    ├── useR2022 presentation
    │   └── collapse_useR2022_final.pdf
    └── legacy
    │   ├── sorted out 1.0.0 - 1.1.0
    │       └── myomp.h
    │   ├── sorted out 1.1.0 - 1.2.0
    │       ├── cran-comments.md
    │       └── types.h
    │   ├── sorted out 1.6.0 - 1.6.2
    │       └── Makevars.makefile
    │   ├── sorted out 1.7.6 - 1.8.0
    │       └── collapse-depreciated.Rd
    │   └── sorted out 1.5.3 - 1.6.0
    │       └── fmax.c
├── pkgdown
    ├── favicon
    │   ├── favicon.ico
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   ├── apple-touch-icon.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   └── apple-touch-icon-180x180.png
    └── extra.css
├── vignettes
    ├── figure
    │   ├── mts-1.png
    │   ├── stl-1.png
    │   ├── AGRmat-1.png
    │   ├── BWplot-1.png
    │   ├── Dplot-1.png
    │   ├── Gplot-1.png
    │   ├── PSACF-1.png
    │   ├── PSACF-2.png
    │   ├── PSACF-3.png
    │   ├── PSACF-4.png
    │   ├── AREA_Ag-1.png
    │   ├── FEVDplot-1.png
    │   ├── GRPplot-1.png
    │   ├── IRFplot-1.png
    │   ├── PLMGDPmat-1.png
    │   ├── PLMGDPmat-2.png
    │   ├── PLMGDPmat-3.png
    │   ├── PVARplot-1.png
    │   ├── plm_psacf-1.png
    │   ├── plm_psccf-1.png
    │   ├── psarplot-1.png
    │   ├── psarplot2-1.png
    │   ├── psmatplot-1.png
    │   ├── AGRmatplot-1.png
    │   ├── AGRmatplot2-1.png
    │   ├── plm_pspacf-1.png
    │   ├── psmatplot2-1.png
    │   ├── scplot_BWA-1.png
    │   ├── PVARfittedplot-1.png
    │   └── pwlddev_plot-1.png
    └── .gitignore
├── .gitattributes
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-sf.R
    │   ├── test-splitting.R
    │   ├── test-fslice.R
    │   ├── test-dapply.R
    │   ├── test-flm-fFtest.R
    │   ├── test-whichv.R
    │   └── test-pivot.R
├── .gitignore
├── src
    ├── Makevars
    ├── Makevars.win
    ├── base_radixsort.h
    ├── extptr.c
    ├── kit.h
    ├── stats_pacf.c
    ├── data.table.h
    └── collapse_cpp.h
├── codecov.yml
├── collapse.Rproj
├── .Rbuildignore
├── CONTRIBUTING.md
├── inst
    └── CITATION
├── DESCRIPTION
└── R
    ├── qtab.R
    ├── fcount.R
    ├── TRA.R
    ├── dapply.R
    ├── fcumsum.R
    ├── fslice.R
    ├── fndistinct.R
    └── rsplit.R


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/data/GGDC10S.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/data/GGDC10S.rda


--------------------------------------------------------------------------------
/data/wlddev.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/data/wlddev.rda


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/man/figures/logo.png


--------------------------------------------------------------------------------
/misc/figures/Thumbs.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/figures/Thumbs.db


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/vignettes/figure/mts-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/mts-1.png


--------------------------------------------------------------------------------
/vignettes/figure/stl-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/stl-1.png


--------------------------------------------------------------------------------
/vignettes/figure/AGRmat-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/AGRmat-1.png


--------------------------------------------------------------------------------
/vignettes/figure/BWplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/BWplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/Dplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/Dplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/Gplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/Gplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/PSACF-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PSACF-1.png


--------------------------------------------------------------------------------
/vignettes/figure/PSACF-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PSACF-2.png


--------------------------------------------------------------------------------
/vignettes/figure/PSACF-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PSACF-3.png


--------------------------------------------------------------------------------
/vignettes/figure/PSACF-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PSACF-4.png


--------------------------------------------------------------------------------
/misc/JSS article/R&R/article.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/JSS article/R&R/article.pdf


--------------------------------------------------------------------------------
/vignettes/figure/AREA_Ag-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/AREA_Ag-1.png


--------------------------------------------------------------------------------
/vignettes/figure/FEVDplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/FEVDplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/GRPplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/GRPplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/IRFplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/IRFplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/PLMGDPmat-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PLMGDPmat-1.png


--------------------------------------------------------------------------------
/vignettes/figure/PLMGDPmat-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PLMGDPmat-2.png


--------------------------------------------------------------------------------
/vignettes/figure/PLMGDPmat-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PLMGDPmat-3.png


--------------------------------------------------------------------------------
/vignettes/figure/PVARplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PVARplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/plm_psacf-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/plm_psacf-1.png


--------------------------------------------------------------------------------
/vignettes/figure/plm_psccf-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/plm_psccf-1.png


--------------------------------------------------------------------------------
/vignettes/figure/psarplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/psarplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/psarplot2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/psarplot2-1.png


--------------------------------------------------------------------------------
/vignettes/figure/psmatplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/psmatplot-1.png


--------------------------------------------------------------------------------
/misc/JSS article/final/article.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/JSS article/final/article.pdf


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/vignettes/figure/AGRmatplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/AGRmatplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/AGRmatplot2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/AGRmatplot2-1.png


--------------------------------------------------------------------------------
/vignettes/figure/plm_pspacf-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/plm_pspacf-1.png


--------------------------------------------------------------------------------
/vignettes/figure/psmatplot2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/psmatplot2-1.png


--------------------------------------------------------------------------------
/vignettes/figure/scplot_BWA-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/scplot_BWA-1.png


--------------------------------------------------------------------------------
/misc/figures/collapse_logo_small.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/figures/collapse_logo_small.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/vignettes/figure/PVARfittedplot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/PVARfittedplot-1.png


--------------------------------------------------------------------------------
/vignettes/figure/pwlddev_plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/vignettes/figure/pwlddev_plot-1.png


--------------------------------------------------------------------------------
/misc/JSS article/final/jss5278/jss5278.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/JSS article/final/jss5278/jss5278.pdf


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/preview/page1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/preview/page1.png


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/preview/page2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/preview/page2.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/background_cropped.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/background_cropped.png


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/collapse_cheat_sheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/collapse_cheat_sheet.pdf


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/collapse_logo_vsmall.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/collapse_logo_vsmall.png


--------------------------------------------------------------------------------
/misc/useR2022 presentation/collapse_useR2022_final.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/useR2022 presentation/collapse_useR2022_final.pdf


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/old PPT cheatsheet/collapse.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/old PPT cheatsheet/collapse.pdf


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/old PPT cheatsheet/collapse.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/old PPT cheatsheet/collapse.pptx


--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | docs/** linguist-documentation
2 | man/** linguist-documentation
3 | tests/** linguist-vendored
4 | vignettes/** linguist-vendored
5 | misc/** linguist-vendored
6 | 


--------------------------------------------------------------------------------
/misc/collapse cheat sheet/old PPT cheatsheet/collapse_need_fonts.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fastverse/collapse/HEAD/misc/collapse cheat sheet/old PPT cheatsheet/collapse_need_fonts.pdf


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | # rm(list = ls())
2 | # Sys.setenv(R_TESTS = "")
3 | library(testthat)
4 | options(collapse_export_F = TRUE) # set before test_check() runs; presumably makes collapse export F() - TODO confirm
5 | # library(collapse)
6 | # Run the testthat test suite of the "collapse" package.
7 | test_check("collapse")
8 | 
9 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | *.Rproj.user
 2 | *.Rhistory
 3 | *.RData
 4 | *.RDataTmp
 5 | *.Ruserdata
 6 | *.Rproj
 7 | *vignette.pdf
 8 | *.o
 9 | *.so
10 | *.dll
11 | release-Announcement*
12 | inst/doc
13 | doc
14 | Meta
15 | testing
16 | .Rproj.user
17 | *.db
18 | 


--------------------------------------------------------------------------------
/src/Makevars:
--------------------------------------------------------------------------------
1 | ## -- compiling for OpenMP: the nested $(subst ...) yields the name SHLIB_OPENMP_CFLAGS, so PKG_CFLAGS expands to $(SHLIB_OPENMP_CFLAGS)
2 | PKG_CFLAGS = $($(subst OPENMP,OPENMP_CFLAGS,SHLIB_OPENMP))
3 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS
4 | ## -- using C++ 11 (currently disabled)
5 | # CXX_STD = CXX11
6 | ## -- linking for OpenMP (with the C++ OpenMP flags)
7 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS)
8 | 


--------------------------------------------------------------------------------
/src/Makevars.win:
--------------------------------------------------------------------------------
1 | ## -- compiling for OpenMP: the nested $(subst ...) yields the name SHLIB_OPENMP_CFLAGS, so PKG_CFLAGS expands to $(SHLIB_OPENMP_CFLAGS) plus -O3
2 | PKG_CFLAGS = $($(subst OPENMP,OPENMP_CFLAGS,SHLIB_OPENMP)) -O3
3 | PKG_CXXFLAGS = $(SHLIB_OPENMP_CXXFLAGS) -DSTRICT_R_HEADERS
4 | ## -- using C++ 11 (currently disabled)
5 | # CXX_STD = CXX11
6 | ## -- linking for OpenMP (with the C++ OpenMP flags)
7 | PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS)
8 | 


--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
 1 | comment: false
 2 | 
 3 | coverage:
 4 |   status:
 5 |     project:
 6 |       default:
 7 |         target: auto
 8 |         threshold: 1%
 9 |         informational: true
10 |     patch:
11 |       default:
12 |         target: auto
13 |         threshold: 1%
14 |         informational: true
15 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
 1 | *.html
 2 | *.R
 3 | .build.timestamp
 4 | collapse_intro_files
 5 | collapse_and_dplyr_files
 6 | collapse_and_plm_files
 7 | collapse_intro_cache
 8 | collapse_intro_cache
 9 | collapse_and_dplyr_cache
10 | collapse_and_plm_cache
11 | collapse_and_data.table_cache
12 | collapse_and_sf_cache
13 | 
14 | 


--------------------------------------------------------------------------------
/misc/legacy/sorted out 1.0.0 - 1.1.0/myomp.h:
--------------------------------------------------------------------------------
 1 | #ifdef _OPENMP
 2 |   #include <omp.h>
 3 | #else
 4 |   // Fallbacks for compilers without OpenMP support: the omp_* queries below become constants (one thread, thread number 0)
 5 |   #define omp_get_num_threads()  1
 6 |   #define omp_get_thread_num()   0
 7 |   #define omp_get_max_threads()  1
 8 |   #define omp_get_thread_limit() 1
 9 |   #define omp_get_num_procs()    1
10 |   #define omp_set_nested(a)   // empty statement to remove the call
11 |   #define omp_get_wtime()        0
12 | #endif
13 | 


--------------------------------------------------------------------------------
/collapse.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Yes
 4 | SaveWorkspace: Yes
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-docs --no-multiarch
21 | PackageBuildArgs: --no-docs
22 | PackageBuildBinaryArgs: --no-docs
23 | PackageCheckArgs: --as-cran
24 | 


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^collapse\.Rproj$
 2 | ^\.Rproj\.user$
 3 | \.fsrc$
 4 | \.o$
 5 | \.so$
 6 | ^CONTRIBUTING\.md$
 7 | ^README\.md$
 8 | ^README\.Rmd$
 9 | ^cran-comments\.md$
10 | ^experimental$
11 | ^testing$
12 | ^\.travis\.yml$
13 | ^\.build\.timestamp$
14 | ^\.RData
15 | ^Meta$
16 | ^CRAN-RELEASE$
17 | ^_pkgdown\.yml$
18 | ^docs$
19 | ^pkgdown$
20 | man/figures
21 | ^misc$
22 | ^codecov\.yml$
23 | ^\.covignore$
24 | ^\.github$
25 | ^\.fastverse$
26 | \.log$
27 | _cache$
28 | _snaps
29 | ^CITATION\.cff$
30 | ^\.DS_Store$
31 | ^revdep$
32 | \.orig$
33 | vignettes/figure
34 | vignettes/cache
35 | 
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to *collapse*
2 | 
3 | - Please file an issue or feature request, ideally using the [templates](https://github.com/fastverse/collapse/tree/development/.github/ISSUE_TEMPLATE).
4 | - For broader proposals start a [discussion](https://github.com/fastverse/collapse/discussions).
5 | - To contribute directly, fork the entire repo (including the 'development' branch), make your changes in the 'development' branch, and send a PR to the 'development' branch.
6 | - I'll mention contributors in the `DESCRIPTION` file as `"ctb"` if the contribution is a substantial improvement or new functionality. 
7 | 


--------------------------------------------------------------------------------
/misc/legacy/sorted out 1.1.0 - 1.2.0/cran-comments.md:
--------------------------------------------------------------------------------
 1 | ## Resubmission collapse 1.1.0
 2 | In this version I have:
 3 | 
 4 | * Fixed valgrind issues.
 5 | 
 6 | ## Test environments
 7 | * local Windows 8.1 install, R 3.6.1
 8 | * win-builder (devel and release)
 9 | 
10 | ## R CMD check results
11 | There were no ERRORs or WARNINGs.
12 | 
13 | There was 1 NOTE:
14 | 
15 |   * checking installed package size ... NOTE
16 |     installed size is  7.9Mb
17 |     sub-directories of 1Mb or more:
18 |       doc    1.6Mb
19 |       libs   4.7Mb
20 | 
21 | This has to do with compiled files. Data is 0.5 Mb. 
22 | 


--------------------------------------------------------------------------------
/misc/legacy/sorted out 1.6.0 - 1.6.2/Makevars.makefile:
--------------------------------------------------------------------------------
 1 | strippedLib: $(SHLIB)
 2 | 		if test -e "/usr/bin/strip" && test -e "/bin/uname" && [ "`uname`" = "Linux" ] ; then /usr/bin/strip --strip-debug $(SHLIB); fi
 3 | .PHONY: strippedLib
 4 | # strippedLib: once $(SHLIB) is built, strip its debug symbols, but only on Linux and only if /usr/bin/strip and /bin/uname exist. The checks are chained with && (a single & would run them in the background and ignore their result) and use POSIX [ ... = ... ] because make runs recipes with /bin/sh by default, where [[ ... ]] may not exist. .PHONY must be upper case to be recognised by make.
 5 | #-- Compiler and Linker Flags
 6 | # -march=native
 7 | # -march (or -mcpu) builds exclusively for an architecture
 8 | # -mtune optimizes for an architecture, but builds for whole processor family
 9 | # PKG_CFLAGS="-O3 -pipe"
10 | # PKG_CXXFLAGS="-O3 -pipe"
11 | # PKG_CPPFLAGS=-O3 -pipe
12 | 
13 | # PKG_CFLAGS = -O3
14 | 
15 | # CFLAGS = -g -O -mtune=native
16 | ## for C++ code
17 | # CXXFLAGS = -g -O -mtune=native
18 | 
19 | # Summary: O2 can be faster !
20 | 


--------------------------------------------------------------------------------
/src/base_radixsort.h:
--------------------------------------------------------------------------------
 1 | // #include <Defn.h> // Not available in C API !!
 2 | // #include <Internal.h> // Not available in C API !!
 3 | // #define USE_RINTERNALS
 4 | #include <R.h>
 5 | #include <Rinternals.h>
 6 | #include <stdint.h>
 7 | #include "internal/R_defn.h"
 8 | // typedef uint64_t ZPOS64_T; // already defined in stdint.h
 9 | // Prototypes only; the definitions are not in this header. NOTE(review): in the *radixsort functions o is presumably the output ordering, x the input and n its length - confirm against the definitions.
10 | void checkEncodings(SEXP x);
11 | SEXP Cradixsort(SEXP NA_last, SEXP decreasing, SEXP RETstrt, SEXP RETgs, SEXP SORTStr, SEXP args);
12 | void num1radixsort(int *o, Rboolean NA_last, Rboolean decreasing, SEXP x);
13 | void iradixsort(int *o, Rboolean NA_last, Rboolean decreasing, int n, int *x);
14 | void dradixsort(int *o, Rboolean NA_last, Rboolean decreasing, int n, double *x);
15 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Suggest an idea for this project
 4 | title: "[FEAT] "
 5 | labels: enhancement
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **What aspect of the package is your request related to?**
11 | <!-- Give a brief explanation and justification about the proposed change, e.g., a new statistical function, extra arguments for existing functions, alternative algorithm implementation, support for more classes/types. -->
12 | 
13 | **Describe the solution you'd like**
14 | <!-- Provide a clear and concise description of what you want to happen. -->
15 | 
16 | **Additional context**
17 | <!-- Add any other context or screenshots about the feature request here. -->
18 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: "[BUG] "
 5 | labels: bug
 6 | assignees: ''
 7 | 
 8 | ---
 9 | 
10 | **Describe the bug**
11 | <!-- Provide a clear and concise description of what the bug is. -->
12 | 
13 | **Steps/Code to Reproduce**
14 | <!-- Add a minimal code example that can be used to reproduce the error. -->
15 | 
16 | ```r
17 | # add relevant code
18 | 
19 | ```
20 | 
21 | **Expected behavior**
22 | <!-- Provide a clear and concise description of what you expected to happen. -->
23 | 
24 | **Screenshots**
25 | <!-- If applicable, add screenshots to help explain your problem. -->
26 | 
27 | **Additional context**
28 | <!-- Add any other context about the problem here, like package versions, `sessionInfo()` etc. -->
29 | 


--------------------------------------------------------------------------------
/misc/legacy/sorted out 1.1.0 - 1.2.0/types.h:
--------------------------------------------------------------------------------
 1 | #include<stdint.h>
 2 | 
 3 | /*
 4 |  * ans_t: a struct to carry out the results of embarrassingly parallel computation.
 5 |  * It catches verbose stdout messages, stderr messages, warnings and errors.
 6 |  * Safe to use inside parallel regions; it is of course allocated outside of them.
 7 |  */
 8 | #define ANS_MSG_SIZE 4096
 9 | typedef struct ans_t {
10 |   int32_t *int_v;        // used in nafill
11 |   double *dbl_v;         // used in froll, nafill
12 |   int64_t *int64_v;      // not used yet
13 |   uint8_t status;        // 0:ok, 1:message, 2:warning, 3:error; unix return signal: {0,1,2}=0, {3}=1
14 |   char message[4][ANS_MSG_SIZE]; // four buffers of ANS_MSG_SIZE chars - STDOUT: output, STDERR: message, warning, error
15 | // implicit n_message limit discussed here: https://github.com/Rdatatable/data.table/issues/3423#issuecomment-487722586
16 | } ans_t;
17 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
 1 | ## Description
 2 | 
 3 | <!-- 
 4 | Provide a brief and concise description of the changes. You can reference existing
 5 | issues or pull requests as needed, for example, fixes #1, closes #2, similar to #3.
 6 | -->
 7 | 
 8 | ## Main Changes
 9 | 
10 | <!-- 
11 | List key changes as bullets, for example:
12 | - fixed a bug in `fsum` when NAs are present
13 | - added a new `how` argument to `merge` function
14 | - refactored `utils.R` to improve readability
15 | -->
16 | 
17 | ## Checklist
18 | 
19 | <!-- 
20 | Make sure you can tick all the boxes:
21 | -->
22 | 
23 | - [ ] I have performed a self-review of my code.
24 | - [ ] I have commented my code, particularly in hard-to-understand areas.
25 | - [ ] I have updated the documentation where applicable.
26 | 
27 | ## Additional Context
28 | 
29 | <!-- Any other relevant information goes here.-->
30 | 


--------------------------------------------------------------------------------
/src/extptr.c:
--------------------------------------------------------------------------------
 1 | #include "collapse_c.h"
 2 | 
 3 | static void eptrFinalizer(SEXP eptr) {
 4 |   // Finalizer registered by createeptr(): clear the stored address, unless it is already NULL.
 5 |   // R_SetExternalPtrProtected(eptr, R_NilValue);
 6 |   if(R_ExternalPtrAddr(eptr)) R_ClearExternalPtr(eptr);
 7 | }
 8 | 
 9 | SEXP createeptr(SEXP x) {
10 |   SEXP ptr = PROTECT(R_MakeExternalPtr(x, R_NilValue, R_NilValue)); // Only the address of x is stored. 'tag' and 'prot' stay R_NilValue: using them would include the object in the pointer, which defeats the purpose (memory efficiency).
11 |   R_RegisterCFinalizerEx(ptr, eptrFinalizer, TRUE); // third argument is 'onexit'
12 |   UNPROTECT(1);
13 |   return ptr;
14 | }
15 | 
16 | SEXP geteptr(SEXP x) {
17 |   if(TYPEOF(x) != EXTPTRSXP) return x; // anything that is not an external pointer is passed through unchanged
18 |   SEXP target = (SEXP) R_ExternalPtrAddr(x);
19 |   if(!target) error("Invalid pointer to 'index': external pointers are only valid within the current R session. Please reindex() your data: data = reindex(data)");
20 |   return target; // the address held by the pointer, cast back to SEXP
21 |   // return R_ExternalPtrProtected(x);
22 | }
23 | 


--------------------------------------------------------------------------------
/src/kit.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  This code is adapted from the kit package: https://github.com/2005m/kit
 3 |  and licensed under a GPL-3.0 license.
 4 | */
 5 | 
 6 | #include <R.h>
 7 | #include <Rinternals.h>
 8 | #include <stdint.h> // needed for uintptr_t on linux
 9 | // NOTE: apart from HASH/HASHK, macro arguments are substituted unparenthesized and several are evaluated more than once: pass simple variables/fields only.
10 | #define NOGE(x, l) ((x < 0 && x != NA_INTEGER) || (x >= l)) // true if x is negative (and not NA_INTEGER) or x >= l
11 | #define HASH(key, K)  (3141592653U * (unsigned int)(key) >> (32 - (K))) // multiplicative hash: product shifted right by 32 - K (keeps the top K bits, assuming 32-bit unsigned int)
12 | #define HASHK(key, K)  (3141592653U * (unsigned int)(key) >> (K)) // same product, but shifted right by K directly
13 | #define N_ISNAN(x, y) (!ISNAN(x) && !ISNAN(y)) // neither x nor y satisfies ISNAN
14 | #define B_IsNA(x, y)  (R_IsNA(x) && R_IsNA(y)) // both satisfy R_IsNA
15 | #define B_IsNaN(x, y) (R_IsNaN(x) && R_IsNaN(y)) // both satisfy R_IsNaN
16 | #define B_ISNAN(x, y) (ISNAN(x) && ISNAN(y)) // both satisfy ISNAN
17 | #define C_IsNA(x)     (R_IsNA(x.r) || R_IsNA(x.i)) // complex x: real or imaginary part satisfies R_IsNA
18 | #define C_IsNaN(x)    (R_IsNaN(x.r) || R_IsNaN(x.i)) // complex x: real or imaginary part satisfies R_IsNaN
19 | #define C_ISNAN(x, y) (B_ISNAN(x, y) || (N_ISNAN(x, y) && x == y)) // both ISNAN, or neither ISNAN and x == y
20 | #define REQUAL(x, y)  (N_ISNAN(x, y) ? (x == y) : (B_IsNA(x, y) || B_IsNaN(x, y))) // double equality: == when neither is ISNAN, otherwise both R_IsNA or both R_IsNaN
21 | #define CEQUAL(x, y) ((N_ISNAN(x.r, x.i) && N_ISNAN(y.r, y.i)) ? (x.r == y.r && x.i == y.i) : (C_IsNA(x) ? C_IsNA(y) : (C_IsNA(y) ? 0 : (C_ISNAN(x.r, y.r) && C_ISNAN(x.i, y.i))))) // complex equality: plain comparison when no part is ISNAN; otherwise C_IsNA(x) requires C_IsNA(y), and failing that both parts must agree under C_ISNAN
22 | // Overlays a double with two unsigned ints.
23 | union uno { double d; unsigned int u[2]; };
24 | 
25 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | citHeader("To cite collapse in publications, please use:")
 2 | # Entry 1: the arXiv preprint (fixed year and identifiers).
 3 | bibentry(bibtype = "misc",
 4 |          key = "krantz2024collapse",
 5 |          title = "collapse: Advanced and Fast Statistical Computing and Data Transformation in R",
 6 |          author = person("Sebastian", "Krantz"),
 7 |          year = "2024",
 8 |          eprint="2403.05038",
 9 |          archivePrefix="arXiv",
10 |          primaryClass="stat.CO",
11 |          url = "https://arxiv.org/abs/2403.05038",
12 |          textVersion = "Krantz, S. (2024). collapse: Advanced and Fast Statistical Computing and Data Transformation in R [Preprint]. arXiv. https://arxiv.org/abs/2403.05038")
13 | # Entry 2: the package manual; year and version note are derived from 'meta' (presumably the package DESCRIPTION metadata R supplies when reading this file).
14 | year <- sub("-.*", "", meta$Date) # drop everything from the first "-" onwards, leaving the leading part of meta$Date
15 | note <- sprintf("R package version %s", meta$Version)
16 | bibentry(bibtype = "Manual",
17 |          key = "rcollapse",
18 |          title = "collapse: Advanced and Fast Data Transformation in R",
19 |          author = person("Sebastian", "Krantz"),
20 |          year = year,
21 |          note = note,
22 |          doi = "10.5281/zenodo.8433090",
23 |          url = "https://fastverse.github.io/collapse/",
24 |          textVersion = paste0("Krantz (", year, "). collapse: Advanced and Fast Data Transformation in R. ", note,
25 |                               ". doi:10.5281/zenodo.8433090. https://fastverse.github.io/collapse/."))
26 | 


--------------------------------------------------------------------------------
/pkgdown/extra.css:
--------------------------------------------------------------------------------
 1 | .navbar-nav .nav-item > .nav-link {
 2 |   margin-right: 10px;
 3 | }
 4 | .template-home img.logo {
 5 |   width: 150px;
 6 | }
 7 | img.logo {
 8 |   width: 150px;
 9 |   margin-left: 30px;
10 | }
11 | .h1, .h2, .h3, h1, h2, h3 {
12 |   margin-top: 35px;
13 |   margin-bottom: 10px;
14 | }
15 | body {
16 |   font-size: 100%;
17 | }
18 | /*
19 | p {
20 |   font-size: 0.875em;  14px/16=0.875em
21 | }
22 | */
23 | .fa-bluesky {
24 |   font-family: "Font Awesome 6 Brands";
25 |   font-weight: 400;
26 | }
27 | span.fa.fa-bluesky {
28 |   font-size: 15.5px;
29 | }
30 | @media screen and (min-width: 1000px) {
31 |   span.fa.fa-bluesky {
32 |     padding-left: 12px;
33 |   }
34 | }
35 | span.fa.fa-twitter {
36 |   font-size: 18px;
37 | }
38 | span.fa.fa-github {
39 |   font-size: 18px;
40 |   margin-right: 100px;
41 | }
42 | a {
43 |   color: #0089b3; /* #007da3 */
44 | }
45 | a:hover {
46 |   color: #005873; /* #027ca1; */
47 | }
48 | pre {
49 |   color: #cccccc;
50 | }
51 | small.nav-text.text-muted {
52 |   color: #999a9c !important; /* #8e8c84 #999a9c; -> Same as navbar */
53 | }
54 | 
55 | .form-control,
56 | .form-control::placeholder {
57 |   color: #999a9c !important;
58 | }
59 | 
60 | [data-bs-theme="dark"] {
61 |   --bs-body-color: #cccccc !important;
62 |   --bs-secondary-color: #cccccc !important;
63 |   --bs-tertiary-color: #999a9c !important;
64 | }
65 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   workflow_dispatch:
 9 | 
10 | name: R-CMD-check
11 | 
12 | jobs:
13 |   R-CMD-check:
14 |     runs-on: ${{ matrix.config.os }}
15 | 
16 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
17 | 
18 |     strategy:
19 |       fail-fast: false
20 |       matrix:
21 |         config:
22 |           - {os: macos-latest,   r: 'release'}
23 |           - {os: windows-latest, r: 'release'}
24 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
25 |           - {os: ubuntu-latest,   r: 'release'}
26 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
27 | 
28 |     env:
29 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
30 |       R_KEEP_PKG_SOURCE: yes
31 | 
32 |     steps:
33 |       - uses: actions/checkout@v3
34 | 
35 |       - uses: r-lib/actions/setup-pandoc@v2
36 | 
37 |       - uses: r-lib/actions/setup-r@v2
38 |         with:
39 |           r-version: ${{ matrix.config.r }}
40 |           http-user-agent: ${{ matrix.config.http-user-agent }}
41 |           use-public-rspm: true
42 | 
43 |       - uses: r-lib/actions/setup-r-dependencies@v2
44 |         with:
45 |           extra-packages: any::rcmdcheck
46 |           needs: check
47 | 
48 |       - uses: r-lib/actions/check-r-package@v2
49 |         with:
50 |           upload-snapshots: true
51 | 


--------------------------------------------------------------------------------
/.github/workflows/pkgdown.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   release:
 5 |     types: [published]
 6 |   workflow_dispatch:
 7 | 
 8 | name: pkgdown
 9 | 
10 | jobs:
11 |   pkgdown:
12 |     runs-on: macos-latest
13 |     # Only restrict concurrency for non-PR jobs
14 |     concurrency:
15 |       group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     permissions:
19 |       contents: write
20 |     steps:
21 |       - uses: actions/checkout@v3
22 | 
23 |       - uses: r-lib/actions/setup-pandoc@v2
24 | 
25 |       - uses: r-lib/actions/setup-r@v2
26 |         with:
27 |           use-public-rspm: true
28 | 
29 |       - uses: r-lib/actions/setup-r-dependencies@v2
30 |         with:
31 |           extra-packages: |
32 |             any::sf
33 |             github::SebKrantz/pkgdown
34 |             local::.
35 |           needs: website
36 | 
37 |       - name: Build site
38 |         run: |
39 |           options(max.print = 70L)
40 |           Sys.setenv(NCRAN = TRUE, NMAC = TRUE, RUNBENCH = TRUE, LOCAL = TRUE)
41 |           pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
42 |         shell: Rscript {0}
43 | 
44 |       - name: Deploy to GitHub pages 🚀
45 |         if: github.event_name != 'pull_request'
46 |         uses: JamesIves/github-pages-deploy-action@v4.4.1
47 |         with:
48 |           clean: false
49 |           branch: gh-pages
50 |           folder: docs
51 | 


--------------------------------------------------------------------------------
/man/t_list.Rd:
--------------------------------------------------------------------------------
 1 | \name{t_list}
 2 | \alias{t_list}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Efficient List Transpose
 6 | }
 7 | \description{
 8 | \code{t_list} turns a list of lists inside-out. The performance is quite efficient regardless of the size of the list.
 9 | }
10 | \usage{
11 | t_list(l)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{l}{a list of lists. Elements inside the sublists can be heterogeneous, including further lists.
16 | }
17 | }
18 | \value{
19 | \code{l} transposed such that the second layer of the list becomes the top layer and the top layer the second layer. See Examples.
20 | }
21 | 
22 | \note{
23 | To transpose a data frame / list of atomic vectors see \code{data.table::transpose()}.
24 | }
25 | 
26 | \seealso{
27 | \code{\link{rsplit}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview}
28 | }
29 | \examples{
30 | # Homogeneous list of lists
31 | l <- list(a = list(c = 1, d = 2), b = list(c = 3, d = 4))
32 | str(l)
33 | str(t_list(l))
34 | 
35 | # Heterogeneous case
36 | l2 <- list(a = list(c = 1, d = letters), b = list(c = 3:10, d = list(4, e = 5)))
37 | attr(l2, "bla") <- "abc"  # Attributes other than names are preserved
38 | str(l2)
39 | str(t_list(l2))
40 | 
41 | rm(l, l2)
42 | }
43 | \keyword{list}
44 | \keyword{manip}
45 | \keyword{utilities}
46 | 
47 | % \keyword{ ~kwd1 }
48 | % \keyword{ ~kwd2 }
49 | % Use only one keyword per line.
50 | % For non-standard keywords, use \concept instead of \keyword:
51 | % \concept{ ~cpt1 }
52 | % \concept{ ~cpt2 }
53 | % Use only one concept per line.
54 | 


--------------------------------------------------------------------------------
/.github/workflows/test-coverage.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 |   workflow_dispatch:
 9 | 
10 | name: test-coverage
11 | 
12 | jobs:
13 |   test-coverage:
14 |     runs-on: ubuntu-latest
15 |     env:
16 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
17 | 
18 |     steps:
19 |       - uses: actions/checkout@v3
20 | 
21 |       - uses: r-lib/actions/setup-r@v2
22 |         with:
23 |           use-public-rspm: true
24 | 
25 |       - uses: r-lib/actions/setup-r-dependencies@v2
26 |         with:
27 |           extra-packages: any::covr, any::sf, any::weights
28 |           needs: coverage
29 | 
30 |       - name: Test coverage
31 |         run: |
32 |           Sys.setenv(NCRAN = TRUE, NMAC = TRUE, OMP = TRUE)
33 |           suppressWarnings(covr::codecov(
34 |             type = "all",
35 |             quiet = FALSE,
36 |             clean = FALSE,
37 |             install_path = file.path(Sys.getenv("RUNNER_TEMP"), "package")
38 |           ))
39 |         shell: Rscript {0}
40 | 
41 |       - name: Show testthat output
42 |         if: always()
43 |         run: |
44 |           ## --------------------------------------------------------------------
45 |           find ${{ runner.temp }}/package -name 'testthat.Rout*' -exec cat '{}' \; || true
46 |         shell: bash
47 | 
48 |       - name: Upload test results
49 |         if: failure()
50 |         uses: actions/upload-artifact@v4
51 |         with:
52 |           name: coverage-test-failures
53 |           path: ${{ runner.temp }}/package
54 | 


--------------------------------------------------------------------------------
/man/is_unlistable.Rd:
--------------------------------------------------------------------------------
 1 | \name{is_unlistable}
 2 | \alias{is_unlistable}
 3 | \title{
 4 | Unlistable Lists
 5 | }
 6 | \description{
 7 | A (nested) list with atomic objects in all final nodes of the list-tree is unlistable - checked with \code{is_unlistable}.
 8 | }
 9 | \usage{
10 | is_unlistable(l, DF.as.list = FALSE)
11 | }
12 | %- maybe also 'usage' for other objects documented here.
13 | \arguments{
14 |  % \item{x}{an R object.}
15 |     \item{l}{a list.}
16 |     \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.}
17 | }
18 | \details{
19 | \code{is_unlistable} with \code{DF.as.list = TRUE} is defined as \code{all(rapply(l, is.atomic))}, whereas \code{DF.as.list = FALSE} checks using \code{all(unlist(rapply2d(l, function(x) is.atomic(x) || is.list(x)), use.names = FALSE))}, assuming that data frames are lists composed of atomic elements.  If \code{l} contains data frames, the latter can be a lot faster than applying \code{is.atomic} to every data frame column.
20 | }
21 | \value{
22 | \code{logical(1)} - \code{TRUE} or \code{FALSE}.
23 | }
24 | % \references{
25 | %% ~put references to the literature/web site here ~
26 | % }
27 | % \author{
28 | %%  ~~who you are~~
29 | % }
30 | % \note{
31 | %%  ~~further notes~~
32 | % }
33 | 
34 | %% ~Make other sections like Warning with \section{Warning }{\dots.} ~
35 | 
36 | \seealso{
37 | \code{\link{ldepth}}, \code{\link{has_elem}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview}
38 | }
39 | \examples{
40 | l <- list(1, 2, list(3, 4, "b", FALSE))
41 | is_unlistable(l)
42 | l <- list(1, 2, list(3, 4, "b", FALSE, e ~ b))
43 | is_unlistable(l)
44 | 
45 | }
46 | \keyword{list}
47 | \keyword{utilities}
48 | 


--------------------------------------------------------------------------------
/man/ldepth.Rd:
--------------------------------------------------------------------------------
 1 | \name{ldepth}
 2 | \alias{ldepth}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Determine the Depth / Level of Nesting of a List
 6 | }
 7 | \description{
 8 | \code{ldepth} provides the depth of a list or list-like structure.
 9 | }
10 | \usage{
11 | ldepth(l, DF.as.list = FALSE)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{l}{a list.}
16 |   \item{DF.as.list}{logical. \code{TRUE} treats data frames like (sub-)lists; \code{FALSE} like atomic elements.}
17 | }
18 | \details{
19 | The depth or level of nesting of a list or list-like structure (e.g. a model object) is found by recursing down to the bottom of the list and adding an integer count of 1 for each level passed. For example, the depth of a data frame is 1. If a data frame has list-columns, the depth is 2. However, for reasons of efficiency, if \code{l} is not a data frame and \code{DF.as.list = FALSE}, data frames found inside \code{l} will not be checked for list-columns but assumed to have a depth of 1.
20 | }
21 | \value{
22 | A single integer indicating the depth of the list.
23 | }
24 | % \references{
25 | %% ~put references to the literature/web site here ~
26 | % }
27 | % \author{
28 | %%  ~~who you are~~
29 | % }
30 | % \note{
31 | %%  ~~further notes~~
32 | % }
33 | 
34 | %% ~Make other sections like Warning with \section{Warning }{\dots.} ~
35 | 
36 | \seealso{
37 | \code{\link{is_unlistable}}, \code{\link{has_elem}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview}
38 | }
39 | \examples{
40 | l <- list(1, 2)
41 | ldepth(l)
42 | l <- list(1, 2, mtcars)
43 | ldepth(l)
44 | ldepth(l, DF.as.list = TRUE)
45 | l <- list(1, 2, list(4, 5, list(6, mtcars)))
46 | ldepth(l)
47 | ldepth(l, DF.as.list = TRUE)
48 | }
49 | \keyword{list}
50 | \keyword{utilities}
51 | 


--------------------------------------------------------------------------------
/.github/workflows/update-citation-cff.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
 2 | # The action runs when:
 3 | # - A new release is published
 4 | # - The DESCRIPTION or inst/CITATION are modified
 5 | # - Can be run manually
 6 | # For customizing the triggers, visit https://docs.github.com/en/actions/learn-github-actions/events-that-trigger-workflows
 7 | on:
 8 |   release:
 9 |     types: [published]
10 |   push:
11 |     branches: [master, main]
12 |     paths:
13 |       - DESCRIPTION
14 |       - inst/CITATION
15 |   workflow_dispatch:
16 | 
17 | name: Update CITATION.cff
18 | 
19 | jobs:
20 |   update-citation-cff:
21 |     runs-on: macos-latest
22 |     env:
23 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
24 |     steps:
25 |       - uses: actions/checkout@v3
26 |       - uses: r-lib/actions/setup-r@v2
27 |       - uses: r-lib/actions/setup-r-dependencies@v2
28 |         with:
29 |           extra-packages: |
30 |             any::cffr
31 |             any::V8
32 | 
33 |       - name: Update CITATION.cff
34 |         run: |
35 | 
36 |           library(cffr)
37 | 
38 |           # Customize with your own code
39 |           # See https://docs.ropensci.org/cffr/articles/cffr.html
40 | 
41 |           # Write your own keys
42 |           mykeys <- list()
43 | 
44 |           # Create your CITATION.cff file
45 |           cff_write(keys = mykeys)
46 | 
47 |         shell: Rscript {0}
48 | 
49 |       - name: Commit results
50 |         run: |
51 |           git config --local user.name "github-actions[bot]"
52 |           git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
53 |           git add CITATION.cff
54 |           git commit -m 'Update CITATION.cff' || echo "No changes to commit"
55 |           git push origin || echo "No changes to commit"
56 | 
57 | 
58 | 
59 | 


--------------------------------------------------------------------------------
/man/rapply2d.Rd:
--------------------------------------------------------------------------------
 1 | \name{rapply2d}
 2 | \alias{rapply2d}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Recursively Apply a Function to a List of Data Objects
 6 | }
 7 | \description{
 8 | \code{rapply2d} is a recursive version of \code{lapply} with three differences to \code{\link{rapply}}:
 9 | 
10 | \enumerate{
11 | \item data frames (or other list-based objects specified in \code{classes}) are considered as atomic, not as (sub-)lists
12 | \item \code{FUN} is applied to all 'atomic' objects in the nested list
13 | \item the result is not simplified / unlisted.
14 | }
15 | 
16 | }
17 | \usage{
18 | rapply2d(l, FUN, \dots, classes = "data.frame")
19 | }
20 | %- maybe also 'usage' for other objects documented here.
21 | \arguments{
22 |   \item{l}{a list.}
23 |   \item{FUN}{a function that can be applied to all 'atomic' elements in \code{l}.}
24 |   \item{\dots}{additional arguments passed to \code{FUN}.}
25 |   \item{classes}{character. Classes of list-based objects inside \code{l} that should be considered as atomic. }
26 | }
27 | \value{
28 | A list of the same structure as \code{l}, where \code{FUN} was applied to all atomic elements and list-based objects of a class included in \code{classes}.
29 | }
30 | \note{
31 | The main reason \code{rapply2d} exists is to have a recursive function that out-of-the-box applies a function to a nested list of data frames.
32 | 
33 | For most other purposes \code{\link{rapply}}, or by extension the excellent \href{https://cran.r-project.org/package=rrapply}{rrapply} function / package, provide more advanced functionality and greater performance.
34 | }
35 | %% ~Make other sections like Warning with \section{Warning }{\dots.} ~
36 | \seealso{
37 | \code{\link{rsplit}}, \code{\link{unlist2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview}
38 | }
39 | \examples{
40 | l <- list(mtcars, list(mtcars, as.matrix(mtcars)))
41 | rapply2d(l, fmean)
42 | unlist2d(rapply2d(l, fmean))
43 | }
44 | \keyword{manip}
45 | \keyword{list}
46 | 


--------------------------------------------------------------------------------
/src/stats_pacf.c:
--------------------------------------------------------------------------------
 1 | /*  R : A Computer Language for Statistical Data Analysis
 2 | *
 3 |   *  Copyright (C) 1999-2016	The R Core Team
 4 | *
 5 |   *  This program is free software; you can redistribute it and/or modify
 6 | *  it under the terms of the GNU General Public License as published by
 7 | *  the Free Software Foundation; either version 2 of the License, or
 8 | *  (at your option) any later version.
 9 | *
10 |   *  This program is distributed in the hope that it will be useful,
11 | *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12 | *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 | *  GNU General Public License for more details.
14 | *
15 |   *  You should have received a copy of the GNU General Public License
16 | *  along with this program; if not, a copy is available at
17 | *  https://www.R-project.org/Licenses/.
18 | */
19 | 
20 | // #ifdef HAVE_CONFIG_H
21 | // # include <config.h>
22 | // #endif
23 | 
24 | // #include "data.table.h"
25 | #include <R.h>
26 | #include <Rinternals.h>
27 | #include <Rdefines.h>
28 | 
29 | // #include <R.h>
30 | // #include "ts.h"
31 | /* Partial autocorrelations computed from autocorrelations with a Durbin-Levinson type recursion.
32 |  * p receives nlag values (p[0..nlag-1]); cor[1..nlag] are read, so nlag must be >= 1 and cor must hold at least nlag + 1 values.
33 |  * cor is the autocorrelations starting from 0 lag*/
34 |   static void uni_pacf(double *cor, double *p, int nlag)
35 | {
36 |   double a, b, c, *v, *w;
37 | 
38 |   v = (double*) R_alloc(nlag, sizeof(double)); // scratch: reversed copy of w (see below)
39 |   w = (double*) R_alloc(nlag, sizeof(double)); // scratch: current recursion coefficients
40 |   w[0] = p[0] = cor[1]; // first partial autocorrelation is the lag-1 autocorrelation
41 |   for(int ll = 1; ll < nlag; ll++) { // each pass produces p[ll]
42 |     a = cor[ll+1]; // numerator accumulator
43 |     b = 1.0;       // denominator accumulator
44 |     for(int i = 0; i < ll; i++) {
45 |       a -= w[i] * cor[ll - i];
46 |       b -= w[i] * cor[i + 1];
47 |     }
48 |     p[ll] = c = a/b;
49 |     if(ll+1 == nlag) break; // last value stored: the coefficient update below is not needed anymore
50 |     w[ll] = c;
51 |     for(int i = 0; i < ll; i++)
52 |       v[ll-i-1] = w[i]; // v = w[0..ll-1] in reverse order
53 |     for(int i = 0; i < ll; i++)
54 |       w[i] -= c*v[i];   // update the first ll coefficients using the reversed copy
55 |   }
56 |   }
57 | 
58 | /* .Call entry point: partial autocorrelations from the autocorrelations in 'acf'.
59 |  *   acf:  autocorrelations starting at lag 0 (coerced to double); must hold more than 'lmax' values.
60 |  *   lmax: maximum lag, coerced to a single integer; must be >= 1.
61 |  * Returns a double array with dim c(lmax, 1, 1). Raises an R error on invalid input:
62 |  * uni_pacf() reads acf[1..lmax] and writes lmax results, so unchecked input would
63 |  * read or write out of bounds. */
64 | SEXP pacf1(SEXP acf, SEXP lmax)
65 | {
66 |   int lagmax = asInteger(lmax);
67 |   if(lagmax == NA_INTEGER || lagmax < 1) error("'lag.max' must be at least 1");
68 |   if(xlength(acf) <= lagmax) error("'acf' must contain at least lag.max + 1 autocorrelations");
69 |   acf = PROTECT(coerceVector(acf, REALSXP));
70 |   SEXP ans = PROTECT(allocVector(REALSXP, lagmax));
71 |   uni_pacf(REAL(acf), REAL(ans), lagmax);
72 |   SEXP d = PROTECT(allocVector(INTSXP, 3)); // dim attribute of the result
73 |   INTEGER(d)[0] = lagmax;
74 |   INTEGER(d)[1] = INTEGER(d)[2] = 1;
75 |   setAttrib(ans, R_DimSymbol, d);
76 |   UNPROTECT(3);
77 |   return ans;
78 | }
79 | 


--------------------------------------------------------------------------------
/man/groupid.Rd:
--------------------------------------------------------------------------------
 1 | \name{groupid}
 2 | \alias{groupid}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Generate Run-Length Type Group-Id
 6 | }
 7 | \description{
 8 | \code{groupid} is an enhanced version of \code{data.table::rleid} for atomic vectors. It generates a run-length type group-id where consecutive identical values are assigned the same integer. It is a generalization as it can be applied to unordered vectors, generate group-ids starting from an arbitrary value, and skip missing values.
 9 | }
10 | \usage{
11 | groupid(x, o = NULL, start = 1L, na.skip = FALSE, check.o = TRUE)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{x}{an atomic vector of any type. Attributes are not considered.}
16 | 
17 |   \item{o}{an (optional) integer ordering vector specifying the order by which to pass through \code{x}.}
18 | 
19 |   \item{start}{integer. The starting value of the resulting group-id. Default is starting from 1.} %For C++ programmers, starting from 0 could be a better choice. }
20 | 
21 |   \item{na.skip}{logical. Skip missing values i.e. if \code{TRUE} something like \code{groupid(c("a", NA, "a"))} gives \code{c(1, NA, 1)} whereas \code{FALSE} gives \code{c(1, 2, 3)}.}
22 | 
23 |  \item{check.o}{logical. Programmer's option: \code{FALSE} skips the check that each element of \code{o} is in the range \code{[1, length(x)]}; only the length of \code{o} is checked. This gives some extra speed, but will terminate R if any element of \code{o} is too large or too small. }
24 | 
25 | }
26 | \value{
27 | An integer vector of class 'qG'. See \code{\link{qG}}.
28 | }
29 | 
30 | \seealso{
31 | \code{\link{seqid}}, \code{\link{timeid}}, \code{\link{qG}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}
32 | }
33 | \examples{
34 | groupid(airquality$Month)
35 | groupid(airquality$Month, start = 0)
36 | groupid(wlddev$country)[1:100]
37 | 
38 | ## Same thing since country is alphabetically ordered: (groupid is faster..)
39 | all.equal(groupid(wlddev$country), qG(wlddev$country, na.exclude = FALSE))
40 | 
41 | ## When data is unordered, group-id can be generated through an ordering..
42 | uo <- order(rnorm(fnrow(airquality)))
43 | monthuo <- airquality$Month[uo]
44 | o <- order(monthuo)
45 | groupid(monthuo, o)
46 | identical(groupid(monthuo, o)[o], unattrib(groupid(airquality$Month)))
47 | }
48 | \keyword{manip}
49 | 


--------------------------------------------------------------------------------
/man/collapse-renamed.Rd:
--------------------------------------------------------------------------------
 1 | \name{collapse-renamed}
 2 | \alias{collapse-renamed}
 3 | \alias{.COLLAPSE_OLD}
 4 | \alias{fNobs}
 5 | \alias{fNobs.default}
 6 | \alias{fNobs.matrix}
 7 | \alias{fNobs.data.frame}
 8 | \alias{fNobs.grouped_df}
 9 | \alias{fNdistinct}
10 | \alias{fNdistinct.default}
11 | \alias{fNdistinct.matrix}
12 | \alias{fNdistinct.data.frame}
13 | \alias{fNdistinct.grouped_df}
14 | \alias{fHDwithin}
15 | \alias{fHDwithin.default}
16 | \alias{fHDwithin.matrix}
17 | \alias{fHDwithin.data.frame}
18 | \alias{fHDwithin.pseries}
19 | \alias{fHDwithin.pdata.frame}
20 | \alias{fHDwithin.grouped_df}
21 | \alias{fHDbetween}
22 | \alias{fHDbetween.default}
23 | \alias{fHDbetween.matrix}
24 | \alias{fHDbetween.data.frame}
25 | \alias{fHDbetween.pseries}
26 | \alias{fHDbetween.pdata.frame}
27 | \alias{fHDbetween.grouped_df}
28 | \alias{replace_NA}
29 | \alias{replace_Inf}
30 | % \alias{pwNobs}
31 | % \alias{as.factor_GRP}
32 | % \alias{as.factor_qG}
33 | % \alias{is.GRP}
34 | % \alias{is.qG}
35 | % \alias{is.unlistable}
36 | % \alias{is.categorical}
37 | % \alias{is.Date}
38 | % \alias{as.character_factor}
39 | % \alias{as.numeric_factor}
40 | % \alias{Date_vars}
41 | % \alias{Date_vars<-}
42 | 
43 | 
44 | %- Also NEED an '\alias' for EACH other topic documented here.
45 | \title{
46 | Renamed Functions
47 | }
48 | \description{
49 | These functions were renamed (mostly during v1.6.0 update) to make the namespace more consistent. % Except for the S3 generics of \code{fNobs}, \code{fNdistinct}, \code{fHDbetween} and \code{fHDwithin}, and functions \code{replace_NA} and \code{replace_Inf}, I intend to remove all of these functions by end of 2023. %The S3 generics and the other functions will be depreciated in 2023 for the earliest. These all now give a message reminding you not to use them in fresh code.
50 | }
51 | \section{Renaming}{\if{html}{\out{<div class="sourceCode r">}}\preformatted{
52 | fNobs -> fnobs
53 | fNdistinct -> fndistinct
54 | fHDwithin -> fhdwithin
55 | fHDbetween -> fhdbetween
56 | replace_NA -> replace_na
57 | replace_Inf -> replace_inf
58 | % pwNobs -> pwnobs
59 | % as.factor_GRP -> as_factor_GRP
60 | % as.factor_qG -> as_factor_qG
61 | % is.GRP -> is_GRP
62 | % is.qG -> is_qG
63 | % is.unlistable -> is_unlistable
64 | % is.categorical -> is_categorical
65 | % is.Date -> is_date
66 | % as.numeric_factor -> as_numeric_factor
67 | % as.character_factor -> as_character_factor
68 | % Date_vars -> date_vars
69 | % `Date_vars<-` -> `date_vars<-`
70 | }\if{html}{\out{</div>}}
71 | }
72 | 
73 | 


--------------------------------------------------------------------------------
/man/fdroplevels.Rd:
--------------------------------------------------------------------------------
 1 | \name{fdroplevels}
 2 | \alias{fdroplevels}
 3 | \alias{fdroplevels.factor}
 4 | \alias{fdroplevels.data.frame}
 5 | %- Also NEED an '\alias' for EACH other topic documented here.
 6 | \title{
 7 | Fast Removal of Unused Factor Levels
 8 | }
 9 | \description{
10 | A substantially faster replacement for \code{\link{droplevels}}.
11 | }
12 | \usage{
13 | fdroplevels(x, ...)
14 | 
15 | \method{fdroplevels}{factor}(x, ...)
16 | 
17 | \method{fdroplevels}{data.frame}(x, ...)
18 | }
19 | %- maybe also 'usage' for other objects documented here.
20 | \arguments{
21 |   \item{x}{a factor, or data frame / list containing one or more factors.}
22 |   \item{\dots}{not used.}
23 | }
24 | \details{
25 | \code{\link{droplevels}} passes a factor from which levels are to be dropped to \code{\link{factor}}, which first calls \code{\link{unique}} and then \code{\link{match}} to drop unused levels. Both functions internally use a hash table, which is highly inefficient. \code{fdroplevels} does not require mapping values at all, but uses a super fast boolean vector method to determine which levels are unused and remove those levels. In addition, if no unused levels are found, \code{x} is simply returned. Any missing values found in \code{x} are efficiently skipped in the process of checking and replacing levels. All other attributes of \code{x} are preserved.
26 | }
27 | \value{
28 | \code{x} with unused factor levels removed.
29 | }
30 | \note{
31 | If \code{x} is malformed e.g. has too few levels, this function can cause a segmentation fault terminating the R session, thus only use with ordinary / proper factors.
32 | }
33 | 
34 | %% ~Make other sections like Warning with \section{Warning }{....} ~
35 | 
36 | \seealso{
37 | \code{\link{qF}}, \code{\link{funique}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}
38 | }
39 | \examples{
40 | f <- iris$Species[1:100]
41 | fdroplevels(f)
42 | identical(fdroplevels(f), droplevels(f))
43 | 
44 | fNA <- na_insert(f)
45 | fdroplevels(fNA)
46 | identical(fdroplevels(fNA), droplevels(fNA))
47 | 
48 | identical(fdroplevels(ss(iris, 1:100)), droplevels(ss(iris, 1:100)))
49 | }
50 | % Add one or more standard keywords, see file 'KEYWORDS' in the
51 | % R documentation directory (show via RShowDoc("KEYWORDS")):
52 | % \keyword{ ~kwd1 }
53 | % \keyword{ ~kwd2 }
54 | % Use only one keyword per line.
55 | % For non-standard keywords, use \concept instead of \keyword:
56 | % \concept{ ~cpt1 }
57 | % \concept{ ~cpt2 }
58 | % Use only one concept per line.
59 | 


--------------------------------------------------------------------------------
/tests/testthat/test-sf.R:
--------------------------------------------------------------------------------
 1 | context("collapse and sf")
 2 | 
 3 | if(Sys.getenv("NMAC") == "TRUE" && requireNamespace(paste0("s", "f"), quietly = TRUE)) {
 4 | 
 5 | eval(parse(text = paste0("libr", "ary(", "sf)")))
 6 | nc <- st_read(system.file("shape/nc.shp", package = "sf"), quiet = TRUE)
 7 | 
 8 | test_that("sf methods work properly", {
 9 |   expect_visible(nc %>% fgroup_by(AREA))
10 |   expect_visible(nc %>% fgroup_by(AREA) %>% fgroup_vars)
11 |   expect_visible(descr(nc))
12 |   expect_visible(qsu(nc))
13 |   expect_visible(varying(nc))
14 |   expect_true(any(names(num_vars(nc)) == "geometry"))
15 |   expect_true(any(names(fselect(nc, AREA, NAME:FIPSNO)) == "geometry"))
16 |   expect_true(any(names(gv(nc, c("AREA", "NAME", "FIPS", "FIPSNO"))) == "geometry"))
17 |   expect_true(any(names(fsubset(nc, AREA > fmean(AREA), AREA, NAME:FIPSNO)) == "geometry"))
18 |   expect_true(any(names(ss(nc, 1:10, c("AREA", "NAME", "FIPS", "FIPSNO"))) == "geometry"))
19 |   expect_true(inherits(rsplit(nc, AREA ~ SID74)[[1L]], "sf"))
20 |   expect_equal(names(`nv<-`(nc, NULL)), c("NAME", "FIPS", "geometry"))
21 |   # nv(nc) <- NULL
22 |   expect_equal(tfmv(nc, is.numeric, log), tfmv(nc, is.numeric, log, apply = FALSE))
23 |   expect_equal(length(nc %>% gby(NAME) %>% varying), length(nc) - 2L)
24 |   expect_true(is.data.frame(nc %>% gby(NAME) %>% varying(any_group = FALSE)))
25 |   expect_visible(funique(nc, cols = 1))
26 |   expect_true(length(fcompute(nc, log_AREA = log(AREA))) == 2L)
27 |   expect_true(length(fcomputev(nc, "AREA", log)) == 2L)
28 |   expect_true(length(fcomputev(nc, "AREA", log, keep = "PERIMETER")) == 3L)
29 |   expect_true(length(fcomputev(nc, "AREA", fscale, apply = FALSE)) == 2L)
30 |   expect_true(length(fcomputev(nc, "AREA", fscale, apply = FALSE, keep = "PERIMETER")) == 3L)
31 |   expect_true(inherits(nc %>% fgroup_by(SID74) %>%
32 |                          fsummarise(AREA_Ag = fsum(AREA),
33 |                                     Perimeter_Ag = fmedian(PERIMETER),
34 |                                     geometry = st_union(geometry)), "sf"))
35 | })
36 | 
37 | test_that("rbinding and mutating sf works well", {
38 |   expect_identical(nc, nc %>% fgroup_by(AREA) %>% fmutate((.data)) %>% fungroup())
39 |   expect_identical(funique(nc, "AREA"), nc %>% fgroup_by(AREA, sort = FALSE) %>% ffirst(na.rm = FALSE))
40 |   expect_identical(roworder(nc, AREA), nc %>% rsplit(~ AREA, keep.by = TRUE) %>% unlist2d(FALSE) %>% copyMostAttrib(nc))
41 |   expect_identical(roworder(nc, AREA), nc %>% rsplit(~ AREA) %>% unlist2d("AREA") %>%
42 |                      fmutate(AREA = as.double(AREA)) %>% copyMostAttrib(nc))
43 | })
44 | 
45 | }
46 | 


--------------------------------------------------------------------------------
/man/group.Rd:
--------------------------------------------------------------------------------
 1 | \name{group}
 2 | \alias{group}
 3 | \alias{groupv}
 4 | %- Also NEED an '\alias' for EACH other topic documented here.
 5 | \title{
 6 | Fast Hash-Based Grouping
 7 | }
 8 | \description{
 9 | \code{group()} scans the rows of a data frame (or atomic vector / list of atomic vectors), assigning to each unique row an integer id - starting with 1 and proceeding in first-appearance order of the rows. The function is written in C and optimized for R's data structures. It is the workhorse behind functions like \code{\link{GRP}} / \code{\link{fgroup_by}}, \code{\link{collap}}, \code{\link{qF}}, \code{\link{qG}}, \code{\link{finteraction}} and \code{\link{funique}}, when called with argument \code{sort = FALSE}.
10 | }
11 | \usage{
12 | group(\dots, starts = FALSE, group.sizes = FALSE)
13 | 
14 | groupv(x, starts = FALSE, group.sizes = FALSE)
15 | }
16 | %- maybe also 'usage' for other objects documented here.
17 | \arguments{
18 |   \item{\dots}{comma separated atomic vectors to group. Also supports a single list of vectors for backward compatibility.}
19 |   \item{x}{an atomic vector or data frame / list of equal-length atomic vectors.}
20 |   \item{starts}{logical. If \code{TRUE}, an additional attribute \code{"starts"} is attached giving a vector of group starts (= index of first-occurrence of unique rows).
21 | }
22 |   \item{group.sizes}{
23 | logical. If \code{TRUE}, an additional attribute \code{"group.sizes"} is attached giving the size of each group.
24 | }
25 | }
26 | \details{
27 | A data frame is grouped on a column-by-column basis, starting from the leftmost column. For each new column the grouping vector obtained after the previous column is also fed back into the hash function so that unique values are determined on a running basis. The algorithm terminates as soon as the number of unique rows reaches the size of the data frame. Missing values are also grouped just like any other values. Invoking arguments \code{starts} and/or \code{group.sizes} requires an additional pass through the final grouping vector.
28 | }
29 | \value{
30 | An object of class 'qG', see \code{\link{qG}}.
31 | }
32 | \author{
33 | The hash function and inspiration were taken from the excellent \emph{kit} package by Morgan Jacob; the algorithm was developed by Sebastian Krantz.
34 | }
35 | 
36 | %% ~Make other sections like Warning with \section{Warning }{....} ~
37 | 
38 | \seealso{
39 | \code{\link{radixorder}}, \code{\link{GRPid}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}
40 | }
41 | \examples{
42 | # Let's replicate what funique does
43 | g <- groupv(wlddev, starts = TRUE)
44 | if(attr(g, "N.groups") == fnrow(wlddev)) wlddev else
45 |    ss(wlddev, attr(g, "starts"))
46 | 
47 | }
48 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: collapse
 2 | Title: Advanced and Fast Data Transformation
 3 | Version: 2.1.5.9000
 4 | Date: 2025-12-02
 5 | Authors@R: c(
 6 |            person("Sebastian", "Krantz", role = c("aut", "cre"), 
 7 |                   email = "sebastian.krantz@graduateinstitute.ch", 
 8 |                   comment = c(ORCID = "0000-0001-6212-5229")),
 9 |            person("Matt", "Dowle", role = "ctb"),
10 |            person("Arun", "Srinivasan", role = "ctb"),
11 |            person("Morgan", "Jacob", role = "ctb"),
12 |            person("Dirk", "Eddelbuettel", role = "ctb"),
13 |            person("Laurent", "Berge", role = "ctb"),
14 |            person("Kevin", "Tappe", role = "ctb"),
15 |            person("Alina", "Cherkas", role = "ctb"),
16 |            person("R Core Team and contributors worldwide", role = "ctb"),
17 |            person("Martyn", "Plummer", role = "cph"),
18 |            person("1999-2016 The R Core Team", role = "cph")
19 |            )
20 | Description: A large C/C++-based package for advanced data transformation and 
21 |     statistical computing in R that is extremely fast, class-agnostic, robust, and 
22 |     programmer friendly. Core functionality includes a rich set of S3 generic grouped 
23 |     and weighted statistical functions for vectors, matrices and data frames, which 
24 |     provide efficient low-level vectorizations, OpenMP multithreading, and skip missing 
25 |     values by default. These are integrated with fast grouping and ordering algorithms 
26 |     (also callable from C), and efficient data manipulation functions. The package also 
27 |     provides a flexible and rigorous approach to time series and panel data in R, fast 
28 |     functions for data transformation and common statistical procedures, detailed 
29 |     (grouped, weighted) summary statistics, powerful tools to work with nested data, 
30 |     fast data object conversions, functions for memory efficient R programming, and 
31 |     helpers to effectively deal with variable labels, attributes, and missing data. It 
32 |     seamlessly supports base R objects/classes as well as 'units', 'integer64', 'xts'/
33 |     'zoo', 'tibble', 'grouped_df', 'data.table', 'sf', and 'pseries'/'pdata.frame'.
34 | URL: https://fastverse.github.io/collapse/, https://github.com/fastverse/collapse
35 | BugReports: https://github.com/fastverse/collapse/issues
36 | License: GPL (>= 2) | file LICENSE
37 | Encoding: UTF-8
38 | LazyData: true
39 | Depends: R (>= 3.5.0)
40 | Imports: Rcpp (>= 1.0.1)
41 | LinkingTo: Rcpp
42 | Suggests: fastverse, data.table, magrittr, kit, xts, zoo, plm, fixest, vars, 
43 |           RcppArmadillo, RcppEigen, tibble, dplyr, ggplot2, scales, microbenchmark, 
44 |           testthat, covr, knitr, rmarkdown, withr, bit64
45 | VignetteBuilder: knitr
46 | 
47 | 


--------------------------------------------------------------------------------
/tests/testthat/test-splitting.R:
--------------------------------------------------------------------------------
 1 | context("gsplit and rsplit")
 2 | 
 3 | 
 4 | 
 5 | wld2 <- wlddev # working copy of the wlddev dataset used by the tests below
 6 | oldClass(wld2) <- NULL # remove the class attribute so wld2 is handled as a plain list
 7 | vlabels(wld2) <- NULL # remove the variable labels
 8 | f <- wld2$iso3c # single grouping variable (country code)
 9 | ind <- 1:1000 # row subset used for the partial-data comparisons
10 | fss <- f[ind] # grouping variable restricted to the subset
11 | fl <- wld2[c("region", "income")] # list of two grouping variables
12 | flss <- ss(fl, ind) # the same list restricted to the subset rows
13 | 
14 | test_that("gsplit / rsplit work like split", {
15 | 
16 |   for(i in seq_col(wld2)) {
17 |     expect_equal(gsplit(wld2[[i]], f, TRUE), split(wld2[[i]], f))
18 |     expect_equal(gsplit(wld2[[i]], f, FALSE), `names<-`(split(wld2[[i]], f), NULL))
19 |     expect_equal(gsplit(wld2[[i]][ind], fss, TRUE), split(wld2[[i]][ind], fss))
20 |     expect_equal(rsplit(wld2[[i]][ind], fss), split(wld2[[i]][ind], fss, drop = TRUE))
21 |     # factor list
22 |     expect_true(all_obj_equal(gsplit(wld2[[i]], fl, TRUE),
23 |                               rsplit(wld2[[i]], fl, flatten = TRUE),
24 |                               unlist(rsplit(wld2[[i]], fl), recursive = FALSE),
25 |                               split(wld2[[i]], fl, drop = TRUE, lex.order = TRUE)))
26 | 
27 |     expect_true(all_obj_equal(gsplit(wld2[[i]][ind], flss, TRUE),
28 |                               rsplit(wld2[[i]][ind], flss, flatten = TRUE),
29 |                               unlist(rsplit(wld2[[i]][ind], flss), recursive = FALSE),
30 |                               split(wld2[[i]][ind], flss, drop = TRUE, lex.order = TRUE)))
31 |   }
32 | })
33 | 
34 | test_that("rsplit matrix method works as intended", {
35 |   m = qM(nv(GGDC10S))
36 |   fl = lapply(GGDC10S[c("Country", "Variable")], qF, sort = FALSE)
37 |   expect_equal(lapply(rsplit(m, GGDC10S$Country), unattrib), split(m, GGDC10S$Country))
38 |   expect_equal(lapply(rsplit(m, itn(fl), flatten = TRUE), unattrib), split(m, itn(fl)))
39 | 
40 |   expect_equal(rsplit(m, fl, flatten = TRUE), unlist(rsplit(m, fl), FALSE))
41 | 
42 |   expect_true(all(vapply(rsplit(m, c(fl, GGDC10S["Year"]), flatten = TRUE), is.matrix, TRUE)))
43 |   expect_true(!any(vapply(rsplit(m, c(fl, GGDC10S["Year"]), flatten = TRUE, drop.dim = TRUE), is.matrix, TRUE)))
44 | })
45 | 
46 | test_that("rsplit data frame method works as intended", {
47 | 
48 |   expect_equal(rsplit(mtcars, mtcars$cyl), split(mtcars, mtcars$cyl))
49 |   expect_equal(rsplit(mtcars, mpg ~ cyl), split(mtcars$mpg, mtcars$cyl))
50 |   expect_equal(rsplit(mtcars, mpg ~ cyl, simplify = FALSE), split(mtcars["mpg"], mtcars$cyl))
51 | 
52 |   expect_true(all_obj_equal(rsplit(mtcars, mtcars[.c(cyl, vs, am)], flatten = TRUE),
53 |                rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE, keep.by = TRUE),
54 |                unlist(unlist(rsplit(mtcars, mtcars[.c(cyl, vs, am)]), FALSE), FALSE),
55 |                unlist(unlist(rsplit(mtcars, ~ cyl + vs + am, keep.by = TRUE), FALSE), FALSE),
56 |                split(mtcars, mtcars[.c(cyl, vs, am)], drop = TRUE, lex.order = TRUE)))
57 | 
58 |   expect_true(all_obj_equal(rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE),
59 |                             unlist(unlist(rsplit(mtcars, ~ cyl + vs + am), FALSE), FALSE),
60 |                             split(mtcars[names(mtcars) %!in% .c(cyl, vs, am)],
61 |                                   mtcars[.c(cyl, vs, am)], drop = TRUE, lex.order = TRUE)))
62 | 
63 | })
64 | 
65 | 
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/R/qtab.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | qtab <- function(..., w = NULL, wFUN = NULL, wFUN.args = NULL,
 4 |                    dnn = "auto", sort = .op[["sort"]],
 5 |                    na.exclude = TRUE, drop = FALSE, method = "auto") { # Cross-tabulates the inputs in ...; if both w and wFUN are given, w is aggregated by wFUN within each cell
 6 |   ll <- ...length() == 1L && is.list(..1) # TRUE when a single list was passed instead of separate vectors
 7 |   l <- if(ll) unclass(..1) else list(...)
 8 |   n <- length(l)
 9 |   dn <- vector("list", n) # becomes the dimnames of the result, one entry per input
10 |   dm <- integer(n) # becomes the dim of the result (number of levels per input)
11 | 
12 |   names(dn) <- if(is.character(dnn)) { # dimension names: character dnn, a function of l, or anything unlist() can flatten
13 |     if(length(dnn) > 1L) dnn else {
14 |       nam <- names(l)
15 |       nam <- switch(dnn, auto =, namlab =
16 |                if(ll) nam else if(is.null(nam)) .c(...) else
17 |                if(all(has_nam <- nzchar(nam))) nam else
18 |                  `[<-`(nam, !has_nam, value = .c(...)[!has_nam]), dnn)
19 |       if(dnn != "namlab") nam else paste(nam, setv(vlabels(l, use.names = FALSE), NA, ""), sep = ": ")
20 |     }
21 |   } else if(is.function(dnn)) dnn(l) else unlist(dnn, use.names = FALSE)
22 | 
23 |   # tofact <- function(g) {
24 |   #   if(is.factor(g)) {
25 |   #     if(!na.exclude && !inherits(g, "na.included")) return(addNA2(g))
26 |   #     return(g)
27 |   #   }
28 |   #   groupfact(g, ord = FALSE, fact = TRUE, naincl = !na.exclude, keep = FALSE)
29 |   # }
30 |   g <- qF(l[[1L]], sort = sort, na.exclude = na.exclude, drop = drop, method = method) # first input passed through qF(); its "levels" attribute is read next
31 |   lev <- attr(g, "levels")
32 |   dn[[1L]] <- lev
33 |   dm[1L] <- ngp <- length(lev) # ngp tracks the running number of cells
34 |   attributes(g) <- NULL # strip levels/class: from here on g is used as the cell index
35 | 
36 |   if(n > 1L) for (i in 2:n) {
37 |     gi <- qF(l[[i]], sort = sort, na.exclude = na.exclude, drop = drop, method = method)
38 |     lev <- attr(gi, "levels")
39 |     dn[[i]] <- lev
40 |     dm[i] <- length(lev)
41 |     # attributes(gi) <- NULL
42 |     # unattrib(x) + (unattrib(y) - 1L) * fnlevels(x)
43 |     # NA values cause integer overflows...
44 |     # gi %-=% 1L
45 |     # gi %*=% ngp
46 |     # g %+=% gi
47 |     # TODO: what if g is not a deep copy?? -> seems to work so far. I guess qF() or attributes(g) <- NULL creates a deep copy?
48 |     .Call(C_fcrosscolon, g, ngp, gi, na.exclude) # return value is not assigned: presumably g is updated by reference to combine gi (see TODO above) - confirm in the C source
49 |     ngp <- ngp * length(lev)
50 |   }
51 | 
52 |   if(is.null(w) || is.null(wFUN)) # no aggregation requested: tabulate g directly (w, possibly NULL, is handed to the C routine)
53 |      tab <- .Call(C_fwtabulate, g, w, ngp, na.exclude) # tabulate(g, nbins = ngp)
54 |   else {
55 |     if(is.function(wFUN)) {
56 |        wf <- l1orlst(as.character(substitute(wFUN))) # name of the supplied function as character, compared against .FAST_STAT_FUN below
57 |     } else if (is.character(wFUN)) {
58 |        wf <- wFUN
59 |        wFUN <- match.fun(wFUN)
60 |     } else stop("wFUN needs to be a function or function-string")
61 |     if(na.exclude && anyNA(g)) { # restrict w and g to the positions where g is not missing
62 |       nna <- whichNA(g, invert = TRUE)
63 |       w <- Csv(w, nna)
64 |       g <- Csv(g, nna)
65 |     }
66 |     attr(g, "N.groups") <- ngp
67 |     oldClass(g) <- c("qG", "na.included") # dress g up as a 'qG' grouping object for the calls below
68 |     if(is.null(wFUN.args)) {
69 |       tab <- if(any(wf == .FAST_STAT_FUN)) wFUN(w, g = g, use.g.names = FALSE) else # functions listed in .FAST_STAT_FUN are called directly with g
70 |              splaplfun(w, g, wFUN)
71 |     } else {
72 |       tab <- if(any(wf == .FAST_STAT_FUN)) do.call(wFUN, c(list(x = w, g = g, use.g.names = FALSE), wFUN.args)) else
73 |              do.call(splaplfun, c(list(x = w, g = g, FUN = wFUN), wFUN.args))
74 |     }
75 |   }
76 | 
77 |   dim(tab) <- dm # shape the flat result into a table with one dimension per input
78 |   dimnames(tab) <- dn
79 |   oldClass(tab) <- c("qtab", "table")
80 |   attr(tab, "sorted") <- sort
81 |   attr(tab, "weighted") <- !is.null(w)
82 |   tab
83 | }
84 | 
85 | qtable <- function(...) qtab(...) # forwards all arguments unchanged to qtab()
86 | 


--------------------------------------------------------------------------------
/man/wlddev.Rd:
--------------------------------------------------------------------------------
 1 | \name{wlddev}
 2 | \alias{wlddev}
 3 | \docType{data}
 4 | \title{
 5 | World Development Dataset
 6 | }
 7 | \description{
 8 | This dataset contains 5 indicators from the World Bank's World Development Indicators (WDI) database: (1) GDP per capita, (2) Life expectancy at birth, (3) GINI index, (4) Net ODA and official aid received and (5) Population. The panel data is balanced and covers 216 present and historic countries from 1960-2020 (World Bank aggregates and regional entities are excluded).
 9 | 
10 | Apart from the indicators the data contains a number of identifiers (character country name, factor ISO3 country code, World Bank region and income level, numeric year and decade) and 2 generated variables: A logical variable indicating whether the country is an OECD member, and a fictitious variable stating the date the data was recorded. These variables were added so that all common data-types are represented in this dataset, making it an ideal test-dataset for certain \emph{collapse} functions.
11 | }
12 | \usage{data("wlddev")}
13 | \format{
14 |   A data frame with 13176 observations on the following 13 variables. All variables are labeled, i.e. have a 'label' attribute.
15 |   \describe{
16 |     \item{\code{country}}{\emph{chr} Country Name}
17 |     \item{\code{iso3c}}{\emph{fct} Country Code}
18 |     \item{\code{date}}{\emph{date} Date Recorded (Fictitious)}
19 |     \item{\code{year}}{\emph{int} Year}
20 |     \item{\code{decade}}{\emph{int} Decade}
21 |     \item{\code{region}}{\emph{fct} World Bank Region}
22 |     \item{\code{income}}{\emph{fct} World Bank Income Level}
23 |     \item{\code{OECD}}{\emph{log} Is OECD Member Country?}
24 |     \item{\code{PCGDP}}{\emph{num} GDP per capita (constant 2010 US$)}
25 |     \item{\code{LIFEEX}}{\emph{num} Life expectancy at birth, total (years)}
26 |     \item{\code{GINI}}{\emph{num} GINI index (World Bank estimate)}
27 |     \item{\code{ODA}}{\emph{num} Net official development assistance and official aid received (constant 2018 US$)}
28 |     \item{\code{POP}}{\emph{num} Population, total}
29 |   }
30 | }
31 | % \details{
32 | %%  ~~ If necessary, more details than the __description__ above ~~
33 | % }
34 | \source{
35 | \url{https://data.worldbank.org/}, accessed via the \code{WDI} package. The codes for the series are \code{c("NY.GDP.PCAP.KD", "SP.DYN.LE00.IN", "SI.POV.GINI", "DT.ODA.ALLD.KD", "SP.POP.TOTL")}.
36 | }
37 | % \references{
38 | %%  ~~ possibly secondary sources and usages ~~
39 | % }
40 | \seealso{
41 | \code{\link{GGDC10S}}, \link[=collapse-documentation]{Collapse Overview}
42 | }
43 | \examples{
44 | data(wlddev)
45 | 
46 | # Panel-summarizing the 5 series
47 | qsu(wlddev, pid = ~iso3c, cols = 9:13, vlabels = TRUE)
48 | 
49 | # By Region
50 | qsu(wlddev, by = ~region, cols = 9:13, vlabels = TRUE)
51 | 
52 | # Panel-summary by region
53 | qsu(wlddev, by = ~region, pid = ~iso3c, cols = 9:13, vlabels = TRUE)
54 | 
55 | # Pairwise correlations: Overall
56 | print(pwcor(get_vars(wlddev, 9:13), N = TRUE, P = TRUE), show = "lower.tri")
57 | 
58 | # Pairwise correlations: Between Countries
59 | print(pwcor(fmean(get_vars(wlddev, 9:13), wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri")
60 | 
61 | # Pairwise correlations: Within Countries
62 | print(pwcor(fwithin(get_vars(wlddev, 9:13), wlddev$iso3c), N = TRUE, P = TRUE), show = "lower.tri")
63 | 
64 | }
65 | 
66 | 
67 | \keyword{datasets}
68 | 


--------------------------------------------------------------------------------
/src/data.table.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  This code is adapted from the data.table package: http://r-datatable.com
 3 |  and licensed under a Mozilla Public License 2.0 (MPL-2.0) license.
 4 | */
 5 | 
 6 | #ifndef DATATABLE_H  // Check if DATATABLE_H is not defined
 7 | #define DATATABLE_H  // Define DATATABLE_H
 8 | 
 9 | // #define USE_RINTERNALS
10 | #include "base_radixsort.h"
11 | // #include <stdint.h> // for uint64_t rather than unsigned long long
12 | #include <stdbool.h>
13 | // #include "types.h"
14 | 
15 | #define IS_TRUE(x)  (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==TRUE)
16 | #define IS_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]==FALSE)
17 | #define IS_TRUE_OR_FALSE(x) (TYPEOF(x)==LGLSXP && LENGTH(x)==1 && LOGICAL(x)[0]!=NA_LOGICAL)
18 | #define SIZEOF(x) sizes[TYPEOF(x)]
19 | #define TYPEORDER(x) typeorder[x]
20 | 
21 | // Needed for match.c and join.c
22 | #define NEED2UTF8(s) !(IS_ASCII(s) || (s)==NA_STRING || IS_UTF8(s)) // true only if s is neither ASCII, nor NA_STRING, nor flagged as UTF-8
23 | #define ENC2UTF8(s) (!NEED2UTF8(s) ? (s) : mkCharCE(translateCharUTF8(s), CE_UTF8)) // s itself when no conversion is needed, otherwise a CE_UTF8 string built from translateCharUTF8(s)
24 | 
25 | // for use with bit64::integer64
26 | #define NA_INTEGER64  INT64_MIN
27 | #define MAX_INTEGER64 INT64_MAX
28 | #ifndef INTEGER64_PTR
29 | #define INTEGER64_PTR(x) ((int64_t*) REAL(x))
30 | #endif
31 | #ifndef INTEGER64_PTR_RO
32 | #define INTEGER64_PTR_RO(x) ((int64_t*) REAL_RO(x))
33 | #endif
34 | 
35 | // init.c // https://stackoverflow.com/questions/1410563/what-is-the-difference-between-a-definition-and-a-declaration
36 | extern SEXP char_integer64;
37 | extern SEXP char_nanotime;
38 | extern SEXP char_factor;
39 | extern SEXP char_ordered;
40 | extern SEXP char_dataframe;
41 | extern SEXP char_datatable;
42 | extern SEXP char_sf;
43 | extern SEXP sym_sorted;
44 | extern SEXP sym_index;
45 | extern SEXP sym_index_df;
46 | extern SEXP sym_sf_column;
47 | extern SEXP SelfRefSymbol;
48 | extern SEXP sym_datatable_locked;
49 | 
50 | // data.table_init.c
51 | SEXP collapse_init(SEXP mess);
52 | long long DtoLL(double x);
53 | double LLtoD(long long x);
54 | extern double NA_INT64_D;
55 | extern long long NA_INT64_LL;
56 | extern Rcomplex NA_CPLX;  // initialized in init.c; see there for comments
57 | extern size_t sizes[100];  // max appears to be FUNSXP = 99, see Rinternals.h
58 | extern size_t typeorder[100];
59 | 
60 | // data.table_utils.c
61 | int need2utf8(SEXP x);
62 | SEXP coerceUtf8IfNeeded(SEXP x);
63 | SEXP setnames(SEXP x, SEXP nam);
64 | bool allNA(SEXP x, bool errorForBadType);
65 | SEXP allNAv(SEXP x, SEXP errorForBadType);
66 | bool INHERITS(SEXP x, SEXP char_);
67 | SEXP dt_na(SEXP x, SEXP cols, SEXP Rprop, SEXP Rcount);
68 | SEXP frankds(SEXP xorderArg, SEXP xstartArg, SEXP xlenArg, SEXP dns);
69 | SEXP setcolorder(SEXP x, SEXP o);
70 | 
71 | // data.table_subset.c
72 | void setselfref(SEXP x);
73 | SEXP Calloccol(SEXP dt);
74 | SEXP convertNegAndZeroIdx(SEXP idx, SEXP maxArg, SEXP allowOverMax);
75 | SEXP extendIntVec(SEXP x, int len, int val);
76 | SEXP subsetCols(SEXP x, SEXP cols, SEXP checksf);
77 | SEXP subsetDT(SEXP x, SEXP rows, SEXP cols, SEXP checkrows);
78 | SEXP subsetVector(SEXP x, SEXP idx, SEXP checkidx);
79 | 
80 | // rbindlist.c
81 | void writeNA(SEXP v, const int from, const int n);
82 | void writeValue(SEXP target, SEXP source, const int from, const int n);
83 | void savetl_init(void), savetl(SEXP s), savetl_end(void);
84 | SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg);
85 | 
86 | #endif // End of DATATABLE_H guard
87 | 


--------------------------------------------------------------------------------
/man/colorder.Rd:
--------------------------------------------------------------------------------
 1 | \name{colorder}
 2 | \alias{colorder}
 3 | \alias{colorderv}
 4 | %- Also NEED an '\alias' for EACH other topic documented here.
 5 | \title{
 6 | Fast Reordering of Data Frame Columns
 7 | }
 8 | \description{
 9 | Efficiently reorder columns in a data frame. To do this fully by reference see also \code{data.table::setcolorder}.
10 | }
11 | \usage{
12 | colorder(.X, \dots, pos = "front")
13 | 
14 | colorderv(X, neworder = radixorder(names(X)),
15 |           pos = "front", regex = FALSE, \dots)
16 | }
17 | %- maybe also 'usage' for other objects documented here.
18 | \arguments{
19 |   \item{.X, X}{a data frame or list.}
20 |   \item{\dots}{for \code{colorder}: Column names of \code{.X} in the new order (can also use sequences i.e. \code{col1:coln, newname = colk, \dots}). For \code{colorderv}: Further arguments to \code{\link{grep}} if \code{regex = TRUE}.}
21 |   \item{neworder}{a vector of column names, positive indices, a suitable logical vector, a function such as \code{is.numeric}, or a vector of regular expressions matching column names (if \code{regex = TRUE}). }
22 |   \item{pos}{integer or character. Different options regarding column arrangement if \code{...length() < ncol(.X)} (or \code{length(neworder) < ncol(X)}).
23 |         \tabular{lllll}{\emph{ Int. }   \tab\tab \emph{ String }   \tab\tab \emph{ Description }  \cr
24 |                  1 \tab\tab "front"   \tab\tab move specified columns to the front (the default). \cr
25 |                  2 \tab\tab "end" \tab\tab move specified columns to the end. \cr
26 |                  3 \tab\tab "exchange"   \tab\tab just exchange the positions of selected columns, other columns remain in the same position. \cr
27 |                  4 \tab\tab "after"  \tab\tab place all further selected columns behind the first selected column. \cr
28 |   }
29 | }
30 | \item{regex}{logical. \code{TRUE} will do regular expression search on the column names of \code{X} using a (vector of) regular expression(s) passed to \code{neworder}. Matching is done using \code{\link{grep}}. \emph{Note} that multiple regular expressions will be matched in the order they are passed, and \code{\link{funique}} will be applied to the resulting set of indices. }
31 | }
32 | \value{
33 | \code{.X/X} with columns reordered (no deep copies).
34 | }
35 | 
36 | 
37 | %% ~Make other sections like Warning with \section{Warning }{\dots.} ~
38 | 
39 | \seealso{
40 | \code{\link{roworder}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview}
41 | }
42 | \examples{
43 | head(colorder(mtcars, vs, cyl:hp, am))
44 | head(colorder(mtcars, vs, cyl:hp, am, pos = "end"))
45 | head(colorder(mtcars, vs, cyl:hp, am, pos = "after"))
46 | head(colorder(mtcars, vs, cyl, pos = "exchange"))
47 | head(colorder(mtcars, vs, cyl:hp, new = am))    # renaming
48 | 
49 | ## Same in standard evaluation
50 | head(colorderv(mtcars, c(8, 2:4, 9)))
51 | head(colorderv(mtcars, c(8, 2:4, 9), pos = "end"))
52 | head(colorderv(mtcars, c(8, 2:4, 9), pos = "after"))
53 | head(colorderv(mtcars, c(8, 2), pos = "exchange"))
54 | }
55 | % Add one or more standard keywords, see file 'KEYWORDS' in the
56 | % R documentation directory (show via RShowDoc("KEYWORDS")):
57 | \keyword{ manip }
58 | % \keyword{ ~kwd2 }
59 | % Use only one keyword per line.
60 | % For non-standard keywords, use \concept instead of \keyword:
61 | % \concept{ ~cpt1 }
62 | % \concept{ ~cpt2 }
63 | % Use only one concept per line.
64 | 


--------------------------------------------------------------------------------
/R/fcount.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # TODO: keep argument? -> not needed, can use fselect beforehand...
 4 | fcount_core <- function(x, g, w = NULL, name = "N", add = FALSE) { # Builds the count result from data x and grouping object g; fcount()/fcountv() pass add as FALSE, TRUE or 2L
 5 |   # TODO: don't need integer group sizes if this is the case....
 6 |   if(length(w)) g$group.sizes <- .Call(C_fwtabulate, g$group.id, w, g$N.groups, FALSE) # na.rm in g is not needed (FALSE)
 7 |   # if(is.atomic(x)) { # what about factors and sort argument?? and dropping levels??
 8 |   #   if(add) {
 9 |   #     res <- list(x, .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE))
10 |   #     names(res) <- c(g$group.vars, name[1L])
11 |   #   } else {
12 |   #     res <- g$groups
13 |   #     res[[name[1L]]] <- g$group.sizes
14 |   #   }
15 |   #   attr(res, "row.names") <- .set_row_names(.Call(C_fnrow, res))
16 |   #   oldClass(res) <- "data.frame"
17 |   #   return(res)
18 |   # }
19 |   if(add) { # add mode: keep x (or only its grouping columns) and append a count column
20 |     gs <- .Call(C_subsetVector, g$group.sizes, g$group.id, FALSE) # group sizes looked up by group id (presumably one value per row of x - confirm)
21 |     # return(`add_vars<-`(x, "end", `names<-`(list(gs), name[1L])))
22 |     if(add == 2L) { # 2L is what the callers map add = "gv" / "group_vars" to: keep only the grouping columns of x
23 |       x <- # if(inherits(x, "grouped_df")) fgroup_vars(x) else # Better keep groups, does no harm... can use fungroup()
24 |         .Call(C_subsetCols, x, ckmatch(g$group.vars, attr(x, "names")), TRUE)
25 |     }
26 |     res <- c(x, `names<-`(list(gs), name[1L])) # append the counts as a column named name[1L]
27 |     return(condalc(copyMostAttributes(res, x), inherits(x, "data.table")))
28 |   }
29 |   res <- g$groups # not adding: start from the groups stored in g
30 |   if(!is.object(res) && is.object(x)) { # inherits(x, c("grouped_df", "indexed_frame"))
31 |     res[[name[1L]]] <- g$group.sizes
32 |     return(condCopyAttrib(res, x)) # groups carry no class but x does: attributes are taken from x
33 |   }
34 |   condalc(copyMostAttributes(c(res, `names<-`(list(g$group.sizes), name[1L])), res), inherits(x, "data.table")) # otherwise append the counts and keep the attributes of the groups
35 | }
36 | 
37 | fcount <- function(x, ..., w = NULL, name = "N", add = FALSE, sort = FALSE, decreasing = FALSE) { # Counts rows per group; grouping columns are selected via ... (all of x if ... is empty)
38 |   if(is.list(x)) w <- eval(substitute(w), x, parent.frame()) # w is evaluated with the columns of x in scope, then the caller's frame
39 |   else x <- qDF(x) # non-list input is converted with qDF()
40 |   if(is.character(add)) add <- switch(add, gv =, group_vars = 2L, stop("add must be TRUE, FALSE or group_vars (gv)")) # add = "g", "groups" or "group_vars"
41 |   # Note: this code duplication with GRP() is needed for GRP() to capture x (using substitute) if x is atomic.
42 |   # if(is.atomic(x)) `names<-`(list(x), l1orlst(as.character(substitute(x)))) else
43 |   g <- if(missing(...)) GRP(x, sort = sort, decreasing = decreasing, return.groups = !add, return.order = FALSE, call = FALSE) else
44 |     GRP.default(fselect(x, ...), sort = sort, decreasing = decreasing, return.groups = !add, return.order = FALSE, call = FALSE) # groups are only materialized when add is FALSE
45 |   fcount_core(x, g, w, name, add)
46 | }
47 | 
48 | fcountv <- function(x, cols = NULL, w = NULL, name = "N", add = FALSE, sort = FALSE, ...) { # Counts rows per group of x; grouping columns are given by cols (all of x if NULL), ... is passed on to GRP()
49 |   # Safe enough ? or only allow character ? what about collapv() ?, extra option ?
50 |   # if(length(w) == 1L && is.list(x) && length(unclass(x)) > 1L && (is.character(w) || is.integer(w) || (is.numeric(w) && w %% 1 < 1e-6)))
51 |   if(is.atomic(x)) x <- qDF(x) # atomic input is converted with qDF() first
52 |   if(length(w) == 1L && is.character(w)) { # w supplied as a single column name
53 |     wnam <- w # keep the requested name: w itself is overwritten by the lookup on the next line
54 |     if(is.null(w <- .subset2(x, wnam))) stop("Unknown column: ", wnam) # .subset2() gives NULL for a name not in x; report the name, not the NULL
55 |   }
56 |   if(is.character(add)) add <- switch(add, gv =, group_vars = 2L, stop("add must be TRUE, FALSE or group_vars (gv)")) # add = "g", "groups" or "group_vars"
57 |   g <- if(is.null(cols)) GRP(x, sort = sort, return.groups = !add, return.order = FALSE, call = FALSE, ...) else
58 |     GRP.default(colsubset(x, cols), sort = sort, return.groups = !add, return.order = FALSE, call = FALSE, ...) # groups are only materialized when add is FALSE
59 |   fcount_core(x, g, w, name, add)
60 | }
61 | 


--------------------------------------------------------------------------------
/man/fdist.Rd:
--------------------------------------------------------------------------------
 1 | \name{fdist}
 2 | \alias{fdist}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Fast and Flexible Distance Computations
 6 | }
 7 | \description{
 8 | A fast and flexible replacement for \code{\link{dist}}, to compute euclidean distances.
 9 | }
10 | \usage{
11 | fdist(x, v = NULL, ..., method = "euclidean", nthreads = .op[["nthreads"]])
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{x}{a numeric vector or matrix. Data frames/lists can be passed but will be converted to matrix using \code{\link{qM}}. Non-numeric (double) inputs will be coerced. }
16 |   \item{v}{an (optional) numeric (double) vector such that \code{length(v) == NCOL(x)}, to compute distances with (the rows of) \code{x}. Other vector types will be coerced.}
17 |   \item{\dots}{not used. A placeholder for possible future arguments.}
18 |   \item{method}{an integer or character string indicating the method of computing distances.
19 |   \tabular{lllll}{\emph{ Int. }   \tab\tab \emph{ String }   \tab\tab \emph{ Description }  \cr
20 |                  1 \tab\tab \code{"euclidean"}   \tab\tab euclidean distance \cr
21 |                  2 \tab\tab \code{"euclidean_squared"} \tab\tab squared euclidean distance (more efficient) \cr
22 |     }
23 |     %\emph{Note:} The mahalanobis distance can be computed using: \code{x_mahal = t(forwardsolve(t(chol(cov(x))), t(x)))}. See Examples.
24 |   }
25 |   \item{nthreads}{integer. The number of threads to use. If \code{v = NULL} (full distance matrix), multithreading is along the distance matrix columns (decreasing thread loads as matrix is lower triangular). If \code{v} is supplied, multithreading is at the sub-column level (across elements).}
26 | }
27 | \value{
28 | If \code{v = NULL}, a full lower-triangular distance matrix between the rows of \code{x} is computed and returned as a 'dist' object (all methods apply, see \code{\link{dist}}). Otherwise, a numeric vector of distances of each row of \code{x} with \code{v} is returned. See Examples.
29 | }
30 | \note{
31 | \code{fdist} does not check for missing values, so \code{NA}'s will result in \code{NA} distances.
32 | 
33 | \code{kit::topn} is a suitable complementary function to find nearest neighbors. It is very efficient and skips missing values by default.
34 | }
35 | 
36 | %% ~Make other sections like Warning with \section{Warning }{....} ~
37 | 
38 | \seealso{
39 | \code{\link{flm}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview}
40 | }
41 | \examples{
42 | # Distance matrix
43 | m = as.matrix(mtcars)
44 | str(fdist(m)) # Same as dist(m)
45 | 
46 | # Distance with vector
47 | d = fdist(m, fmean(m))
48 | kit::topn(d, 5)  # Index of 5 nearest neighbours
49 | 
50 | # Mahalanobis distance
51 | m_mahal = t(forwardsolve(t(chol(cov(m))), t(m)))
52 | fdist(m_mahal, fmean(m_mahal))
53 | sqrt(unattrib(mahalanobis(m, fmean(m), cov(m))))
54 | \donttest{
55 | # Distance of two vectors
56 | x <- rnorm(1e6)
57 | y <- rnorm(1e6)
58 | microbenchmark::microbenchmark(
59 |   fdist(x, y),
60 |   fdist(x, y, nthreads = 2),
61 |   sqrt(sum((x-y)^2))
62 | )
63 | }
64 | }
65 | % Add one or more standard keywords, see file 'KEYWORDS' in the
66 | % R documentation directory (show via RShowDoc("KEYWORDS")):
67 | \keyword{multivariate}
68 | \keyword{nonparametric}
69 | % \keyword{ ~kwd2 }
70 | % Use only one keyword per line.
71 | % For non-standard keywords, use \concept instead of \keyword:
72 | % \concept{ ~cpt1 }
73 | % \concept{ ~cpt2 }
74 | % Use only one concept per line.
75 | 


--------------------------------------------------------------------------------
/tests/testthat/test-fslice.R:
--------------------------------------------------------------------------------
  1 | context("fslice")
  2 | data("iris")
  3 | 
  4 | test_that("fslice works with integers and no grouping", {
  5 |   sizes <- c(1, 5, 17)
  6 |   for (k in sizes) {
  7 |     # default 'how' and how = "first" are both compared with dplyr::slice_head
  8 |     expect_equal(
  9 |       dplyr::slice_head(iris, n = k),
 10 |       fslice(iris, n = k)
 11 |     )
 12 |     expect_equal(
 13 |       dplyr::slice_head(iris, n = k),
 14 |       fslice(iris, n = k, how = "first")
 15 |     )
 16 |     # how = "last" vs. dplyr::slice_tail (dplyr result passed through setRownames())
 17 |     expect_equal(
 18 |       setRownames(dplyr::slice_tail(iris, n = k)),
 19 |       fslice(iris, n = k, how = "last")
 20 |     )
 21 |     # how = "min" vs. dplyr::slice_min with with_ties = FALSE
 22 |     expect_equal(
 23 |       dplyr::slice_min(iris, Petal.Length, n = k, with_ties = FALSE),
 24 |       fslice(iris, n = k, how = "min", order.by = "Petal.Length")
 25 |     )
 26 |     # how = "max" vs. dplyr::slice_max with with_ties = FALSE
 27 |     expect_equal(
 28 |       dplyr::slice_max(iris, Petal.Length, n = k, with_ties = FALSE),
 29 |       fslice(iris, n = k, how = "max", order.by = "Petal.Length")
 30 |     )
 31 |   }
 32 | })
 33 | 
 34 | 
 35 | test_that("fslice works with proportions and no grouping", {
 36 |   shares <- c(0.5, 0.75)
 37 |   for (p in shares) {
 38 |     # a value of n below 1 is compared with dplyr's 'prop' argument; default 'how' and how = "first"
 39 |     expect_equal(
 40 |       dplyr::slice_head(iris, prop = p),
 41 |       fslice(iris, n = p)
 42 |     )
 43 |     expect_equal(
 44 |       dplyr::slice_head(iris, prop = p),
 45 |       fslice(iris, n = p, how = "first")
 46 |     )
 47 |     # how = "last" vs. dplyr::slice_tail (dplyr result passed through setRownames())
 48 |     expect_equal(
 49 |       setRownames(dplyr::slice_tail(iris, prop = p)),
 50 |       fslice(iris, n = p, how = "last")
 51 |     )
 52 |     # how = "min" vs. dplyr::slice_min with with_ties = FALSE
 53 |     expect_equal(
 54 |       dplyr::slice_min(iris, Petal.Length, prop = p, with_ties = FALSE),
 55 |       fslice(iris, n = p, how = "min", order.by = "Petal.Length")
 56 |     )
 57 |     # how = "max" vs. dplyr::slice_max with with_ties = FALSE
 58 |     expect_equal(
 59 |       dplyr::slice_max(iris, Petal.Length, prop = p, with_ties = FALSE),
 60 |       fslice(iris, n = p, how = "max", order.by = "Petal.Length")
 61 |     )
 62 |   }
 63 | })
 64 | 
 65 | 
 66 | test_that("fslice works with grouping", {
 67 |   sizes <- c(1, 5, 17)
 68 |   for (k in sizes) {
 69 |     # first k rows per Species; the grouped dplyr result is converted with qDF() before comparing
 70 |     expect_equal(
 71 |       qDF(dplyr::slice_head(dplyr::group_by(iris, Species), n = k)),
 72 |       fslice(iris, "Species", n = k, how = "first")
 73 |     )
 74 |     # last k rows per Species
 75 |     expect_equal(
 76 |       qDF(dplyr::slice_tail(dplyr::group_by(iris, Species), n = k)),
 77 |       fslice(iris, "Species", n = k, how = "last")
 78 |     )
 79 |     # k smallest Petal.Length per Species, with_ties = FALSE
 80 |     expect_equal(
 81 |       qDF(dplyr::slice_min(dplyr::group_by(iris, Species), Petal.Length, n = k, with_ties = FALSE)),
 82 |       fslice(iris, "Species", n = k, how = "min", order.by = "Petal.Length")
 83 |     )
 84 |     # k largest Petal.Length per Species, with_ties = FALSE
 85 |     expect_equal(
 86 |       qDF(dplyr::slice_max(dplyr::group_by(iris, Species), Petal.Length, n = k, with_ties = FALSE)),
 87 |       fslice(iris, "Species", n = k, how = "max", order.by = "Petal.Length")
 88 |     )
 89 |   }
 90 | })
 91 | 
 92 | test_that("fslice works with ties", {
 93 |   sizes <- 1 # c(1, 5, 17)
 94 |   for (k in sizes) {
 95 |     # smallest Petal.Length per Species, tied rows included on both sides
 96 |     expect_equal(
 97 |       qDF(dplyr::slice_min(dplyr::group_by(iris, Species), Petal.Length, n = k, with_ties = TRUE)),
 98 |       fslice(iris, "Species", n = k, how = "min", order.by = "Petal.Length", with.ties = TRUE)
 99 |     )
100 |     # largest Petal.Length per Species, tied rows included on both sides
101 |     expect_equal(
102 |       qDF(dplyr::slice_max(dplyr::group_by(iris, Species), Petal.Length, n = k, with_ties = TRUE)),
103 |       fslice(iris, "Species", n = k, how = "max", order.by = "Petal.Length", with.ties = TRUE)
104 |     )
105 |   }
106 | })
107 | 


--------------------------------------------------------------------------------
/R/TRA.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | TRA <- function(x, STATS, FUN = "-", ...) UseMethod("TRA") # S3 generic, dispatching on the class of x (explicit dispatch argument ', x' left out)
 4 | 
 5 | setTRA <- function(x, STATS, FUN = "-", ...) { res <- TRA(x, STATS, FUN, ..., set = TRUE); invisible(res) } # TRA() with set = TRUE, result returned invisibly
 6 | 
 7 | TRA.default <- function(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) {
 8 |   # Default (vector) method: checks that g defines as many groups as length(STATS), then calls C_TRA.
 9 |   if(!missing(...)) unused_arg_action(match.call(), ...)
10 |   if(is.null(g)) return(.Call(C_TRA, x, STATS, 0L, FUN, set)) # ungrouped: 0L is passed in place of a group vector
11 |   if(!is.atomic(g)) { # GRP objects are used as they are, other non-atomic input is grouped with GRP.default()
12 |     grp <- if(is_GRP(g)) g else GRP.default(g, return.groups = FALSE, call = FALSE)
13 |     if(grp[[1L]] != length(STATS)) stop("number of groups must match length(STATS)")
14 |     return(.Call(C_TRA, x, STATS, grp[[2L]], FUN, set))
15 |   }
16 |   # atomic g: input accepted by is.nmfactor() is used directly, anything else is converted with qG()
17 |   if(is.nmfactor(g)) ngrp <- fnlevels(g) else {
18 |     g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !!
19 |     ngrp <- attr(g, "N.groups")
20 |   }
21 |   if(ngrp != length(STATS)) stop("number of groups must match length(STATS)")
22 |   .Call(C_TRA, x, STATS, g, FUN, set)
23 | }
24 | 
25 | TRA.matrix <- function(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) {
26 |   if(!missing(...)) unused_arg_action(match.call(), ...)
27 |   if(is.null(g)) return(.Call(C_TRAm, x, STATS, 0L, FUN, set)) # ungrouped: 0L is passed in place of a group vector
28 |   if(!is.atomic(g)) { # GRP objects are used as they are, other non-atomic input is grouped with GRP.default()
29 |     grp <- if(is_GRP(g)) g else GRP.default(g, return.groups = FALSE, call = FALSE)
30 |     if(grp[[1L]] != nrow(STATS)) stop("number of groups must match nrow(STATS)")
31 |     return(.Call(C_TRAm, x, STATS, grp[[2L]], FUN, set))
32 |   }
33 |   # atomic g: input accepted by is.nmfactor() is used directly, anything else is converted with qG()
34 |   if(is.nmfactor(g)) ngrp <- fnlevels(g) else {
35 |     g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !!
36 |     ngrp <- attr(g, "N.groups")
37 |   }
38 |   if(ngrp != nrow(STATS)) stop("number of groups must match nrow(STATS)")
39 |   .Call(C_TRAm, x, STATS, g, FUN, set)
40 | }
41 | 
42 | TRA.data.frame <- function(x, STATS, FUN = "-", g = NULL, set = FALSE, ...) {
43 |   if(!missing(...)) unused_arg_action(match.call(), ...)
44 |   if(is.null(g)) return(.Call(C_TRAl, x, STATS, 0L, FUN, set)) # ungrouped: 0L is passed in place of a group vector
45 |   if(!is.atomic(g)) { # GRP objects are used as they are, other non-atomic input is grouped with GRP.default()
46 |     grp <- if(is_GRP(g)) g else GRP.default(g, return.groups = FALSE, call = FALSE)
47 |     if(grp[[1L]] != fnrow(STATS)) stop("number of groups must match nrow(STATS)")
48 |     return(.Call(C_TRAl, x, STATS, grp[[2L]], FUN, set))
49 |   }
50 |   # atomic g: input accepted by is.nmfactor() is used directly, anything else is converted with qG()
51 |   if(is.nmfactor(g)) ngrp <- fnlevels(g) else {
52 |     g <- qG(g, na.exclude = FALSE) # needs to be ordered to be compatible with fast functions !!
53 |     ngrp <- attr(g, "N.groups")
54 |   }
55 |   if(ngrp != fnrow(STATS)) stop("number of groups must match nrow(STATS)")
56 |   .Call(C_TRAl, x, STATS, g, FUN, set)
57 | }
58 | 
59 | TRA.list <- function(x, ...) TRA.data.frame(x, ...) # lists are handled by the data.frame method
60 | 
61 | TRA.grouped_df <- function(x, STATS, FUN = "-", keep.group_vars = TRUE, set = FALSE, ...) {
62 |   if(!missing(...)) unused_arg_action(match.call(), ...)
63 |   g <- GRP.grouped_df(x, call = FALSE)
64 |   gvars <- g[[5L]] # matched against column names below to tell grouping columns apart
65 |   clx <- oldClass(x) # restored on the result at the end
66 |   oldClass(x) <- NULL; oldClass(STATS) <- NULL
67 |   if(g[[1L]] != length(STATS[[1L]])) stop("number of groups must match nrow(STATS)")
68 |   statcols <- names(STATS) %!in% gvars
69 |   # all STATS columns are looked up in x (ckmatch presumably errors on a miss); grouping columns are then left out
70 |   pos <- ckmatch(names(STATS), names(x), "Variables in STATS not found in x:")[statcols]
71 |   x[pos] <- .Call(C_TRAl, x[pos], STATS[statcols], g[[2L]], FUN, set)
72 |   if(!keep.group_vars) x[names(x) %in% gvars] <- NULL
73 |   oldClass(x) <- clx
74 |   x
75 | }
76 | 
77 | TRA.zoo <- function(x, STATS, FUN = "-", ...) if(is.matrix(x)) TRA.matrix(x, STATS, FUN, ...) else TRA.default(x, STATS, FUN, ...) # matrix-shaped x goes to the matrix method, everything else to the default method
78 | TRA.units <- TRA.zoo # 'units' objects use the same routing
79 | 


--------------------------------------------------------------------------------
/R/dapply.R:
--------------------------------------------------------------------------------
 1 | 
 2 | dapply <- function(X, FUN, ..., MARGIN = 2, parallel = FALSE, # applies FUN over the columns (MARGIN = 2) or rows (MARGIN = 1) of a matrix or a data frame / list
 3 |                    mc.cores = 1L, return = c("same", "matrix", "data.frame"), drop = TRUE) {
 4 |   rowwl <- switch(MARGIN, `1` = TRUE, `2` = FALSE, stop("MARGIN only supports 2 - columns or 1 - rows")) # TRUE = row-wise, FALSE = column-wise
 5 |   aplyfun <- if(parallel) function(...) mclapply(..., mc.cores = mc.cores) else lapply # mclapply() with mc.cores when parallel = TRUE, plain lapply() otherwise
 6 |   if(is.atomic(X)) { # atomic input: must have exactly two dimensions
 7 |     dX <- dim(X)
 8 |     if(length(dX) != 2L) stop("dapply cannot handle vectors or higher-dimensional arrays")
 9 |     res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, X, FALSE, 0L), FUN, ...) else aplyfun(.Call(Cpp_mctl, X, FALSE, 0L), FUN, ...) # Cpp_mrtl / Cpp_mctl presumably split the matrix into a list of rows / columns; FUN is applied to each element
10 |     lx1 <- .Call(C_fnrow, res) # size of the results as reported by C_fnrow (presumably the length of the first element)
11 |     if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), dimnames(X)[[if(rowwl) 1L else 2L]])) # scalar results and drop = TRUE: return a named atomic vector
12 |     switch(return[1L], same = { # choose the output container: ax holds the attributes to set on the result, retmatl says whether a matrix is built
13 |              ax <- attributes(X)
14 |              retmatl <- TRUE
15 |            }, matrix = {
16 |              ax <- list(dim = dX, dimnames = dimnames(X))
17 |              retmatl <- TRUE
18 |            }, data.frame = {
19 |              dn <- dimnames(X)
20 |              ax <- list(names = dn[[2L]],
21 |                         row.names = if(is.null(dn[[1L]])) .set_row_names(dX[1L]) else dn[[1L]],
22 |                         class = "data.frame")
23 |              retmatl <- FALSE
24 |            }, stop("Unknown return option!"))
25 |   } else { # non-atomic input (list / data frame)
26 |     ax <- attributes(X)
27 |     attributes(X) <- NULL # all attributes are stripped from X before FUN is applied; they are kept in ax
28 |     res <- if(rowwl) aplyfun(.Call(Cpp_mrtl, do.call(cbind, X), FALSE, 0L), FUN, ...) else aplyfun(X, FUN, ...) # row-wise: columns are first bound into a matrix with cbind
29 |     lx1 <- .Call(C_fnrow, res)
30 |     if(lx1 == 1L && drop) return(`names<-`(unlist(res, use.names = FALSE), if(rowwl) charorNULL(ax[["row.names"]]) else ax[["names"]]))
31 |     dX <- c(.Call(C_fnrow, X), length(X)) # stands in for dim(X): C_fnrow(X) rows, length(X) columns
32 |     switch(return[1L], same = retmatl <- FALSE, matrix = {
33 |       ax <- list(dim = dX, dimnames = list(charorNULL(ax[["row.names"]]), ax[["names"]]))
34 |       retmatl <- TRUE
35 |     }, data.frame = {
36 |       ax <- list(names = ax[["names"]],
37 |                  row.names = if(is.null(ax[["row.names"]])) .set_row_names(dX[1L]) else ax[["row.names"]],
38 |                  class = "data.frame")
39 |       retmatl <- FALSE
40 |     }, stop("Unknown return option!"))
41 |   }
42 |   if(retmatl) { # assemble a matrix result
43 |     if(rowwl) {
44 |       if(lx1 != dX[2L]) { # result length differs from the number of columns: adjust dim and take column names from the result or from the deparsed FUN
45 |         ax[["dim"]][2L] <- lx1
46 |         ax[["dimnames"]] <- list(ax[["dimnames"]][[1L]], if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L)
47 |           deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)))
48 |       }
49 |       res <- matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE) # each list element becomes one row
50 |     } else {
51 |       if(lx1 != dX[1L]) { # result length differs from the number of rows: adjust dim and take row names from the result or from the deparsed FUN
52 |         ax[["dim"]][1L] <- lx1
53 |         ax[["dimnames"]] <- list(if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L)
54 |           deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1)), ax[["dimnames"]][[2L]])
55 |       }
56 |       res <- do.call(cbind, res) # each list element becomes one column
57 |     }
58 |   } else { # assemble a list / data frame result
59 |     if(rowwl) {
60 |       if(lx1 != dX[2L]) ax[["names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else if(lx1 == 1L)
61 |         deparse(substitute(FUN)) else paste0(deparse(substitute(FUN)), seq_len(lx1))
62 |       res <- .Call(Cpp_mctl, matrix(unlist(res, use.names = FALSE), ncol = lx1, byrow = TRUE), FALSE, 0L) # definitely faster than do.call(rbind, X)
63 |     } else if(lx1 != dX[1L])
64 |       ax[["row.names"]] <- if(length(nx1 <- names(res[[1L]]))) nx1 else .set_row_names(lx1) # could also make deparse(substitute(FUN)), but that is not so typical for data.frames !
65 |    if(any(ax[["class"]] == "data.table")) return(alcSA(res, ax)) # data.table results get their attributes through alcSA() instead of setAttributes()
66 |   }
67 |   setAttributes(res, ax)
68 | }
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/man/rowbind.Rd:
--------------------------------------------------------------------------------
 1 | \name{rowbind}
 2 | \alias{rowbind}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Row-Bind Lists / Data Frame-Like Objects
 6 | }
 7 | \description{
 8 | \emph{collapse}'s version of \code{data.table::rbindlist} and \code{rbind.data.frame}. The core code is copied from \emph{data.table}, which deserves all credit for the implementation. \code{rowbind} only binds lists/data.frame's. For a more flexible recursive version see \code{\link{unlist2d}}. To combine lists column-wise see \code{\link{add_vars}} or \code{\link{ftransform}} (with replacement).
 9 | }
10 | \usage{
11 | rowbind(\dots, idcol = NULL, row.names = FALSE,
12 |         use.names = TRUE, fill = FALSE, id.factor = "auto",
13 |         return = c("as.first", "data.frame", "data.table", "tibble", "list"))
14 | }
15 | \arguments{
16 |   \item{\dots}{a single list of list-like objects (data.frames) or comma separated objects (internally assembled using \code{list(\dots)}). Names can be supplied if \code{!is.null(idcol)}.}
17 | 
18 |   \item{idcol}{character. The name of an id-column to be generated identifying the source of rows in the final object. Using \code{idcol = TRUE} will set the name to \code{".id"}. If the input list has names, these will form the content of the id column, otherwise integers are used. To save memory, it is advised to keep \code{id.factor = TRUE}.}
19 | 
20 |     \item{row.names}{\code{TRUE} extracts row names from all the objects in the input list and adds them to the output in a column named \code{"row.names"}. Alternatively, a column name, e.g. \code{row.names = "variable"}, can be supplied. }
21 | 
22 |   \item{use.names}{logical. \code{TRUE} binds by matching column name, \code{FALSE} by position. }
23 | 
24 |   \item{fill}{logical. \code{TRUE} fills missing columns with NAs. When \code{TRUE}, \code{use.names} is set to \code{TRUE}.}
25 | 
26 |   \item{id.factor}{if \code{TRUE} and \code{!isFALSE(idcol)}, create id column as factor instead of character or integer vector. It is also possible to specify \code{"ordered"} to generate an ordered factor id. \code{"auto"} uses \code{TRUE} if \code{!is.null(names(l))} where \code{l} is the input list (because factors are much more memory efficient than character vectors). }
27 | 
28 | \item{return}{an integer or string specifying what to return. \code{1 - "as.first"} preserves the attributes of the first element of the list, \code{2/3/4 - "data.frame"/"data.table"/"tibble"} coerces to specific objects, and \code{5 - "list"} returns a (named) list. }
29 | 
30 | }
31 | 
32 | \value{
33 | a long list or data frame-like object formed by combining the rows / elements of the input objects. The \code{return} argument controls the exact format of the output.
34 | }
35 | 
36 | 
37 | \seealso{
38 | \code{\link{unlist2d}}, \code{\link{add_vars}}, \code{\link{ftransform}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview}
39 | }
40 | \examples{
41 | # These are the same
42 | rowbind(mtcars, mtcars)
43 | rowbind(list(mtcars, mtcars))
44 | 
45 | # With id column
46 | rowbind(mtcars, mtcars, idcol = "id")
47 | rowbind(a = mtcars, b = mtcars, idcol = "id")
48 | 
49 | # With saving row-names
50 | rowbind(mtcars, mtcars, row.names = "cars")
51 | rowbind(a = mtcars, b = mtcars, idcol = "id", row.names = "cars")
52 | 
53 | # Filling up columns
54 | rowbind(mtcars, mtcars[2:8], fill = TRUE)
55 | }
56 | % Add one or more standard keywords, see file 'KEYWORDS' in the
57 | % R documentation directory (show via RShowDoc("KEYWORDS")):
58 | \keyword{manip}
59 | % \keyword{ ~kwd2 }
60 | % Use only one keyword per line.
61 | % For non-standard keywords, use \concept instead of \keyword:
62 | % \concept{ ~cpt1 }
63 | % \concept{ ~cpt2 }
64 | % Use only one concept per line.
65 | 


--------------------------------------------------------------------------------
/R/fcumsum.R:
--------------------------------------------------------------------------------
 1 | 
 2 | ford <- function(x, g = NULL) { # ordering vector for x; when g is given, its keys are placed before x in the sort
 3 |   if(is.null(x)) return(NULL)
 4 |   if(is.null(g)) { # no grouping: order on x alone
 5 |     if(is.list(x)) return(do.call(order, c(x, list(method = "radix"))))
 6 |     return(if(length(x) < 1000L) .Call(C_radixsort, TRUE, FALSE, FALSE, FALSE, TRUE, pairlist(x)) else order(x, method = "radix"))
 7 |   }
 8 |   keys_g <- if(is.atomic(g)) list(g) else if(is_GRP(g)) g[2L] else g # a GRP object contributes only its second element
 9 |   keys_x <- if(is.atomic(x)) list(x) else x
10 |   do.call(order, c(keys_g, keys_x, list(method = "radix")))
11 | }
12 | 
13 | fcumsum <- function(x, ...) UseMethod("fcumsum") # S3 generic, dispatching on the class of x (explicit dispatch argument ', x' left out)
14 | 
15 | fcumsum.default <- function(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, ...) {
16 |   # Vector method. g: optional grouping, o: optional ordering; with check.o = TRUE a non-empty o is first passed through ford(o, g).
17 |   if(!missing(...)) unused_arg_action(match.call(), ...)
18 |   ord <- if(length(o) && check.o) ford(o, g) else o
19 |   # without grouping 0L is passed for both group arguments, otherwise the first two elements of G_guo(g)
20 |   gl <- if(is.null(g)) list(0L, 0L) else G_guo(g)
21 |   .Call(C_fcumsum, x, gl[[1L]], gl[[2L]], ord, na.rm, fill)
22 | }
23 | 
24 | fcumsum.pseries <- function(x, na.rm = .op[["na.rm"]], fill = FALSE, shift = "time", ...) {
25 |   if(!missing(...)) unused_arg_action(match.call(), ...)
26 |   ix <- uncl2pix(x)
27 |   id <- ix[[1L]] # first index element is used as the grouping
28 |   ord <- switch(shift, time = ford(ix[[2L]], id), row = NULL, stop("'shift' must be either 'time' or 'row'")) # "time": order by the second index element within id; "row": no ordering
29 |   ngrp <- fnlevels(id)
30 |   if(is.matrix(x)) return(.Call(C_fcumsumm, x, ngrp, id, ord, na.rm, fill)) # matrix-shaped series use the matrix routine
31 |   .Call(C_fcumsum, x, ngrp, id, ord, na.rm, fill)
32 | }
33 | 
34 | fcumsum.matrix <- function(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, ...) {
35 |   if(!missing(...)) unused_arg_action(match.call(), ...)
36 |   ord <- if(length(o) && check.o) ford(o, g) else o # with check.o = TRUE a non-empty o is first passed through ford(o, g)
37 |   # without grouping 0L is passed for both group arguments, otherwise the first two elements of G_guo(g)
38 |   gl <- if(is.null(g)) list(0L, 0L) else G_guo(g)
39 |   .Call(C_fcumsumm, x, gl[[1L]], gl[[2L]], ord, na.rm, fill)
40 | }
41 | 
42 | fcumsum.zoo <- function(x, ...) if(is.matrix(x)) fcumsum.matrix(x, ...) else fcumsum.default(x, ...) # matrix-shaped x goes to the matrix method, everything else to the default method
43 | fcumsum.units <- fcumsum.zoo # 'units' objects use the same routing
44 | 
45 | fcumsum.grouped_df <- function(x, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, keep.ids = TRUE, ...) { # grouped_df method: the grouping comes from x itself, 'o' may be an expression in the columns of x
46 |   if(!missing(...)) unused_arg_action(match.call(), ...)
47 |   g <- GRP.grouped_df(x, call = FALSE)
48 |   osym <- substitute(o) # o is captured unevaluated so it can refer to columns of x
49 |   nam <- attr(x, "names")
50 |   gn <- which(nam %in% g[[5L]]) # positions of the columns whose names appear in g[[5L]] (the grouping columns)
51 |   if(!is.null(osym)) {
52 |     o <- eval(osym, x, parent.frame()) # evaluate o inside x, with the caller's frame as enclosure
53 |     if(!anyNA(on <- match(all.vars(osym), nam))) { # every variable used in o is a column of x: those columns are excluded as well
54 |       gn <- c(gn, on)
55 |       if(anyDuplicated.default(gn)) stop("timevar coincides with grouping variables!")
56 |     }
57 |     if(check.o) o <- ford(o, g)
58 |   }
59 |   if(length(gn)) { # id columns present: only the remaining columns are passed to C_fcumsuml
60 |     ax <- attributes(x)
61 |     res <- .Call(C_fcumsuml,.subset(x,-gn),g[[1L]],g[[2L]],o,na.rm,fill)
62 |     if(keep.ids) res <- c(.subset(x, gn), res) # id columns are placed in front of the result
63 |     ax[["names"]] <- names(res)
64 |     return(setAttributes(res, ax))
65 |   }
66 |   .Call(C_fcumsuml,x,g[[1L]],g[[2L]],o,na.rm,fill)
67 | }
68 | 
69 | fcumsum.data.frame <- function(x, g = NULL, o = NULL, na.rm = .op[["na.rm"]], fill = FALSE, check.o = TRUE, ...) {
70 |   if(!missing(...)) unused_arg_action(match.call(), ...)
71 |   ord <- if(length(o) && check.o) ford(o, g) else o # with check.o = TRUE a non-empty o is first passed through ford(o, g)
72 |   # without grouping 0L is passed for both group arguments, otherwise the first two elements of G_guo(g)
73 |   gl <- if(is.null(g)) list(0L, 0L) else G_guo(g)
74 |   .Call(C_fcumsuml, x, gl[[1L]], gl[[2L]], ord, na.rm, fill)
75 | }
76 | 
77 | fcumsum.list <- function(x, ...) fcumsum.data.frame(x, ...) # lists are handled by the data.frame method
78 | 
79 | fcumsum.pdata.frame <- function(x, na.rm = .op[["na.rm"]], fill = FALSE, shift = "time", ...) {
80 |   if(!missing(...)) unused_arg_action(match.call(), ...)
81 |   ix <- uncl2pix(x)
82 |   id <- ix[[1L]] # first index element is used as the grouping
83 |   ord <- switch(shift, time = ford(ix[[2L]], id), row = NULL, stop("'shift' must be either 'time' or 'row'")) # "time": order by the second index element within id; "row": no ordering
84 |   .Call(C_fcumsuml, x, fnlevels(id), id, ord, na.rm, fill)
85 | }
86 | 


--------------------------------------------------------------------------------
/man/fslice.Rd:
--------------------------------------------------------------------------------
 1 | \name{fslice}
 2 | \alias{fslice}
 3 | \alias{fslicev}
 4 | \title{
 5 | Fast Slicing of Matrix-Like Objects
 6 | }
 7 | \description{
 8 | A fast function to extract rows from a matrix or data frame-like object (by groups).
 9 | }
10 | \usage{
11 | fslice(x, ..., n = 1, how = "first", order.by = NULL,
12 |        na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE)
13 | 
14 | fslicev(x, cols = NULL, n = 1, how = "first", order.by = NULL,
15 |         na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE, ...)
16 | }
17 | \arguments{
18 |   \item{x}{a matrix, data frame or list-like object, including 'grouped_df'.}
19 |   \item{\dots}{for \code{fslice}: names or sequences of columns to group by - passed to \code{\link{fselect}}. If \code{x} is a matrix: atomic vectors to group \code{x}. Can be empty to operate on (un)grouped data. For \code{fslicev}: further arguments passed to \code{\link{GRP}} (such as \code{decreasing}, \code{na.last}, \code{method}). }
20 |   \item{cols}{select columns to group by, using column names, indices, a logical vector or a selector function (e.g. \code{is_categorical}). It can also be a list of vectors, or, if \code{x} is a matrix, a single vector.}
21 |   \item{n}{integer or proportion (if < 1). Number of rows to select from each group. If a proportion is provided, it is converted to the equivalent number of rows using \code{max(1, round(n * nrow(x)))} or \code{max(1, round(n * nrow(x) / N.groups))} for grouped data.}
22 |   \item{how}{character. Method to select rows. One of:
23 |     \itemize{
24 |       \item \code{"first"}: select first \code{n} rows
25 |       \item \code{"last"}: select last \code{n} rows
26 |       \item \code{"min"}: select \code{n} rows with minimum values of \code{order.by}
27 |       \item \code{"max"}: select \code{n} rows with maximum values of \code{order.by}
28 |     }
29 |   }
30 |   \item{order.by}{vector or column name to order by when \code{how} is \code{"min"} or \code{"max"}. Must be same length as rows in \code{x}. In \code{fslice} it must not be quoted.}
31 |   \item{na.rm}{logical. If \code{TRUE}, missing values in \code{order.by} are removed before selecting rows.}
32 |   \item{sort}{logical. If \code{TRUE}, sort selected rows on the grouping columns. \code{FALSE} uses first-appearance order (including grouping columns if \code{how} is \code{"first"} or \code{"last"}) - fastest.}
33 |   \item{with.ties}{logical. If \code{TRUE} and \code{how} is \code{"min"} or \code{"max"}, returns all rows with the extreme value. Currently only supported for \code{n = 1} and \code{sort = FALSE}.}
34 | }
35 | \value{
36 | A subset of \code{x} containing the selected rows.
37 | }
38 | \seealso{
39 | \code{\link{fsubset}}, \code{\link{fcount}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview}
40 | }
41 | \examples{
42 | # Basic usage
43 | fslice(mtcars, n = 3)                    # First 3 rows
44 | fslice(mtcars, n = 3, how = "last")      # Last 3 rows
45 | fslice(mtcars, n = 0.1)                  # First 10\% of rows
46 | 
47 | # Using order.by
48 | fslice(mtcars, n = 3, how = "min", order.by = mpg)  # 3 cars with lowest mpg
49 | fslice(mtcars, n = 3, how = "max", order.by = mpg)  # 3 cars with highest mpg
50 | 
51 | # With grouping
52 | mtcars |> fslice(cyl, n = 2)                        # First 2 cars per cylinder
53 | mtcars |> fslice(cyl, n = 2, sort = TRUE)           # with sorting (slightly less efficient)
54 | mtcars |> fslice(cyl, n = 2, how = "min", order.by = mpg)  # 2 lowest mpg cars per cylinder
55 | 
56 | # Using with.ties
57 | mtcars |> fslice(cyl, n = 1, how = "min", order.by = mpg, with.ties = TRUE)
58 | 
59 | # With grouped data
60 | mtcars |>
61 |   fgroup_by(cyl) |>
62 |   fslice(n = 2, how = "max", order.by = mpg)        # 2 highest mpg cars per cylinder
63 | }
64 | \keyword{manip}
65 | 


--------------------------------------------------------------------------------
/man/fnobs.Rd:
--------------------------------------------------------------------------------
 1 | \name{fnobs}
 2 | \alias{fnobs}
 3 | \alias{fnobs.default}
 4 | \alias{fnobs.matrix}
 5 | \alias{fnobs.data.frame}
 6 | \alias{fnobs.grouped_df}
 7 | \title{Fast (Grouped) Observation Count for Matrix-Like Objects}  % Vectors, Matrix and Data Frame Columns}
 8 | \description{
 9 | \code{fnobs} is a generic function that (column-wise) computes the number of non-missing values in \code{x}, (optionally) grouped by \code{g}. It is much faster than \code{sum(!is.na(x))}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) observation count.
10 | }
11 | 
12 | \usage{
13 | fnobs(x, \dots)
14 | 
15 | \method{fnobs}{default}(x, g = NULL, TRA = NULL, use.g.names = TRUE, \dots)
16 | 
17 | \method{fnobs}{matrix}(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, \dots)
18 | 
19 | \method{fnobs}{data.frame}(x, g = NULL, TRA = NULL, use.g.names = TRUE, drop = TRUE, \dots)
20 | 
21 | \method{fnobs}{grouped_df}(x, TRA = NULL, use.g.names = FALSE, keep.group_vars = TRUE, \dots)
22 | }
23 | \arguments{
24 | \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').}
25 | 
26 | \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.}
27 | 
28 | \item{TRA}{an integer or quoted operator indicating the transformation to perform:
29 | 0 - "na"     |     1 - "fill"     |     2 - "replace"     |     3 - "-"     |     4 - "-+"     |     5 - "/"     |     6 - "\%"     |     7 - "+"     |     8 - "*"     |     9 - "\%\%"     |     10 - "-\%\%". See \code{\link{TRA}}.}
30 | 
31 | 
32 | \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.}
33 | 
34 | \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.}
35 | 
36 | \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.}
37 | 
38 | \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.}
39 | 
40 | }
41 | \details{
42 | \code{fnobs} preserves all attributes of non-classed vectors / columns, and only the 'label' attribute (if available) of classed vectors / columns (i.e. dates or factors). When applied to data frames and matrices, the row-names are adjusted as necessary.
43 | }
44 | \value{
45 | Integer. The number of non-missing observations in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its number of non-missing observations, grouped by \code{g}.
46 | }
47 | \seealso{
48 | \code{\link{fndistinct}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview}
49 | }
50 | \examples{
51 | ## default vector method
52 | fnobs(airquality$Solar.R)                   # Simple Nobs
53 | fnobs(airquality$Solar.R, airquality$Month) # Grouped Nobs
54 | 
55 | ## data.frame method
56 | fnobs(airquality)
57 | fnobs(airquality, airquality$Month)
58 | fnobs(wlddev)                               # Works with data of all types!
59 | head(fnobs(wlddev, wlddev$iso3c))
60 | 
61 | ## matrix method
62 | aqm <- qM(airquality)
63 | fnobs(aqm)                                  # Also works for character or logical matrices
64 | fnobs(aqm, airquality$Month)
65 | 
66 | ## method for grouped data frames - created with dplyr::group_by or fgroup_by
67 | airquality |> fgroup_by(Month) |> fnobs()
68 | wlddev |> fgroup_by(country) |>
69 |            fselect(PCGDP,LIFEEX,GINI,ODA) |> fnobs()
70 | }
71 | \keyword{univar}
72 | \keyword{manip}
73 | 


--------------------------------------------------------------------------------
/tests/testthat/test-dapply.R:
--------------------------------------------------------------------------------
 1 | context("dapply")
 2 | 
 3 | # rm(list = ls())
 4 | 
 5 | 
 6 | test_that("All common uses of dapply can be performed, as per examples", {
 7 |   # data.frame input: applying force must return an object equal to the input, also when a custom attribute is attached
 8 |   expect_equal(dapply(mtcars, force), mtcars)
 9 |   expect_equal(dapply(`attr<-`(mtcars, "bla", 1), force), `attr<-`(mtcars, "bla", 1))
10 |   expect_equal(dapply(`attr<-`(mtcars, "bla", 1), force, MARGIN = 1), `attr<-`(mtcars, "bla", 1))
11 |   expect_visible(dapply(mtcars, log))
12 |   expect_true(is.matrix(dapply(mtcars, log, return = "matrix")))
13 | 
14 |   # matrix input (plain numeric matrix and the EuStockMarkets time series matrix), column-wise and with MARGIN = 1
15 |   m <- as.matrix(mtcars)
16 |   expect_equal(dapply(m, force), m)
17 |   expect_equal(dapply(EuStockMarkets, force), EuStockMarkets)
18 |   expect_equal(dapply(EuStockMarkets, force, MARGIN = 1), EuStockMarkets)
19 |   expect_visible(dapply(m, log))
20 |   expect_true(is.data.frame(dapply(m, log, return = "data.frame")))
21 | 
22 |   # matrix <> data.frame conversions: requesting the other return type must equal calling dapply on the other input type
23 |   expect_equal(dapply(mtcars, log, return = "matrix"), dapply(m, log))
24 |   expect_equal(dapply(mtcars, log, return = "matrix", MARGIN = 1), dapply(m, log, MARGIN = 1))
25 |   expect_equal(dapply(m, log, return = "data.frame"), dapply(mtcars, log))
26 |   expect_equal(dapply(m, log, return = "data.frame", MARGIN = 1), dapply(mtcars, log, MARGIN = 1))
27 |   expect_equal(dapply(mtcars, quantile, return = "matrix"), dapply(m, quantile))
28 |   expect_equal(dapply(mtcars, quantile, return = "matrix", MARGIN = 1), dapply(m, quantile, MARGIN = 1))
29 |   expect_equal(dapply(m, quantile, return = "data.frame"), dapply(mtcars, quantile))
30 |   expect_equal(dapply(m, quantile, return = "data.frame", MARGIN = 1), dapply(mtcars, quantile, MARGIN = 1))
31 | 
32 |   # scalar function gives atomic vector, and the result is equal for matrix and data.frame input
33 |   expect_true(is.atomic(dapply(mtcars, sum)))
34 |   expect_equal(dapply(m, sum), dapply(mtcars, sum))
35 |   expect_true(is.atomic(dapply(mtcars, sum, MARGIN = 1)))
36 |   expect_equal(dapply(m, sum, MARGIN = 1), dapply(mtcars, sum, MARGIN = 1))
37 | 
38 |   # drop = FALSE retains object structure (data.frame stays data.frame, matrix stays matrix)
39 |   expect_true(is.data.frame(dapply(mtcars, sum, drop = FALSE)))
40 |   expect_true(is.data.frame(dapply(mtcars, sum, MARGIN = 1, drop = FALSE)))
41 |   expect_true(is.matrix(dapply(m, sum, drop = FALSE)))
42 |   expect_true(is.matrix(dapply(m, sum, MARGIN = 1, drop = FALSE)))
43 | 
44 |   # matrix <> data.frame conversions without dropping dimensions
45 |   expect_equal(dapply(m, sum, drop = FALSE), dapply(mtcars, sum, return = "matrix", drop = FALSE))
46 |   expect_equal(dapply(mtcars, sum, drop = FALSE), dapply(m, sum, return = "data.frame", drop = FALSE))
47 | 
48 |   # ... but if the function is vector-valued, drop = FALSE does nothing
49 |   expect_true(is.data.frame(dapply(mtcars, log, drop = FALSE)))
50 |   expect_true(is.data.frame(dapply(mtcars, log, MARGIN = 1, drop = FALSE)))
51 |   expect_true(is.data.frame(dapply(mtcars, quantile, drop = FALSE)))
52 |   expect_true(is.data.frame(dapply(mtcars, quantile, MARGIN = 1, drop = FALSE)))
53 |   expect_true(is.matrix(dapply(m, log, drop = FALSE)))
54 |   expect_true(is.matrix(dapply(m, log, MARGIN = 1, drop = FALSE)))
55 |   expect_true(is.matrix(dapply(m, quantile, drop = FALSE)))
56 |   expect_true(is.matrix(dapply(m, quantile, MARGIN = 1, drop = FALSE)))
57 | 
58 |   # passing additional arguments works (these calls are not wrapped in an expectation; they only need to run without error):
59 |   dapply(mtcars, weighted.mean, mtcars$hp, na.rm = TRUE)
60 |   dapply(m, weighted.mean, mtcars$hp, na.rm = TRUE)
61 | })
62 | 
63 | 
64 | test_that("dapply produces errors for wrong input", { # every call below is expected to raise an error
65 |   expect_error(dapply("a", sum)) # character vector input
66 |   expect_error(dapply(~ y, sum)) # formula input
67 |   expect_error(dapply(iris3, sum)) # iris3 is a 3-dimensional array
68 |   expect_error(dapply(mtcars, sum2)) # NOTE(review): sum2 is presumably an undefined function name - confirm
69 |   expect_error(dapply(mtcars, sum, MARGIN = 3)) # presumably only MARGIN = 1 or 2 is valid - confirm against dapply
70 |   expect_error(dapply(mtcars, sum, MARGIN = 1:2)) # MARGIN of length 2
71 |   expect_error(dapply(mtcars, sum, MARGIN = "a")) # character MARGIN
72 |   expect_error(dapply(mtcars, sum, return = "bla", drop = FALSE)) # "bla" is not one of the return options used elsewhere in this file
73 | })
74 | 


--------------------------------------------------------------------------------
/man/summary-statistics.Rd:
--------------------------------------------------------------------------------
 1 | \name{summary-statistics} % \name{Time Series and Panel Computations}
 2 | \alias{A9-summary-statistics}
 3 | \alias{summary-statistics}
 4 | % \alias{tscomp}
 5 | \title{Summary Statistics} % \emph{collapse}
 6 | 
 7 | \description{
 8 | \emph{collapse} provides the following functions to efficiently summarize and examine data:
 9 | \itemize{
10 | \item \code{\link{qsu}}, shorthand for quick-summary, is an extremely fast summary command inspired by the (xt)summarize command in the STATA statistical software. It computes a set of 7 statistics (nobs, mean, sd, min, max, skewness and kurtosis) using a numerically stable one-pass method. Statistics can be computed weighted, by groups, and also within and between entities (for multilevel / panel data).
11 | 
12 | \item \code{\link{qtab}}, shorthand for quick-table, is a faster and more versatile alternative to \code{\link{table}}. Notably, it also supports tabulations with frequency weights, as well as computing a statistic over combinations of variables. 'qtab's inherit the 'table' class, allowing for seamless application of 'table' methods.
13 | 
14 | \item \code{\link{descr}} computes a concise and detailed description of a data frame, including (sorted) frequency tables for categorical variables and various statistics and quantiles for numeric variables. It is inspired by \code{Hmisc::describe}, but about 10x faster.
15 | 
16 | \item \code{\link{pwcor}}, \code{\link{pwcov}} and \code{\link{pwnobs}} compute (weighted) pairwise correlations, covariances and observation counts on matrices and data frames. Pairwise correlations and covariances can be computed together with observation counts and p-values. The elaborate print method displays all of these statistics in a single correlation table.
17 | 
18 | \item \code{\link{varying}} very efficiently checks for the presence of any variation in data (optionally) within groups (such as panel-identifiers). A variable is variant if it has at least 2 distinct non-missing data points.
19 | 
20 | % \item \code{\link{fFtest}} is a fast implementation of the R-Squared based F-test, to test \bold{exclusion restrictions} in linear models potentially involving multiple large factors (fixed effects). It internally utilizes \code{\link{fhdwithin}} to project out factors while counting the degrees of freedom.
21 | 
22 | }
23 | }
24 | \section{Table of Functions}{
25 |   \tabular{lllll}{\emph{ Function / S3 Generic }   \tab\tab \emph{ Methods }   \tab\tab \emph{ Description }  \cr
26 |                  \code{\link{qsu}} \tab\tab \code{default, matrix, data.frame, grouped_df, pseries, pdata.frame, sf}  \tab\tab Fast (grouped, weighted, panel-decomposed) summary statistics \cr
27 |                  \code{\link{qtab}} \tab\tab No methods, for data frames or vectors  \tab\tab Fast (weighted) cross tabulation \cr
28 |                  \code{\link{descr}} \tab\tab \code{default, grouped_df}  (default method handles most objects)  \tab\tab Detailed statistical description of data frame \cr
29 |                  \code{\link{pwcor}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise (weighted) correlations \cr
30 |                  \code{\link{pwcov}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise (weighted) covariances \cr
31 |                  \code{\link{pwnobs}} \tab\tab No methods, for matrices or data frames \tab\tab Pairwise observation counts \cr
32 |                 \code{\link{varying}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df} \tab\tab Fast variation check
33 | %                \code{\link{fFtest}} \tab\tab No methods, its a standalone test to which data needs to be supplied.  \tab\tab Fast F-test of exclusion restrictions in linear models (with factors variables) \cr
34 | }
35 | }
36 | \seealso{
37 | \link[=collapse-documentation]{Collapse Overview}, \link[=fast-statistical-functions]{Fast Statistical Functions}
38 | }
39 | \keyword{manip}
40 | \keyword{documentation}
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/misc/legacy/sorted out 1.7.6 - 1.8.0/collapse-depreciated.Rd:
--------------------------------------------------------------------------------
 1 | \name{collapse-depreciated}
 2 | \alias{collapse-depreciated}
 3 | \alias{Recode}
 4 | \alias{replace_non_finite}
 5 | \alias{is.regular}
 6 | \title{
 7 |  Deprecated \emph{collapse} Functions
 8 | }
 9 | \description{
10 | The functions \code{Recode} and \code{replace_non_finite} available until \emph{collapse} v1.1.0 will be removed soon. Since v1.2.0, \code{Recode} is replaced by \code{\link{recode_num}} and \code{\link{recode_char}} and \code{replace_non_finite} is replaced by \code{\link{replace_Inf}}. Since version 1.5.1, \code{is.regular} is deprecated - the function is not very useful and clashes with a more important one in the \emph{zoo} package.
11 | 
12 | %The function \code{as.factor.GRP} was renamed to \code{\link{as_factor_GRP}} to make it clear that this is not a method, and \code{group_names.GRP} was renamed to \code{\link{GRPnames}} for the same reason and to increase parsimony.
13 | }
14 | \usage{
15 | Recode(X, \dots, copy = FALSE, reserve.na.nan = TRUE, regex = FALSE)
16 | 
17 | replace_non_finite(X, value = NA, replace.nan = TRUE)
18 | 
19 | is.regular(x)
20 | }
21 | %- maybe also 'usage' for other objects documented here.
22 | \arguments{
23 |   \item{X}{a vector, matrix or data frame.}
24 |   \item{x}{an R object. }
25 |   \item{\dots}{comma-separated recode arguments of the form: \code{name = newname, `2` = 0, `NaN` = 0, `NA` = 0, `Inf` = NA, `-Inf` = NA}, etc\dots}
26 |   \item{value}{a single (scalar) value to replace matching elements with. Default is \code{NA}.}
27 |   \item{copy}{logical. For reciprocal or sequential replacements of the form \code{a = b, b = c} make a copy of \code{X} to prevent \code{a} being replaced with \code{b} and then all \code{b}-values being replaced with \code{c} again. In general \code{Recode} does the replacements one after the other, starting with the first. }
28 |   \item{reserve.na.nan}{logical. \code{TRUE} identifies \code{NA} and \code{NaN} as special numeric values and does the correct replacement. \code{FALSE} will treat \code{NA/NaN} as strings, and thus not match numeric \code{NA/NaN}. \emph{Note}: This is not an issue for \code{Inf/-Inf}, which are matched in both numeric and character variables. }
29 |   \item{regex}{logical. If \code{TRUE}, all recode-argument names are (sequentially) passed to \code{\link{grepl}} as a pattern to search \code{X}. All matches are replaced.}
30 |   \item{replace.nan}{logical. \code{TRUE} (default) replaces \code{NaN/Inf/-Inf}. \code{FALSE} replaces only \code{Inf/-Inf}.}
31 | }
32 | % \details{
33 | %%  ~~ If necessary, more details than the description above ~~
34 | % }
35 | % \value{
36 | %%  ~Describe the value returned
37 | %%  If it is a LIST, use
38 | %%  \item{comp1 }{Description of 'comp1'}
39 | %%  \item{comp2 }{Description of 'comp2'}
40 | %% \dots
41 | % }
42 | % \references{
43 | %% ~put references to the literature/web site here ~
44 | % }
45 | % \author{
46 | %%  ~~who you are~~
47 | % }
48 | \note{
49 | \code{Recode} is not suitable for recoding factors or other classed objects / columns, it simply does \code{X[X == value] <- replacement} in a more efficient way. For classed objects, see for example \code{dplyr::recode}.
50 | }
51 | 
52 | %% ~Make other sections like Warning with \section{Warning }{\dots.} ~
53 | 
54 | \seealso{
55 | \link[=recode-replace]{Recode and Replace Values}, \link[=collapse-documentation]{Collapse Overview}
56 | }
57 | \examples{
58 | \dontrun{
59 | Recode(c("a","b","c"), a = "b", b = "c")
60 | Recode(c("a","b","c"), a = "b", b = "c", copy = TRUE)
61 | Recode(c("a","b","c"), a = "b", b = "a", copy = TRUE)
62 | Recode(month.name, ber = NA, regex = TRUE)
63 | mtcr <- Recode(mtcars, `0` = 2, `4` = Inf, `1` = NaN)
64 | replace_non_finite(mtcr)
65 | replace_non_finite(mtcr, replace.nan = FALSE)
66 | }
67 | }
68 | % Add one or more standard keywords, see file 'KEYWORDS' in the
69 | % R documentation directory.
70 | \keyword{manip} % __ONLY ONE__ keyword per line % use one of  RShowDoc("KEYWORDS")
71 | \keyword{documentation}
72 | 


--------------------------------------------------------------------------------
/man/fcount.Rd:
--------------------------------------------------------------------------------
 1 | \name{fcount}
 2 | \alias{fcount}
 3 | \alias{fcountv}
 4 | %- Also NEED an '\alias' for EACH other topic documented here.
 5 | \title{
 6 | Efficiently Count Observations by Group
 7 | }
 8 | \description{
 9 | A much faster replacement for \code{dplyr::count}.
10 | }
11 | \usage{
12 | fcount(x, ..., w = NULL, name = "N", add = FALSE,
13 |       sort = FALSE, decreasing = FALSE)
14 | 
15 | fcountv(x, cols = NULL, w = NULL, name = "N", add = FALSE,
16 |         sort = FALSE, ...)
17 | }
18 | %- maybe also 'usage' for other objects documented here.
19 | \arguments{
20 |   \item{x}{a data frame or list-like object, including 'grouped_df' or 'indexed_frame'. Atomic vectors or matrices can also be passed, but will be sent through \code{\link{qDF}}. }
21 |   \item{\dots}{for \code{fcount}: names or sequences of columns to count cases by - passed to \code{\link{fselect}}. For \code{fcountv}: further arguments passed to \code{\link{GRP}} (such as \code{decreasing}, \code{na.last}, \code{method}, \code{effect} etc.). Leaving this empty will count on all columns. }
22 |   \item{cols}{select columns to count cases by, using column names, indices, a logical vector or a selector function (e.g. \code{is_categorical}).}
23 |   \item{w}{a numeric vector of weights, may contain missing values. In \code{fcount} this can also be the (unquoted) name of a column in the data frame. \code{fcountv} also supports a single character name. \emph{Note} that the corresponding argument in \code{dplyr::count} is called \code{wt}, but \emph{collapse} has a global default for weights arguments to be called \code{w}.}
24 |   \item{name}{character. The name of the column containing the count or sum of weights. In \code{dplyr::count} it is called \code{"n"}, but \code{"N"} is more consistent with the rest of \emph{collapse} and \emph{data.table}.}
25 |   \item{add}{\code{TRUE} adds the count column to \code{x}. Alternatively \code{add = "group_vars"} (or \code{add = "gv"} for parsimony) can be used to retain only the variables selected for counting in \code{x} and the count.}
26 |   \item{sort, decreasing}{arguments passed to \code{\link{GRP}} affecting the order of rows in the output (if \code{add = FALSE}), and the algorithm used for counting. In general, \code{sort = FALSE} is faster unless data is already sorted by the columns used for counting.
27 | }
28 | }
29 | \value{
30 | If \code{x} is a list, an object of the same type as \code{x} with a column (\code{name}) added at the end giving the count. Otherwise, if \code{x} is atomic, a data frame returned from \code{\link[=qDF]{qDF(x)}} with the count column added. By default (\code{add = FALSE}) only the unique rows of \code{x} of the columns used for counting are returned.
31 | }
32 | \seealso{
33 | \code{\link{GRPN}}, \code{\link{fnobs}}, \code{\link{fndistinct}}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}
34 | }
35 | \examples{
36 | fcount(mtcars, cyl, vs, am)
37 | fcountv(mtcars, cols = .c(cyl, vs, am))
38 | fcount(mtcars, cyl, vs, am, sort = TRUE)
39 | fcount(mtcars, cyl, vs, am, add = TRUE)
40 | fcount(mtcars, cyl, vs, am, add = "group_vars")
41 | 
42 | ## With grouped data
43 | mtcars |> fgroup_by(cyl, vs, am) |> fcount()
44 | mtcars |> fgroup_by(cyl, vs, am) |> fcount(add = TRUE)
45 | mtcars |> fgroup_by(cyl, vs, am) |> fcount(add = "group_vars")
46 | 
47 | ## With indexed data: by default counting on the first index variable
48 | wlddev |> findex_by(country, year) |> fcount()
49 | wlddev |> findex_by(country, year) |> fcount(add = TRUE)
50 | # Use fcountv to pass additional arguments to GRP.pdata.frame,
51 | # here using the effect argument to choose a different index variable
52 | wlddev |> findex_by(country, year) |> fcountv(effect = "year")
53 | wlddev |> findex_by(country, year) |> fcountv(add = "group_vars", effect = "year")
54 | 
55 | }
56 | % Add one or more standard keywords, see file 'KEYWORDS' in the
57 | % R documentation directory (show via RShowDoc("KEYWORDS")):
58 | \keyword{manip}
59 | % \keyword{ ~kwd2 }
60 | % Use only one keyword per line.
61 | % For non-standard keywords, use \concept instead of \keyword:
62 | % \concept{ ~cpt1 }
63 | % \concept{ ~cpt2 }
64 | % Use only one concept per line.
65 | 


--------------------------------------------------------------------------------
/man/rsplit.Rd:
--------------------------------------------------------------------------------
 1 | \name{rsplit}
 2 | \alias{rsplit}
 3 | \alias{rsplit.default}
 4 | \alias{rsplit.matrix}
 5 | \alias{rsplit.data.frame}
 6 | \title{
 7 | Fast (Recursive) Splitting
 8 | }
 9 | \description{
10 | \code{rsplit} (recursively) splits a vector, matrix or data frame into subsets according to combinations of (multiple) vectors / factors and returns a (nested) list. If \code{flatten = TRUE}, the list is flattened yielding the same result as \code{\link{split}}. \code{rsplit} is implemented as a wrapper around \code{\link{gsplit}}, and significantly faster than \code{\link{split}}.
11 | }
12 | \usage{
13 | rsplit(x, \dots)
14 | 
15 | \method{rsplit}{default}(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, \dots)
16 | 
17 | \method{rsplit}{matrix}(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE,
18 |        drop.dim = FALSE, \dots)
19 | 
20 | \method{rsplit}{data.frame}(x, by, drop = TRUE, flatten = FALSE, cols = NULL,
21 |        keep.by = FALSE, simplify = TRUE, use.names = TRUE, \dots)
22 | }
23 | \arguments{
24 |   \item{x}{a vector, matrix, data.frame or list like object.}
25 |   \item{fl}{a \code{\link{GRP}} object, or a (list of) vector(s) / factor(s) (internally converted to a \code{\link{GRP}} object(s)) used to split \code{x}.}
26 |   \item{by}{\emph{data.frame method}: Same as \code{fl}, but also allows one- or two-sided formulas i.e. \code{~ group1} or \code{var1 + var2 ~ group1 + group2}. See Examples.}
27 |   \item{drop}{logical. \code{TRUE} removes unused levels or combinations of levels from factors before splitting; \code{FALSE} retains those combinations yielding empty list elements in the output.}
28 |   \item{flatten}{logical. If \code{fl} is a list of vectors / factors, \code{TRUE} calls \code{\link{GRP}} on the list, creating a single grouping used for splitting; \code{FALSE} yields recursive splitting.}
29 |   \item{use.names}{logical. \code{TRUE} returns a named list (like \code{\link{split}}); \code{FALSE} returns a plain list.}
30 |   \item{drop.dim}{logical. \code{TRUE} returns atomic vectors for matrix-splits consisting of one row. }
31 |   \item{cols}{\emph{data.frame method}: Select columns to split using a function, column names, indices or a logical vector. \emph{Note}: \code{cols} is ignored if a two-sided formula is passed to \code{by}.}
32 |   \item{keep.by}{logical. If a formula is passed to \code{by}, then \code{TRUE} preserves the splitting (right-hand-side) variables in the data frame.}
33 |   \item{simplify}{\emph{data.frame method}: Logical. \code{TRUE} calls \code{rsplit.default} if a single column is split e.g. \code{rsplit(data, col1 ~ group1)} becomes the same as \code{rsplit(data$col1, data$group1)}.}
34 | \item{\dots}{further arguments passed to \code{\link{GRP}}. Sensible choices would be \code{sort = FALSE}, \code{decreasing = TRUE} or \code{na.last = FALSE}. Note that these options only apply if \code{fl} is not already a (list of) factor(s).}
35 | }
36 | \value{
37 | a (nested) list containing the subsets of \code{x}.
38 | }
39 | \seealso{
40 | \code{\link{gsplit}}, \code{\link{rapply2d}}, \code{\link{unlist2d}}, \link[=list-processing]{List Processing}, \link[=collapse-documentation]{Collapse Overview}
41 | }
42 | \examples{
43 | rsplit(mtcars$mpg, mtcars$cyl)
44 | rsplit(mtcars, mtcars$cyl)
45 | 
46 | rsplit(mtcars, mtcars[.c(cyl, vs, am)])
47 | rsplit(mtcars, ~ cyl + vs + am, keep.by = TRUE)  # Same thing
48 | rsplit(mtcars, ~ cyl + vs + am)
49 | 
50 | rsplit(mtcars, ~ cyl + vs + am, flatten = TRUE)
51 | 
52 | rsplit(mtcars, mpg ~ cyl)
53 | rsplit(mtcars, mpg ~ cyl, simplify = FALSE)
54 | rsplit(mtcars, mpg + hp ~ cyl + vs + am)
55 | rsplit(mtcars, mpg + hp ~ cyl + vs + am, keep.by = TRUE)
56 | 
57 | # Split this sectoral data, first by Variable (Employment and Value Added), then by Country
58 | GGDCspl <- rsplit(GGDC10S, ~ Variable + Country, cols = 6:16)
59 | str(GGDCspl)
60 | 
61 | # The nested list can be reassembled using unlist2d()
62 | head(unlist2d(GGDCspl, idcols = .c(Variable, Country)))
63 | rm(GGDCspl)
64 | 
65 | # Another example with mtcars (not as clean because of row.names)
66 | nl <- rsplit(mtcars, mpg + hp ~ cyl + vs + am)
67 | str(nl)
68 | unlist2d(nl, idcols = .c(cyl, vs, am), row.names = "car")
69 | rm(nl)
70 | }
71 | \keyword{manip}
72 | 


--------------------------------------------------------------------------------
/man/timeid.Rd:
--------------------------------------------------------------------------------
 1 | \name{timeid}
 2 | \alias{timeid}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Generate Integer-Id From Time/Date Sequences
 6 | }
 7 | \description{
 8 | \code{timeid} groups time vectors in a way that preserves the temporal structure. It generates an integer id where unit steps represent the greatest common divisor in the original sequence e.g. \code{c(4, 6, 10) -> c(1, 2, 4)} or \code{c(0.25, 0.75, 1) -> c(1, 3, 4)}.
 9 | }
10 | \usage{
11 | timeid(x, factor = FALSE, ordered = factor, extra = FALSE)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{x}{a numeric time object such as a \code{Date}, \code{POSIXct} or other integer or double vector representing time.}
16 |   \item{factor}{logical. \code{TRUE} returns an (ordered) factor with levels corresponding to the full sequence (without irregular gaps) of time. This is useful for inclusion in the \link[=findex]{index} but might be computationally expensive for long sequences, see Details. \code{FALSE} returns a simpler object of class '\code{\link{qG}}'. }
17 |   \item{ordered}{logical. \code{TRUE} adds a class 'ordered'. }
18 |   \item{extra}{logical. \code{TRUE} attaches a set of 4 diagnostic items as attributes to the result:
19 |    \itemize{
20 |    \item \code{"unique_ints"}: \code{unique(unattrib(timeid(x)))} - the unique integer time steps in first-appearance order. This can be useful to check the size of gaps in the sequence. %The \code{\link{seqid}} function can help in the exploration of this attribute, e.g. \code{seqid(attr(timeid(x, extra = TRUE), "unique"))} shows the number and position of the dicontinuities.
21 |    \item \code{"sort_unique_x"}: \code{sort(unique(x))}.
22 |    \item \code{"range_x"}: \code{range(x)}.
23 |    \item \code{"step_x"}: \code{vgcd(sort(unique(diff(sort(unique(x))))))} - the greatest common divisor.
24 |    }
25 |    \emph{Note} that returning these attributes does not incur additional computations.
26 |   }
27 | }
28 | \details{
29 | Let \code{range_x} and \code{step_x} be the like-named attributes returned when \code{extra = TRUE}, then, if \code{factor = TRUE}, a complete sequence of levels is generated as \code{seq(range_x[1], range_x[2], by = step_x) |> copyMostAttrib(x) |> as.character()}. If \code{factor = FALSE}, the number of timesteps recorded in the \code{"N.groups"} attribute is computed as \code{(range_x[2]-range_x[1])/step_x + 1}, which is equal to the number of factor levels. In both cases the underlying integer id is the same and preserves gaps in time. Large gaps (strong irregularity) can result in many unused factor levels, the generation of which can become expensive. Using \code{factor = FALSE} (the default) is thus more efficient.
30 | }
31 | \value{
32 | A factor or '\code{\link{qG}}' object, optionally with additional attributes attached.
33 | }
34 | 
35 | \seealso{
36 | \code{\link{seqid}}, \link[=indexing]{Indexing}, \link[=time-series-panel-series]{Time Series and Panel Series}, \link[=collapse-documentation]{Collapse Overview}
37 | }
38 | \examples{
39 | oldopts <- options(max.print = 30)
40 | 
41 | # A normal use case
42 | timeid(wlddev$decade)
43 | timeid(wlddev$decade, factor = TRUE)
44 | timeid(wlddev$decade, extra = TRUE)
45 | 
46 | # Here a large number of levels is generated, which is expensive
47 | timeid(wlddev$date, factor = TRUE)
48 | tid <- timeid(wlddev$date, extra = TRUE) # Much faster
49 | str(tid)
50 | 
51 | # The reason for step = 1 are leap years with 366 days every 4 years
52 | diff(attr(tid, "unique"))
53 | 
54 | # So in this case simple factor generation gives a better result
55 | qF(wlddev$date, ordered = TRUE, na.exclude = FALSE)
56 | 
57 | # The best way to deal with this data would be to convert it
58 | # to zoo::yearmon and then use timeid:
59 | timeid(zoo::as.yearmon(wlddev$date), factor = TRUE, extra = TRUE)
60 | 
61 | options(oldopts)
62 | rm(oldopts, tid)
63 | }
64 | % Add one or more standard keywords, see file 'KEYWORDS' in the
65 | % R documentation directory (show via RShowDoc("KEYWORDS")):
66 | \keyword{ts}
67 | \keyword{manip}
68 | % \keyword{ ~kwd2 }
69 | % Use only one keyword per line.
70 | % For non-standard keywords, use \concept instead of \keyword:
71 | % \concept{ ~cpt1 }
72 | % \concept{ ~cpt2 }
73 | % Use only one concept per line.
74 | 


--------------------------------------------------------------------------------
/man/pad.Rd:
--------------------------------------------------------------------------------
 1 | \name{pad}
 2 | \alias{pad}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Pad Matrix-Like Objects with a Value
 6 | }
 7 | \description{
 8 | The \code{pad} function inserts elements / rows filled with \code{value} into a vector, matrix or data frame \code{X} at positions given by \code{i}. It is particularly useful to expand objects returned by statistical procedures which remove missing values to the original data dimensions.
 9 | }
10 | \usage{
11 | pad(X, i, value = NA, method = c("auto", "xpos", "vpos"))
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{X}{
16 |   a vector, matrix, data frame or list of equal-length columns.
17 | }
18 |   \item{i}{
19 |   either an integer (positive or negative) or logical vector giving positions / rows of \code{X} into which \code{value}'s should be inserted, or, alternatively, a positive integer vector with \code{length(i) == NROW(X)}, but with some gaps in the indices into which \code{value}'s can be inserted, or a logical vector with \code{sum(i) == NROW(X)} such that \code{value}'s can be inserted for \code{FALSE} values in the logical vector. See also \code{method} and Examples.
20 | }
21 |   \item{value}{
22 |  a scalar value to be replicated and inserted into \code{X} at positions / rows given by \code{i}. Default is \code{NA}.
23 | }
24 |   \item{method}{
25 |  an integer or string specifying the use of \code{i}. The options are:
26 |       \tabular{lllll}{\emph{ Int. }   \tab\tab \emph{ String }   \tab\tab \emph{ Description }  \cr
27 |                  1 \tab\tab "auto" \tab\tab automatic method selection: If \code{i} is positive integer and \code{length(i) == NROW(X)} or if \code{i} is logical and \code{sum(i) == NROW(X)}, choose method "xpos", else choose "vpos". \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr
28 |                  2 \tab\tab "xpos"   \tab\tab \code{i} is a vector of positive integers or a logical vector giving the positions of the elements / rows of \code{X}. \code{value}'s are inserted where there are gaps / \code{FALSE} values in \code{i}. \cr \tab\tab\tab\tab \cr \tab\tab\tab\tab \cr
29 |                  3 \tab\tab "vpos" \tab\tab \code{i} is a vector of positive / negative integers or a logical vector giving the positions at which \code{value}'s / rows should be inserted into \code{X}.
30 |   }
31 | }
32 | }
33 | \value{
34 | \code{X} with elements / rows filled with \code{value} inserted at positions given by \code{i}.
35 | }
36 | \seealso{
37 | \code{\link{append}}, \link[=recode-replace]{Recode and Replace Values}, \link[=small-helpers]{Small (Helper) Functions}, \link[=collapse-documentation]{Collapse Overview}
38 | }
39 | \examples{
40 | v <- 1:3
41 | 
42 | pad(v, 1:2)       # Automatic selection of method "vpos"
43 | pad(v, -(1:2))    # Same thing
44 | pad(v, c(TRUE, TRUE, FALSE, FALSE, FALSE)) # Same thing
45 | 
46 | pad(v, c(1, 3:4)) # Automatic selection of method "xpos"
47 | pad(v, c(TRUE, FALSE, TRUE, TRUE, FALSE))  # Same thing
48 | 
49 | head(pad(wlddev, 1:3)) # Insert 3 missing rows at the beginning of the data
50 | head(pad(wlddev, 2:4)) # ... at row positions 2-4
51 | 
52 | # pad() is mostly useful for statistical models which only use the complete cases:
53 | mod <- lm(LIFEEX ~ PCGDP, wlddev)
54 | # Generating a residual column in the original data (automatic selection of method "vpos")
55 | settfm(wlddev, resid = pad(resid(mod), mod$na.action))
56 | # Another way to do it:
57 | r <- resid(mod)
58 | i <- as.integer(names(r))
59 | resid2 <- pad(r, i)        # automatic selection of method "xpos"
60 | # here we need to add some elements as flast(i) < nrow(wlddev)
61 | resid2 <- c(resid2, rep(NA, nrow(wlddev)-length(resid2)))
62 | # See that these are identical:
63 | identical(unattrib(wlddev$resid), resid2)
64 | 
65 | # Can also easily get a model matrix at the dimensions of the original data
66 | mm <- pad(model.matrix(mod), mod$na.action)
67 | 
68 | }
69 | % Add one or more standard keywords, see file 'KEYWORDS' in the
70 | % R documentation directory (show via RShowDoc("KEYWORDS")):
71 | \keyword{manip}
72 | % \keyword{ ~kwd2 }
73 | % Use only one keyword per line.
74 | % For non-standard keywords, use \concept instead of \keyword:
75 | % \concept{ ~cpt1 }
76 | % \concept{ ~cpt2 }
77 | % Use only one concept per line.
78 | 


--------------------------------------------------------------------------------
/src/collapse_cpp.h:
--------------------------------------------------------------------------------
 1 | // Prototypes for the package's _collapse_* entry points. Every argument and the return value are SEXP; this header has no #include of its own, so SEXP must already be declared wherever it is included. NOTE(review): the _collapse_<fn>(<arg>SEXP, ...) naming looks like Rcpp-generated export wrappers -- confirm against the C++ sources.
 2 | // BWCpp (BWCpp, BWmCpp and BWlCpp are declared with the same argument list)
 3 | SEXP _collapse_BWCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP);
 4 | // BWmCpp
 5 | SEXP _collapse_BWmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP);
 6 | // BWlCpp
 7 | SEXP _collapse_BWlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP thetaSEXP, SEXP set_meanSEXP, SEXP BSEXP, SEXP fillSEXP);
 8 | // pwnobsmCpp (single argument)
 9 | SEXP _collapse_pwnobsmCpp(SEXP xSEXP);
10 | // varyingCpp
11 | SEXP _collapse_varyingCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP);
12 | // varyingmCpp (varyingCpp arguments plus a trailing drop argument)
13 | SEXP _collapse_varyingmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP);
14 | // varyinglCpp (same argument list as varyingmCpp)
15 | SEXP _collapse_varyinglCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP any_groupSEXP, SEXP dropSEXP);
16 | // fbstatsCpp (the only fbstats* prototype with a setn argument)
17 | SEXP _collapse_fbstatsCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP setnSEXP, SEXP gnSEXP);
18 | // fbstatsmCpp (no setn argument)
19 | SEXP _collapse_fbstatsmCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP);
20 | // fbstatslCpp (same argument list as fbstatsmCpp)
21 | SEXP _collapse_fbstatslCpp(SEXP xSEXP, SEXP extSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP npgSEXP, SEXP pgSEXP, SEXP wSEXP, SEXP stable_algoSEXP, SEXP arraySEXP, SEXP gnSEXP);
22 | // fdiffgrowthCpp (fdiffgrowthCpp, fdiffgrowthmCpp and fdiffgrowthlCpp are declared with the same argument list)
23 | SEXP _collapse_fdiffgrowthCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP);
24 | // fdiffgrowthmCpp
25 | SEXP _collapse_fdiffgrowthmCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP);
26 | // fdiffgrowthlCpp
27 | SEXP _collapse_fdiffgrowthlCpp(SEXP xSEXP, SEXP nSEXP, SEXP diffSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP tSEXP, SEXP retSEXP, SEXP rhoSEXP, SEXP namesSEXP, SEXP powerSEXP);
28 | // flagleadCpp (flagleadCpp, flagleadmCpp and flagleadlCpp are declared with the same argument list)
29 | SEXP _collapse_flagleadCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP);
30 | // flagleadmCpp
31 | SEXP _collapse_flagleadmCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP);
32 | // flagleadlCpp
33 | SEXP _collapse_flagleadlCpp(SEXP xSEXP, SEXP nSEXP, SEXP fillSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP tSEXP, SEXP namesSEXP);
34 | // fscaleCpp (fscaleCpp, fscalemCpp and fscalelCpp are declared with the same argument list)
35 | SEXP _collapse_fscaleCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP);
36 | // fscalemCpp
37 | SEXP _collapse_fscalemCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP);
38 | // fscalelCpp
39 | SEXP _collapse_fscalelCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP set_meanSEXP, SEXP set_sdSEXP);
40 | // fvarsdCpp
41 | SEXP _collapse_fvarsdCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP);
42 | // fvarsdmCpp (fvarsdCpp arguments plus a trailing drop argument)
43 | SEXP _collapse_fvarsdmCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP);
44 | // fvarsdlCpp (same argument list as fvarsdmCpp)
45 | SEXP _collapse_fvarsdlCpp(SEXP xSEXP, SEXP ngSEXP, SEXP gSEXP, SEXP gsSEXP, SEXP wSEXP, SEXP narmSEXP, SEXP stable_algoSEXP, SEXP sdSEXP, SEXP dropSEXP);
46 | // mrtl (mrtl and mctl are declared with the same argument list)
47 | SEXP _collapse_mrtl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP);
48 | // mctl
49 | SEXP _collapse_mctl(SEXP XSEXP, SEXP namesSEXP, SEXP retSEXP);
50 | // psmatCpp
51 | SEXP _collapse_psmatCpp(SEXP xSEXP, SEXP gSEXP, SEXP tSEXP, SEXP transposeSEXP, SEXP fillSEXP);
52 | // qFCpp
53 | SEXP _collapse_qFCpp(SEXP xSEXP, SEXP orderedSEXP, SEXP na_excludeSEXP, SEXP keep_attrSEXP, SEXP retSEXP);
54 | // sortuniqueCpp (single argument)
55 | SEXP _collapse_sortuniqueCpp(SEXP xSEXP);
56 | // fdroplevelsCpp
57 | SEXP _collapse_fdroplevelsCpp(SEXP xSEXP, SEXP check_NASEXP);
58 | // seqid
59 | SEXP _collapse_seqid(SEXP xSEXP, SEXP oSEXP, SEXP delSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP skip_seqSEXP, SEXP check_oSEXP);
60 | // groupid (seqid's argument list without del and skip_seq)
61 | SEXP _collapse_groupid(SEXP xSEXP, SEXP oSEXP, SEXP startSEXP, SEXP na_skipSEXP, SEXP check_oSEXP);
62 | 


--------------------------------------------------------------------------------
/man/pwcor_pwcov_pwnobs.Rd:
--------------------------------------------------------------------------------
 1 | \name{pwcor-pwcov-pwnobs}
 2 | \alias{pwcor}
 3 | \alias{pwcov}
 4 | \alias{pwnobs}
 5 | \alias{print.pwcov}
 6 | \alias{print.pwcor}
 7 | %- Also NEED an '\alias' for EACH other topic documented here.
 8 | \title{
 9 | (Pairwise, Weighted) Correlations, Covariances and Observation Counts
10 | }
11 | \description{
12 | Computes (pairwise, weighted) Pearson's correlations, covariances and observation counts. Pairwise correlations and covariances can be computed together with observation counts and p-values, and output as 3D array (default) or list of matrices. \code{pwcor} and \code{pwcov} offer an elaborate print method.
13 | }
14 | \usage{
15 | pwcor(X, \dots, w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs")
16 | 
17 | pwcov(X, \dots, w = NULL, N = FALSE, P = FALSE, array = TRUE, use = "pairwise.complete.obs")
18 | 
19 | pwnobs(X)
20 | 
21 | \method{print}{pwcor}(x, digits = .op[["digits"]], sig.level = 0.05,
22 |       show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, \dots)
23 | 
24 | \method{print}{pwcov}(x, digits = .op[["digits"]], sig.level = 0.05,
25 |       show = c("all","lower.tri","upper.tri"), spacing = 1L, return = FALSE, \dots)
26 | 
27 | }
28 | %- maybe also 'usage' for other objects documented here.
29 | \arguments{
30 |   \item{X}{a matrix or data.frame, for \code{pwcor} and \code{pwcov} all columns must be numeric. All functions are faster on matrices, so converting is advised for large data (see \code{\link{qM}}).}
31 |   \item{x}{an object of class 'pwcor' / 'pwcov'. }
32 |   \item{w}{numeric. A vector of (frequency) weights. }
33 |   \item{N}{logical. \code{TRUE} also computes pairwise observation counts.}
34 |   \item{P}{logical. \code{TRUE} also computes pairwise p-values (same as \code{\link{cor.test}} and \code{Hmisc::rcorr}).}
35 |   \item{array}{logical. If \code{N = TRUE} or \code{P = TRUE}, \code{TRUE} (default) returns output as 3D array whereas \code{FALSE} returns a list of matrices.}
36 |   \item{use}{argument passed to \code{\link{cor}} / \code{\link{cov}}. If \code{use != "pairwise.complete.obs"}, \code{sum(complete.cases(X))} is used for \code{N}, and p-values are computed accordingly. }
37 |   \item{digits}{integer. The number of digits to round to in print. }
38 |   \item{sig.level}{numeric. P-value threshold below which a \code{'*'} is displayed above significant coefficients if \code{P = TRUE}. }
39 |   \item{show}{character. The part of the correlation / covariance matrix to display. }
40 |   \item{spacing}{integer. Controls the spacing between different reported quantities in the printout of the matrix: 0 - compressed, 1 - single space, 2 - double space.}
41 |   \item{return}{logical. \code{TRUE} returns the formatted object from the print method for exporting. The default is to return \code{x} invisibly.}
42 |   \item{\dots}{other arguments passed to \code{\link{cor}} or \code{\link{cov}}. Only sensible if \code{P = FALSE}. }
43 | }
44 | 
45 | \value{
46 | a numeric matrix, 3D array or list of matrices with the computed statistics. For \code{pwcor} and \code{pwcov} the object has a class 'pwcor' and 'pwcov', respectively.
47 | }
48 | 
49 | \note{
50 | \code{weights::wtd.cors} is imported for weighted pairwise correlations (written in C for speed). For weighted correlations with bootstrap SE's see \code{weights::wtd.cor} (bootstrap can be slow). Weighted correlations for complex surveys are implemented in \code{jtools::svycor}. An equivalent and faster implementation of \code{pwcor} (without weights) is provided in \code{Hmisc::rcorr} (written in Fortran).
51 | }
52 | 
53 | %% ~Make other sections like Warning with \section{Warning }{\dots.} ~
54 | 
55 | \seealso{
56 | \code{\link{qsu}}, \link[=summary-statistics]{Summary Statistics}, \link[=collapse-documentation]{Collapse Overview} %% ~~objects to See Also as
57 | }
58 | \examples{
59 | mna <- na_insert(mtcars)
60 | pwcor(mna)
61 | pwcov(mna)
62 | pwnobs(mna)
63 | pwcor(mna, N = TRUE)
64 | pwcor(mna, P = TRUE)
65 | pwcor(mna, N = TRUE, P = TRUE)
66 | aperm(pwcor(mna, N = TRUE, P = TRUE))
67 | print(pwcor(mna, N = TRUE, P = TRUE), digits = 3, sig.level = 0.01, show = "lower.tri")
68 | pwcor(mna, N = TRUE, P = TRUE, array = FALSE)
69 | print(pwcor(mna, N = TRUE, P = TRUE, array = FALSE), show = "lower.tri")
70 | 
71 | 
72 | }
73 | % Add one or more standard keywords, see file 'KEYWORDS' in the
74 | % R documentation directory.
75 | \keyword{multivariate} % use one of  RShowDoc("KEYWORDS")
76 | 
77 | 


--------------------------------------------------------------------------------
/man/frename.Rd:
--------------------------------------------------------------------------------
 1 | \name{frename}
 2 | \alias{rnm}
 3 | \alias{frename}
 4 | \alias{setrename}
 5 | \alias{relabel}
 6 | \alias{setrelabel}
 7 | %- Also NEED an '\alias' for EACH other topic documented here.
 8 | \title{
 9 | Fast Renaming and Relabelling Objects
10 | }
11 | \description{
12 | \code{frename} returns a renamed shallow-copy, \code{setrename} renames objects by reference. These functions also work with objects other than data frames that have a 'names' attribute. \code{relabel} and \code{setrelabel} do the same for labels attached to data frame columns.
13 | }
14 | \usage{
15 | frename(.x, \dots, cols = NULL, .nse = TRUE)
16 | rnm(.x, \dots, cols = NULL, .nse = TRUE)     # Shorthand for frename()
17 | 
18 | setrename(.x, \dots, cols = NULL, .nse = TRUE)
19 | 
20 | relabel(.x, \dots, cols = NULL, attrn = "label")
21 | 
22 | setrelabel(.x, \dots, cols = NULL, attrn = "label")
23 | 
24 | }
25 | %- maybe also 'usage' for other objects documented here.
26 | \arguments{
27 |   \item{.x}{for \code{(f/set)rename}: an R object with a \code{"names"} attribute. For \code{(set)relabel}: a named list.
28 | }
29 |   \item{\dots}{either tagged vector expressions of the form \code{name = newname} / \code{name = newlabel} (\code{frename} also supports \code{newname = name}), a (named) vector of names/labels, or a single function (+ optional arguments to the function) applied to all names/labels (of columns/elements selected in \code{cols}).
30 | }
31 |   \item{cols}{If \code{\dots} is a function, select a subset of columns/elements to rename/relabel using names, indices, a logical vector or a function applied to the columns if \code{.x} is a list (e.g. \code{is.numeric}).}
32 | 
33 |   \item{.nse}{logical. \code{TRUE} allows non-standard evaluation of tagged vector expressions, allowing you to supply new names without quotes. Set to \code{FALSE} for programming or passing vectors of names.}
34 | 
35 |   \item{attrn}{character. Name of attribute to store labels or retrieve labels from.}
36 | }
37 | \value{
38 | \code{.x} renamed / relabelled. \code{setrename} and \code{setrelabel} return \code{.x} invisibly.
39 | }
40 | \note{
41 | Note that both \code{relabel} and \code{setrelabel} modify \code{.x} by reference. This is because labels are attached to columns themselves, making it impossible to avoid permanent modification by taking a shallow copy of the encompassing list / data.frame. On the other hand \code{frename} makes a shallow copy whereas \code{setrename} also modifies by reference.
42 | }
43 | \seealso{
44 | \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=collapse-documentation]{Collapse Overview}
45 | }
46 | \examples{
47 | ## Using tagged expressions
48 | head(frename(iris, Sepal.Length = SL, Sepal.Width = SW,
49 |                    Petal.Length = PL, Petal.Width = PW))
50 | head(frename(iris, Sepal.Length = "S L", Sepal.Width = "S W",
51 |                    Petal.Length = "P L", Petal.Width = "P W"))
52 | 
53 | ## Since v2.0.0 this is also supported
54 | head(frename(iris, SL = Sepal.Length, SW = Sepal.Width,
55 |                    PL = Petal.Length, PW = Petal.Width))
56 | 
57 | ## Using a function
58 | head(frename(iris, tolower))
59 | head(frename(iris, tolower, cols = 1:2))
60 | head(frename(iris, tolower, cols = is.numeric))
61 | head(frename(iris, paste, "new", sep = "_", cols = 1:2))
62 | 
63 | ## Using vectors of names and programming
64 | newname = "sepal_length"
65 | head(frename(iris, Sepal.Length = newname, .nse = FALSE))
66 | newnames = c("sepal_length", "sepal_width")
67 | head(frename(iris, newnames, cols = 1:2))
68 | newnames = c(Sepal.Length = "sepal_length", Sepal.Width = "sepal_width")
69 | head(frename(iris, newnames, .nse = FALSE))
70 | # Since v2.0.0, this works as well
71 | newnames = c(sepal_length = "Sepal.Length", sepal_width = "Sepal.Width")
72 | head(frename(iris, newnames, .nse = FALSE))
73 | 
74 | ## Renaming by reference
75 | # setrename(iris, tolower)
76 | # head(iris)
77 | # rm(iris)
78 | # etc...
79 | 
80 | ## Relabelling (by reference)
81 | # namlab(relabel(wlddev, PCGDP = "GDP per Capita", LIFEEX = "Life Expectancy"))
82 | # namlab(relabel(wlddev, toupper))
83 | 
84 | 
85 | }
86 | % Add one or more standard keywords, see file 'KEYWORDS' in the
87 | % R documentation directory (show via RShowDoc("KEYWORDS")):
88 | \keyword{ manip }
89 | % \keyword{ ~kwd2 }
90 | % Use only one keyword per line.
91 | % For non-standard keywords, use \concept instead of \keyword:
92 | % \concept{ ~cpt1 }
93 | % \concept{ ~cpt2 }
94 | % Use only one concept per line.
95 | 


--------------------------------------------------------------------------------
/man/radixorder.Rd:
--------------------------------------------------------------------------------
 1 | \name{radixorder}
 2 | \alias{radixorder}
 3 | \alias{radixorderv}
 4 | %- Also NEED an '\alias' for EACH other topic documented here.
 5 | \title{
 6 | Fast Radix-Based Ordering
 7 | }
 8 | \description{
 9 | A slight modification of \code{\link[=order]{order(..., method = "radix")}} that is more programmer friendly and, importantly, provides features for ordered grouping of data (similar to \code{data.table:::forderv} from which it descended). % \code{radixorderv} is a programmers version directly supporting vector and list input. % Apart from added grouping features, the source code and standard functionality is identical to \code{\link{order(\dots, method = "radix")}.
10 | }
11 | \usage{
12 | radixorder(\dots, na.last = TRUE, decreasing = FALSE, starts = FALSE,
13 |            group.sizes = FALSE, sort = TRUE)
14 | 
15 | radixorderv(x, na.last = TRUE, decreasing = FALSE, starts = FALSE,
16 |             group.sizes = FALSE, sort = TRUE)
17 | }
18 | %- maybe also 'usage' for other objects documented here.
19 | \arguments{
20 |   \item{\dots}{comma-separated atomic vectors to order.
21 | 
22 | }
23 |   \item{x}{
24 | an atomic vector or list of atomic vectors such as a data frame.
25 | }
26 |   \item{na.last}{logical. Controls the treatment of \code{NA}'s. If \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first; if \code{NA}, they are removed.
27 | }
28 |   \item{decreasing}{
29 | logical. Should the sort order be increasing or decreasing? Can be a vector of length equal to the number of arguments in \code{\dots} / \code{x}.
30 | }
31 |   \item{starts}{logical. \code{TRUE} returns an attribute 'starts' containing the first element of each new group i.e. the row denoting the start of each new group if the data were sorted using the computed ordering vector. See Examples.
32 | %%     ~~Describe \code{starts} here~~
33 | }
34 |   \item{group.sizes}{logical. \code{TRUE} returns an attribute 'group.sizes' containing sizes of each group in the same order as groups are encountered if the data were sorted using the computed ordering vector. See Examples.
35 | }
36 |   \item{sort}{logical. This argument only affects character vectors / columns passed. If \code{FALSE}, these are not ordered but simply grouped in the order of first appearance of unique elements. This provides a slight performance gain if only grouping but not alphabetic ordering is required. See also \code{\link{group}}.
37 | %%     ~~Describe \code{sort} here~~
38 | }
39 | }
40 | % \details{
41 | % \code{radixorder} works just like \code{\link[=order]{order(\dots, method = "radix")}}, the source code is the same. However if \code{starts = TRUE}, and attribute
42 | % }
43 | %}
44 | \value{
45 | An integer ordering vector with attributes: Unless \code{na.last = NA} an attribute \code{"sorted"} indicating whether the input data was already sorted is attached. If \code{starts = TRUE}, \code{"starts"} giving a vector of group starts in the ordered data, and if \code{group.sizes = TRUE}, \code{"group.sizes"} giving the vector of group sizes are attached. In either case an attribute \code{"maxgrpn"} providing the size of the largest group is also attached.
46 | }
47 | 
48 | \author{
49 | The C code was taken - with slight modifications - from \href{https://github.com/wch/r-source/blob/79298c499218846d14500255efd622b5021c10ec/src/main/radixsort.c}{base R source code}, and is originally due to \emph{data.table} authors Matt Dowle and Arun Srinivasan.
50 | }
51 | 
52 | 
53 | \seealso{
54 | \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}
55 | }
56 | \examples{
57 | radixorder(mtcars$mpg)
58 | head(mtcars[radixorder(mtcars$mpg), ])
59 | radixorder(mtcars$cyl, mtcars$vs)
60 | 
61 | o <- radixorder(mtcars$cyl, mtcars$vs, starts = TRUE)
62 | st <- attr(o, "starts")
63 | head(mtcars[o, ])
64 | mtcars[o[st], c("cyl", "vs")]  # Unique groups
65 | 
66 | # Note that if attr(o, "sorted") == TRUE, then all(o[st] == st)
67 | radixorder(rep(1:3, each = 3), starts = TRUE)
68 | 
69 | # Group sizes
70 | radixorder(mtcars$cyl, mtcars$vs, group.sizes = TRUE)
71 | 
72 | # Both
73 | radixorder(mtcars$cyl, mtcars$vs, starts = TRUE, group.sizes = TRUE)
74 | 
75 | }
76 | % Add one or more standard keywords, see file 'KEYWORDS' in the
77 | % R documentation directory (show via RShowDoc("KEYWORDS")):
78 | \keyword{manip}
79 | % \keyword{ ~kwd2 }
80 | % Use only one keyword per line.
81 | % For non-standard keywords, use \concept instead of \keyword:
82 | % \concept{ ~cpt1 }
83 | % \concept{ ~cpt2 }
84 | % Use only one concept per line.
85 | 


--------------------------------------------------------------------------------
/man/GGDC10S.Rd:
--------------------------------------------------------------------------------
 1 | \name{GGDC10S}
 2 | \alias{GGDC10S}
 3 | \docType{data}
 4 | \title{
 5 | Groningen Growth and Development Centre 10-Sector Database
 6 | }
 7 | \description{
 8 | The GGDC 10-Sector Database provides a long-run internationally comparable dataset on sectoral productivity performance in Africa, Asia, and Latin America. Variables covered in the data set are annual series of value added (in local currency), and persons employed for 10 broad sectors.
 9 | }
10 | \usage{data("GGDC10S")}
11 | \format{
12 |   A data frame with 5027 observations on the following 16 variables.
13 |   \describe{
14 |     \item{\code{Country}}{\emph{char}: Country (43 countries)}
15 |     \item{\code{Regioncode}}{\emph{char}: ISO3 Region code}
16 |     \item{\code{Region}}{\emph{char}: Region (6 World Regions)}
17 |     \item{\code{Variable}}{\emph{char}: Variable (Value Added or Employment)}
18 |     \item{\code{Year}}{\emph{num}: Year (67 Years, 1947-2013)}
19 |     \item{\code{AGR}}{\emph{num}: Agriculture}
20 |     \item{\code{MIN}}{\emph{num}: Mining}
21 |     \item{\code{MAN}}{\emph{num}: Manufacturing}
22 |     \item{\code{PU}}{\emph{num}: Utilities}
23 |     \item{\code{CON}}{\emph{num}: Construction}
24 |     \item{\code{WRT}}{\emph{num}: Trade, restaurants and hotels}
25 |     \item{\code{TRA}}{\emph{num}: Transport, storage and communication}
26 |     \item{\code{FIRE}}{\emph{num}: Finance, insurance, real estate and business services}
27 |     \item{\code{GOV}}{\emph{num}: Government services}
28 |     \item{\code{OTH}}{\emph{num}: Community, social and personal services}
29 |     \item{\code{SUM}}{\emph{num}: Summation of sector GDP}
30 |   }
31 | }
32 | % \details{
33 | %%  ~~ If necessary, more details than the __description__ above ~~
34 | % }
35 | \source{
36 | \url{https://www.rug.nl/ggdc/productivity/10-sector/}
37 | }
38 | \references{
39 | Timmer, M. P., de Vries, G. J., & de Vries, K. (2015). "Patterns of Structural Change in Developing Countries." In J. Weiss, & M. Tribe (Eds.), \emph{Routledge Handbook of Industry and Development.} (pp. 65-83). Routledge.
40 | }
41 | \seealso{
42 | \code{\link{wlddev}}, \link[=collapse-documentation]{Collapse Overview}
43 | }
44 | \examples{
45 | namlab(GGDC10S, class = TRUE)
46 | # aperm(qsu(GGDC10S, ~ Variable, ~ Variable + Country, vlabels = TRUE))
47 | \donttest{
48 | library(ggplot2)
49 | 
50 | ## World Regions Structural Change Plot
51 | 
52 | GGDC10S |>
53 |   fmutate(across(AGR:OTH, `*`, 1 / SUM),
54 |           Variable = ifelse(Variable == "VA","Value Added Share", "Employment Share")) |>
55 |   replace_outliers(0, NA, "min") |>
56 |   collap( ~ Variable + Region + Year, cols = 6:15) |> qDT() |>
57 |   pivot(1:3, names = list(variable = "Sector"), na.rm = TRUE) |>
58 | 
59 |   ggplot(aes(x = Year, y = value, fill = Sector)) +
60 |     geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) +
61 |     theme_linedraw(base_size = 14) +
62 |     facet_grid(Variable ~ Region, scales = "free_x") +
63 |     scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10))) +
64 |     scale_x_continuous(breaks = scales::pretty_breaks(n = 7), expand = c(0, 0))+
65 |     scale_y_continuous(breaks = scales::pretty_breaks(n = 10), expand = c(0, 0),
66 |                        labels = scales::percent) +
67 |     theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)),
68 |           strip.background = element_rect(colour = "grey30", fill = "grey30"))
69 | 
70 | # A function to plot the structural change of an arbitrary country
71 | 
72 | plotGGDC <- function(ctry) {
73 | 
74 |   GGDC10S |>
75 |   fsubset(Country == ctry, Variable, Year, AGR:SUM) |>
76 |   fmutate(across(AGR:OTH, `*`, 1 / SUM), SUM = NULL,
77 |           Variable = ifelse(Variable == "VA","Value Added Share", "Employment Share")) |>
78 |   replace_outliers(0, NA, "min") |> qDT() |>
79 |   pivot(1:2, names = list(variable = "Sector"), na.rm = TRUE) |>
80 | 
81 |   ggplot(aes(x = Year, y = value, fill = Sector)) +
82 |     geom_area(position = "fill", alpha = 0.9) + labs(x = NULL, y = NULL) +
83 |     theme_linedraw(base_size = 14) + facet_wrap( ~ Variable) +
84 |     scale_fill_manual(values = sub("#00FF66", "#00CC66", rainbow(10))) +
85 |     scale_x_continuous(breaks = scales::pretty_breaks(n = 7), expand = c(0, 0)) +
86 |     scale_y_continuous(breaks = scales::pretty_breaks(n = 10), expand = c(0, 0),
87 |                        labels = scales::percent) +
88 |     theme(axis.text.x = element_text(angle = 315, hjust = 0, margin = ggplot2::margin(t = 0)),
89 |           strip.background = element_rect(colour = "grey20", fill = "grey20"),
90 |           strip.text = element_text(face = "bold"))
91 | }
92 | 
93 | plotGGDC("BWA")
94 | 
95 | }
96 | 
97 | }
98 | \keyword{datasets}
99 | 


--------------------------------------------------------------------------------
/R/fslice.R:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | fslice <- function(x, ..., n = 1, how = "first", order.by = NULL,
 4 |                    na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE) {
 5 |   # Grouping: columns / vectors supplied in ... take precedence over groups stored in a grouped_df; otherwise g stays NULL
 6 |   g <- NULL
 7 |   if(!missing(...)) {
 8 |     g <- GRP.default(if(is.list(x)) fselect(unclass(x), ...) else list(...), sort = sort, return.groups = FALSE, return.order = sort, call = FALSE)
 9 |   } else if(is.list(x) && inherits(x, "grouped_df")) {
10 |     g <- GRP.grouped_df(x, return.groups = FALSE, call = FALSE)
11 |     x <- fungroup2(x, oldClass(x))
12 |   }
13 |   # order.by is only resolved and length-checked for how = "min" / "max"
14 |   use.order <- switch(how, min = TRUE, max = TRUE, FALSE)
15 |   if(use.order) {
16 |     if(is.list(x)) order.by <- eval(substitute(order.by), x, parent.frame()) # look the expression up in x first, then in the caller
17 |     if(is.character(order.by) && length(order.by) == 1L) {
18 |       if(anyv(attr(x, "names"), order.by)) order.by <- .subset2(x, order.by) # a single name of x: use that element
19 |     }
20 |     if(fnrow(x) != length(order.by)) stop("order.by must be a numeric vector of the same length as the number of rows in x, or the name of a column in x.")
21 |   }
22 |   fslice_core(x, g, n, how, order.by, na.rm, with.ties, sort)
23 | }
24 | 
25 | fslicev <- function(x, cols = NULL, n = 1, how = "first", order.by = NULL,
26 |                    na.rm = .op[["na.rm"]], sort = FALSE, with.ties = FALSE, ...) {
27 |   # Grouping: cols (when given) takes precedence over groups stored in a grouped_df; otherwise g stays NULL
28 |   g <- NULL
29 |   if(!is.null(cols)) {
30 |     # a list in cols (or any cols when x is atomic) is grouped directly; otherwise cols is handed to GRP.default as 'by' for x
31 |     g <- if(is.list(cols) || is.atomic(x))
32 |       GRP.default(cols, by = NULL, sort = sort, return.groups = FALSE, return.order = sort, call = FALSE, ...) else
33 |       GRP.default(x, by = cols, sort = sort, return.groups = FALSE, return.order = sort, call = FALSE, ...)
34 |   } else if(is.list(x) && inherits(x, "grouped_df")) {
35 |     g <- GRP.grouped_df(x, return.groups = FALSE, call = FALSE)
36 |     x <- fungroup2(x, oldClass(x))
37 |   }
38 |   # order.by is only resolved and length-checked for how = "min" / "max"
39 |   use.order <- switch(how, min = TRUE, max = TRUE, FALSE)
40 |   if(use.order) {
41 |     if(is.character(order.by) && length(order.by) == 1L) {
42 |       if(anyv(attr(x, "names"), order.by)) order.by <- .subset2(x, order.by) # a single name of x: use that element
43 |     }
44 |     if(fnrow(x) != length(order.by)) stop("order.by must be a numeric vector of the same length as the number of rows in x, or the name of a column in x.")
45 |   }
46 |   fslice_core(x, g, n, how, order.by, na.rm, with.ties, sort)
47 | }
48 | 
49 | 
50 | fslice_core <- function(x, g, n, how, order.by, na.rm, with.ties, sort) {
51 |   # Worker called by fslice() and fslicev(): g is the grouping object they built (or NULL), how is one of "first", "last", "min", "max"
52 |   # convert a proportion to a number if applicable: n < 1 is scaled by fnrow(x), or by fnrow(x)/g[[1L]] when grouped (g[[1L]] presumably the number of groups -- confirm), and is at least 1
53 |   if(n < 1) n <- if(is.null(g)) max(1L, as.integer(round(n * fnrow(x)))) else max(1L, as.integer(round(n * fnrow(x)/g[[1L]])))
54 |   if(n > 1 && with.ties) stop("with.ties = TRUE is only supported for n = 1")
55 |   # Case 1 - no grouping: build the row index directly and subset once
56 |   if(is.null(g)) {
57 |     ind <- switch(how,
58 |       first = 1:n, # n is not clamped to fnrow(x) here
59 |       last = (fnrow(x)-n+1L):fnrow(x),
60 |       min = if(n > 1) radixorderv(order.by, decreasing = FALSE, na.last = na.rm)[1:n] else if(with.ties) order.by %==% fmin.default(order.by, na.rm = na.rm) else which.min(order.by),
61 |       max = if(n > 1) radixorderv(order.by, decreasing = TRUE, na.last = na.rm)[1:n] else if(with.ties) order.by %==% fmax.default(order.by, na.rm = na.rm) else which.max(order.by),
62 |       stop("Unknown 'how' option: ", how)
63 |     )
64 |     return(ss(x, ind, check = FALSE))
65 |   }
66 |   # Case 2 - grouped, n == 1: ffirst() / flast() for first / last; for min / max rows are located from the grouped fmin / fmax of order.by
67 |   if(n == 1) {
68 |     if(with.ties && sort) warning("sorting with ties is currently not supported")
69 |     return(switch(how,
70 |       first = condalc(ffirst(x, g, na.rm = FALSE), inherits(x, "data.table")),
71 |       last = condalc(flast(x, g, na.rm = FALSE), inherits(x, "data.table")),
72 |       # TODO: sort with ties? (with.ties compares order.by with the group statistic expanded via TRA = "fill"; otherwise C_gwhich_first is used)
73 |       min = if(with.ties) ss(x, order.by %==% fmin(order.by, g, TRA = "fill", na.rm = na.rm, use.g.names = FALSE), check = FALSE) else
74 |             ss(x, .Call(C_gwhich_first, order.by, g, fmin.default(order.by, g, na.rm = na.rm, use.g.names = FALSE)), check = FALSE),
75 |       max = if(with.ties) ss(x, order.by %==% fmax(order.by, g, TRA = "fill", na.rm = na.rm, use.g.names = FALSE), check = FALSE) else
76 |             ss(x, .Call(C_gwhich_first, order.by, g, fmax.default(order.by, g, na.rm = na.rm, use.g.names = FALSE)), check = FALSE),
77 |       stop("Unknown 'how' option: ", how)
78 |   ))
79 |   }
80 |   # Case 3 - grouped, n > 1: C_gslice_multi gets g, an ordering vector (or NULL), n and a flag that is TRUE for first / min / max and FALSE for last
81 |   ind <- switch(how,
82 |       first = .Call(C_gslice_multi, g, g$order, n, TRUE), # g$order is NULL if sort = FALSE
83 |       last = .Call(C_gslice_multi, g, g$order, n, FALSE), # g$order is NULL if sort = FALSE
84 |       min = .Call(C_gslice_multi, g, radixorder(g$group.id, order.by, decreasing = FALSE, na.last = na.rm), n, TRUE),
85 |       max = .Call(C_gslice_multi, g, radixorder(g$group.id, order.by, decreasing = c(FALSE, TRUE), na.last = na.rm), n, TRUE), # group ids ascending, order.by descending
86 |       stop("Unknown 'how' option: ", how)
87 |     )
88 |   # subset x by the computed index (no bounds checking: check = FALSE)
89 |   return(ss(x, ind, check = FALSE))
90 | }
91 | 


--------------------------------------------------------------------------------
/tests/testthat/test-flm-fFtest.R:
--------------------------------------------------------------------------------
  1 | context("flm and fFtest")
  2 | 
  3 | # Shared fixtures built from mtcars: response y, regressor matrix x (via qM) and weights w
  4 | 
  5 | y <- mtcars$mpg
  6 | x <- qM(mtcars[c("cyl","vs","am","carb","hp")])
  7 | w <- mtcars$wt
  8 | # Reference fits from lm() on the same variables, without and with weights
  9 | lmr <- lm(mpg ~ cyl + vs + am + carb + hp, mtcars)
 10 | lmw <- lm(mpg ~ cyl + vs + am + carb + hp, weights = wt, mtcars)
 11 | # TRUE only when the environment variable NCRAN equals "TRUE"; gates the checks wrapped in if(NCRAN)
 12 | NCRAN <- identical(Sys.getenv("NCRAN"), "TRUE")
 13 | 
 14 | test_that("flm works as intended", {
 15 |   # With an intercept added, methods 1:6 must reproduce the lm() coefficients (unweighted, then weighted); only run when NCRAN is TRUE
 16 |   if(NCRAN) for(i in 1:6) expect_equal(drop(flm(y, x, add.icpt = TRUE, method = i)), coef(lmr))
 17 |   if(NCRAN) for(i in 1:6) expect_equal(drop(flm(y, x, w, add.icpt = TRUE, method = i)), coef(lmw))
 18 |   expect_equal(flm(y, x, method = 1L, return.raw = TRUE), .lm.fit(x, y)) # raw output of methods 1, 2, 3 and 5 vs. the expression each is compared with
 19 |   expect_equal(flm(y, x, method = 2L, return.raw = TRUE), solve(crossprod(x), crossprod(x, y)))
 20 |   expect_equal(flm(y, x, method = 3L, return.raw = TRUE), qr.coef(qr(x), y))
 21 |   expect_equal(flm(y, x, method = 5L, return.raw = TRUE), cinv(crossprod(x)) %*% crossprod(x, y))
 22 |   if(NCRAN) {
 23 |     # This is to fool very silly checks on CRAN scanning the code of the tests
 24 |     afmlp <- eval(parse(text = paste0("RcppArmadillo", ":", ":", "fastLmPure")))
 25 |     efmlp <- eval(parse(text = paste0("RcppEigen", ":", ":", "fastLmPure")))
 26 |     # methods 4 and 6 are compared with fastLmPure() from RcppArmadillo and RcppEigen respectively
 27 |     expect_equal(flm(y, x, method = 4L, return.raw = TRUE), afmlp(x, y))
 28 |     expect_equal(flm(y, x, method = 6L, return.raw = TRUE), efmlp(x, y, 3L))
 29 |   }
 30 |   if(NCRAN) for(i in 1:6) expect_visible(flm(y, x, w, method = i, return.raw = TRUE))
 31 |   ym <- cbind(y, y) # two-column response
 32 |   for(i in c(1:3, 5L)) expect_visible(flm(ym, x, w, method = i)) # only methods 1, 2, 3 and 5 are exercised with a matrix response
 33 |   # dropping one element / row from y, w or x must raise an error
 34 |   expect_error(flm(y[-1L], x, w))
 35 |   expect_error(flm(y, x, w[-1L]))
 36 |   expect_error(flm(y, x[-1L, ], w))
 37 | 
 38 | })
 39 | 
 40 | 
 41 | test_that("fFtest works as intended", {
 42 |   # The first four elements of the fFtest() result are compared with R-squared, the two degrees of freedom and the F statistic from summary(lm())
 43 |   r <- fFtest(iris$Sepal.Length, gv(iris, -1L))
 44 |   rlm <- summary(lm(Sepal.Length ~., iris))
 45 |   expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)])))
 46 |   # Same with weights:
 47 |   w <- abs(rnorm(fnrow(iris))) # random positive weights (drawn before any set.seed() call in this test)
 48 |   r <- fFtest(iris$Sepal.Length, gv(iris, -1L), w = w)
 49 |   rlm <- summary(lm(Sepal.Length ~., weights = w, iris))
 50 |   expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)])))
 51 | 
 52 |   # Repeat with missing values
 53 |   set.seed(101)
 54 |   iris <- na_insert(iris) # copy of iris with inserted NAs, removed again by rm(iris) further down
 55 |   r <- fFtest(iris$Sepal.Length, gv(iris, -1L))
 56 |   rlm <- summary(lm(Sepal.Length ~., iris))
 57 |   expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)])))
 58 |   # Same with weights:
 59 |   set.seed(101)
 60 |   w <- na_insert(w) # the weights also get missing values
 61 |   r <- fFtest(iris$Sepal.Length, gv(iris, -1L), w = w)
 62 |   rlm <- summary(lm(Sepal.Length ~., weights = w, iris))
 63 |   expect_equal(unattrib(r)[1:4], unattrib(c(rlm$r.squared, rlm$fstatistic[c(2:3, 1L)])))
 64 |   rm(iris)
 65 |   # Three-argument fFtest() on wlddev vs. anova() of nested lm() fits; only run when NCRAN is TRUE
 66 |   if(NCRAN) {
 67 |   r <- fFtest(wlddev$PCGDP, qF(wlddev$year), wlddev[c("iso3c","LIFEEX")])
 68 |   # Same test done using lm:
 69 |   data <- na_omit(get_vars(wlddev, c("iso3c","year","PCGDP","LIFEEX")), na.attr = TRUE)
 70 |   full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), data)
 71 |   rest <- lm(PCGDP ~ LIFEEX + iso3c, data)
 72 |   ranv <- anova(rest, full)
 73 |   # rows 1 and 2 of r are compared with the full and the restricted model, row 3 with the anova() comparison
 74 |   expect_equal(unattrib(r[1L, 1:4]), unlist(summary(full)[c("r.squared", "fstatistic")],
 75 |                                             use.names = FALSE)[c(1L, 3:4, 2L)])
 76 |   expect_equal(unattrib(r[2L, 1:4]), unlist(summary(rest)[c("r.squared", "fstatistic")],
 77 |                                             use.names = FALSE)[c(1L, 3:4, 2L)])
 78 |   expect_equal(rev(unattrib(r[1:2, 3L])), ranv$Res.Df)
 79 |   expect_equal(r[3L, 2L], na_rm(ranv$Df))
 80 |   expect_equal(r[3L, 4L], na_rm(ranv$F))
 81 |   expect_equal(r[3L, 5L], na_rm(ranv$`Pr(>F)`))
 82 | 
 83 |   # Same with weights:
 84 |   w <- abs(rnorm(fnrow(wlddev)))
 85 |   r <- fFtest(wlddev$PCGDP, qF(wlddev$year), wlddev[c("iso3c","LIFEEX")], w)
 86 |   full <- lm(PCGDP ~ LIFEEX + iso3c + qF(year), weights = w[-attr(data, "na.action")], data) # weights without the entries indexed by data's "na.action" attribute
 87 |   rest <- lm(PCGDP ~ LIFEEX + iso3c, weights = w[-attr(data, "na.action")], data)
 88 |   ranv <- anova(rest, full)
 89 |   # same row-by-row comparisons as in the unweighted case
 90 |   expect_equal(unattrib(r[1L, 1:4]), unlist(summary(full)[c("r.squared", "fstatistic")],
 91 |                                             use.names = FALSE)[c(1L, 3:4, 2L)])
 92 |   expect_equal(unattrib(r[2L, 1:4]), unlist(summary(rest)[c("r.squared", "fstatistic")],
 93 |                                             use.names = FALSE)[c(1L, 3:4, 2L)])
 94 |   expect_equal(rev(unattrib(r[1:2, 3L])), ranv$Res.Df)
 95 |   expect_equal(r[3L, 2L], na_rm(ranv$Df))
 96 |   expect_equal(r[3L, 4L], na_rm(ranv$F))
 97 |   expect_equal(r[3L, 5L], na_rm(ranv$`Pr(>F)`))
 98 | 
 99 |   }
100 | })
101 | 


--------------------------------------------------------------------------------
/tests/testthat/test-whichv.R:
--------------------------------------------------------------------------------
  1 | context("anyv, allv, whichv, setv, copyv etc.")
  2 | 
  3 | 
  4 | 
  5 | # d <- replace_NA(wlddev, cols = 9:13)
  6 | 
  7 | test_that("whichv works well", {  # whichv() / whichNA() are compared with their base-R which() equivalents
  8 |   expect_identical(whichv(wlddev$country, "Chad"), which(wlddev$country == "Chad"))  # positions equal to a value
  9 |   expect_identical(whichv(wlddev$country, "Chad", invert = TRUE), which(wlddev$country != "Chad"))  # invert = TRUE: positions not equal to the value
 10 |   expect_identical(whichNA(wlddev$PCGDP), which(is.na(wlddev$PCGDP)))  # positions of missing values
 11 |   expect_identical(whichNA(wlddev$PCGDP, invert = TRUE), which(!is.na(wlddev$PCGDP)))  # positions of non-missing values
 12 |   expect_identical(whichv(is.na(wlddev$PCGDP), FALSE), which(!is.na(wlddev$PCGDP)))  # logical vector matched against FALSE
 13 | })
 14 | 
 15 | 
 16 | test_that("anyv, allv and whichv work properly", {  # every column of wlddev is checked against base-R equivalents
 17 |   for(i in seq_along(wlddev)) {
 18 |     vec <- .subset2(wlddev, i)
 19 |     v <- vec[trunc(runif(1L, 1L, length(vec)))]  # randomly drawn element of the column (no seed is set in this test)
 20 |     if(is.na(v)) v <- flast(vec)  # a missing draw is replaced by flast(vec)
 21 |     expect_identical(which(vec == v), whichv(vec, v))
 22 |     if(!anyNA(vec)) expect_identical(which(vec != v), whichv(vec, v, TRUE))  # inverted match is only compared for columns without missing values
 23 |     expect_identical(all(vec == v), allv(vec, v))
 24 |     expect_identical(any(vec == v), anyv(vec, v))
 25 |     vecNA <- is.na(vec)
 26 |     expect_identical(which(vecNA), whichNA(vec))
 27 |     expect_identical(which(!vecNA), whichNA(vec, TRUE))
 28 |     expect_identical(all(vecNA), allNA(vec))
 29 |     expect_identical(any(vecNA), anyNA(vec))
 30 |   }
 31 |   if(identical(Sys.getenv("NCRAN"), "TRUE")) {  # the floating point checks below only run when the NCRAN environment variable is "TRUE"
 32 |   expect_true(allv(rep(0.0004, 1000), 0.0004))
 33 |   expect_false(allv(rep(0.0004, 1000), 0.0005))
 34 |   }
 35 | })
 36 | 
 37 | if(requireNamespace("data.table", quietly = TRUE)) {  # the whole section is skipped when data.table is not installed
 38 | 
 39 | wldcopy <- data.table::copy(wlddev)  # copies taken before the test: used for the "unchanged" checks and to restore the data afterwards
 40 | mtccopy <- data.table::copy(mtcars)
 41 | 
 42 | test_that("setv and copyv work properly", {
 43 |   for(FUN in list(copyv, setv)) {  # the same expectations are run for copyv first, then for setv
 44 |     for(i in seq_along(wlddev)) {
 45 |       # print(i)
 46 |       vec <- .subset2(wlddev, i)
 47 |       v <- vec[trunc(runif(1L, 1L, length(vec)))]  # random value to search for
 48 |       r <- vec[trunc(runif(1L, 1L, length(vec)))]  # random replacement value
 49 |       if(is.na(v)) v <- flast(vec)  # a missing draw is replaced by flast(vec)
 50 |       vl <- vec == v
 51 |       nvl <- vec != v
 52 |       vna <- is.na(vec)
 53 |       expect_identical(FUN(vec, v, r), replace(vec, vl, r))  # scalar replacement, compared with base::replace()
 54 |       expect_identical(FUN(vec, which(vl), r, vind1 = TRUE), replace(vec, which(vl), r))
 55 |       expect_identical(FUN(vec, 10:1000, r), replace(vec, 10:1000, r))
 56 |       expect_identical(FUN(vec, NA, r), replace(vec, vna, r))
 57 |       expect_identical(FUN(vec, vl, r), replace(vec, vl, r))
 58 |       expect_identical(FUN(vec, 258L, r, vind1 = TRUE), replace(vec, 258L, r))
 59 |       expect_identical(FUN(vec, vl, r, invert = TRUE), replace(vec, !vl, r))
 60 |       expect_identical(FUN(vec, which(nvl), r), replace(vec, which(nvl), r))
 61 |       expect_error(FUN(vec, which(vl), r, invert = TRUE, vind1 = TRUE))
 62 |       # expect_error(FUN(vec, which(nvl), r, invert = TRUE))
 63 |       if(anyNA(vl)) {  # NA's in the comparison results are set to FALSE (setv modifies vl / nvl in place)
 64 |         setv(vl, NA, FALSE)
 65 |         setv(nvl, NA, FALSE)
 66 |       }
 67 |       expect_identical(FUN(vec, v, vec), replace(vec, vl, vec[vl]))  # vector replacement: elements are taken from the matching positions of the replacement
 68 |       expect_identical(FUN(vec, NA, vec), replace(vec, vna, vec[vna]))
 69 |       expect_identical(FUN(vec, vl, vec), replace(vec, vl, vec[vl]))
 70 |       expect_identical(FUN(vec, vl, vec, invert = TRUE), replace(vec, nvl, vec[nvl]))
 71 |       expect_identical(FUN(vec, which(vl), vec), replace(vec, vl, vec[vl]))
 72 |       expect_identical(FUN(vec, which(nvl), vec), replace(vec, nvl, vec[nvl]))
 73 |       # expect_error(FUN(vec, which(nvl), vec, invert = TRUE))
 74 |     }
 75 |     replr <- function(x, i, v) {  # reference implementation: replace rows i of data frame x by v
 76 |       x[i, ] <- v
 77 |       x
 78 |     }
 79 |     expect_identical(FUN(mtcars, 1, 2), replace(mtcars, mtcars == 1, 2))  # data frame input
 80 |     expect_identical(FUN(mtcars, 1, 2, invert = TRUE), replace(mtcars, mtcars != 1, 2))
 81 |     if(identical(FUN, copyv)) expect_visible(FUN(mtcars, 1, mtcars$mpg, invert = TRUE)) else
 82 |       expect_invisible(FUN(mtcars, 1, mtcars$mpg, invert = TRUE))  # copyv is expected to return visibly, setv invisibly
 83 |     expect_identical(FUN(mtcars, 23L, mtcars$mpg, vind1 = TRUE), replr(mtcars, 23L, mtcars$mpg[23L]))
 84 |     expect_identical(FUN(mtcars, 3:6, mtcars$mpg), replr(mtcars, 3:6, mtcars$mpg[3:6]))
 85 |     expect_identical(FUN(mtcars, 23L, mtcars, vind1 = TRUE), replr(mtcars, 23L, mtcars[23L, ]))
 86 |     expect_identical(FUN(mtcars, 3:6, mtcars), replr(mtcars, 3:6, mtcars[3:6, ]))
 87 |     expect_error(FUN(mtcars, 23, mtcars$mpg[4:10]))  # replacements of unsuitable length / width: vectors are expected to error, data frames to warn
 88 |     expect_warning(FUN(mtcars, 23, mtcars[4:10]))
 89 |     expect_error(FUN(mtcars, 23L, mtcars$mpg[4:10], vind1 = TRUE))
 90 |     expect_warning(FUN(mtcars, 23L, mtcars[4:10], vind1 = TRUE))
 91 |     expect_error(FUN(mtcars, 3:6, mtcars$mpg[4:10]))
 92 |     expect_warning(FUN(mtcars, 3:6, mtcars[4:10]))
 93 |     if(identical(FUN, copyv)) {  # after the copyv pass the datasets must still equal the copies taken above
 94 |     expect_identical(wlddev, wldcopy)
 95 |     expect_identical(mtcars, mtccopy)
 96 |     }
 97 |   }
 98 | })
 99 | 
100 | wlddev <- wldcopy  # restore the datasets from the copies taken before the test
101 | mtcars <- mtccopy
102 | 
103 | }
104 | 


--------------------------------------------------------------------------------
/man/ffirst_flast.Rd:
--------------------------------------------------------------------------------
 1 | \name{ffirst-flast}
 2 | \alias{ffirst}
 3 | \alias{ffirst.default}
 4 | \alias{ffirst.matrix}
 5 | \alias{ffirst.data.frame}
 6 | \alias{ffirst.grouped_df}
 7 | \alias{flast}
 8 | \alias{flast.default}
 9 | \alias{flast.matrix}
10 | \alias{flast.data.frame}
11 | \alias{flast.grouped_df}
12 | \title{Fast (Grouped) First and Last Value for Matrix-Like Objects}  % Vectors, Matrix and Data Frame Columns}
13 | \description{
 14 | \code{ffirst} and \code{flast} are S3 generic functions that (column-wise) return the first and last values in \code{x}, (optionally) grouped by \code{g}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (groupwise) first and last values.
15 | }
16 | 
17 | \usage{
18 | ffirst(x, \dots)
19 | flast(x, \dots)
20 | 
21 | \method{ffirst}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
22 |        use.g.names = TRUE, \dots)
23 | \method{flast}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
24 |       use.g.names = TRUE, \dots)
25 | 
26 | \method{ffirst}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
27 |        use.g.names = TRUE, drop = TRUE, \dots)
28 | \method{flast}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
29 |       use.g.names = TRUE, drop = TRUE, \dots)
30 | 
31 | \method{ffirst}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
32 |        use.g.names = TRUE, drop = TRUE, \dots)
33 | \method{flast}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
34 |       use.g.names = TRUE, drop = TRUE, \dots)
35 | 
36 | \method{ffirst}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]],
37 |        use.g.names = FALSE, keep.group_vars = TRUE, \dots)
38 | \method{flast}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]],
39 |       use.g.names = FALSE, keep.group_vars = TRUE, \dots)
40 | }
41 | \arguments{
42 | \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').}
43 | 
44 | \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.}
45 | 
46 | \item{TRA}{an integer or quoted operator indicating the transformation to perform:
47 | 0 - "na"     |     1 - "fill"     |     2 - "replace"     |     3 - "-"     |     4 - "-+"     |     5 - "/"     |     6 - "\%"     |     7 - "+"     |     8 - "*"     |     9 - "\%\%"     |     10 - "-\%\%". See \code{\link{TRA}}.}
48 | 
49 | 
 50 | \item{na.rm}{logical. \code{TRUE} skips missing values and returns the first / last non-missing value, i.e. if the first (1) / last (n) value is \code{NA}, take the second (2) / second-to-last (n-1) value etc.}
51 | 
52 | \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.}
53 | 
54 | \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.}
55 | 
56 | \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.}
57 | 
58 | \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.}
59 | 
60 | }
61 | \value{
62 | \code{ffirst} returns the first value in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its first value, grouped by \code{g}. Similarly \code{flast} returns the last value in \code{x}, \dots
63 | }
64 | \note{
65 | Both functions are significantly faster if \code{na.rm = FALSE}, particularly \code{ffirst} which can take direct advantage of the 'group.starts' elements in \code{\link{GRP}} objects.
66 | }
67 | \seealso{
68 | \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview}
69 | }
70 | \examples{
71 | ## default vector method
72 | ffirst(airquality$Ozone)                   # Simple first value
73 | ffirst(airquality$Ozone, airquality$Month) # Grouped first value
74 | ffirst(airquality$Ozone, airquality$Month,
75 |        na.rm = FALSE)                      # Grouped first, but without skipping initial NA's
76 | 
77 | ## data.frame method
78 | ffirst(airquality)
79 | ffirst(airquality, airquality$Month)
80 | ffirst(airquality, airquality$Month, na.rm = FALSE) # Again first Ozone measurement in month 6 is NA
81 | 
82 | ## matrix method
83 | aqm <- qM(airquality)
84 | ffirst(aqm)
85 | ffirst(aqm, airquality$Month) # etc..
86 | \donttest{ % The tidyverse regularly causes havoc to CRAN tests in other packages, therefore this is not tested
87 | ## method for grouped data frames - created with dplyr::group_by or fgroup_by
88 | library(dplyr)
89 | airquality |> group_by(Month) |> ffirst()
90 | airquality |> group_by(Month) |> select(Ozone) |> ffirst(na.rm = FALSE)
91 | }
92 | # Note: All examples generalize to flast.
93 | }
94 | \keyword{univar}
95 | \keyword{manip}
96 | 


--------------------------------------------------------------------------------
/tests/testthat/test-pivot.R:
--------------------------------------------------------------------------------
 1 | context("pivot")
 2 | 
 3 | skip_if_not_installed("data.table")  # melt() / dcast() from data.table serve as reference implementations below
 4 | library(data.table)
 5 | mtcDT <- qDT(mtcars)  # qDT() conversions of the test datasets
 6 | mtcnaDT <- qDT(na_insert(mtcars))  # mtcars after na_insert(), used for the na.rm = TRUE comparisons
 7 | irisDT <- qDT(iris)
 8 | wldDT <- qDT(wlddev)
 9 | GGDCDT <- qDT(GGDC10S)
10 | 
11 | rmnic <- function(x) {  # remove the "na.included" class from every column reported by fact_vars(x, "indices")
12 |   fidx <- fact_vars(x, "indices")
13 |   if(length(fidx)) for (j in fidx) oldClass(x[[j]]) <- setdiff(oldClass(x[[j]]), "na.included")
14 |   x  # returned as is when no such columns exist
15 | }
16 | 
17 | test_that("long pivots work properly", {  # pivot() in its default (long) mode is compared with data.table::melt()
18 |   # No id's
19 |   expect_identical(rmnic(pivot(mtcDT)), melt(mtcDT, measure.vars = seq_along(mtcDT)))  # rmnic() drops the "na.included" class before comparing
20 |   expect_identical(rmnic(pivot(mtcDT, values = 3:11)), melt(mtcDT, measure.vars = 3:11))
21 |   expect_identical(rmnic(pivot(mtcnaDT, na.rm = TRUE)), melt(mtcnaDT, measure.vars = seq_along(mtcnaDT), na.rm = TRUE))
22 |   expect_identical(rmnic(pivot(mtcnaDT, values = 3:11, na.rm = TRUE)), melt(mtcnaDT, measure.vars = 3:11, na.rm = TRUE))
23 |   # labels = TRUE / "<name>": only the resulting column names are checked
24 |   expect_identical(names(pivot(gv(wlddev, 9:10), labels = TRUE)), c("variable", "label", "value"))
25 |   expect_identical(names(pivot(gv(wlddev, 9:10), labels = "bla")), c("variable", "bla", "value"))
26 |   expect_identical(names(pivot(gv(wlddev, 9:10), labels = TRUE, na.rm = TRUE)), c("variable", "label", "value"))
27 |   expect_identical(names(pivot(gv(wlddev, 9:10), labels = "bla", na.rm = TRUE)), c("variable", "bla", "value"))
28 |   expect_warning(pivot(mtcnaDT, check.dups = TRUE))  # check.dups = TRUE is expected to warn here
29 | 
30 |   # with ids
31 |   expect_identical(rmnic(pivot(irisDT, "Species")), melt(irisDT, "Species"))
32 |   expect_identical(rmnic(setLabels(pivot(wldDT, 1:8), NULL)), setLabels(melt(wldDT, 1:8), NULL))  # labels are set to NULL on both sides before comparing
33 |   expect_identical(rmnic(setLabels(pivot(wldDT, 1:8, na.rm = TRUE), NULL)), setLabels(melt(wldDT, 1:8, na.rm = TRUE), NULL))
34 |   expect_warning(pivot(irisDT, "Species", check.dups = TRUE))
35 |   # with labels
36 |   expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, labels = TRUE)), c("iso3c", "year", "variable", "label", "value"))
37 |   expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, names = list("var", "val"), labels = "lab")), c("iso3c", "year", "var", "lab", "val"))  # unnamed list: variable name first, value name second
38 |   expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, names = list(value = "val"), labels = "lab")), c("iso3c", "year", "variable", "lab", "val"))  # named list: only the given name is changed
39 |   expect_identical(names(pivot(wldDT, c("iso3c", "year"), values = 9:10, names = list(variable = "var"), labels = "lab")), c("iso3c", "year", "var", "lab", "value"))
40 | 
41 | })
42 | 
43 | 
44 | test_that("wide pivots work properly", {  # pivot(how = "wider") is compared with data.table::dcast()
45 | 
46 |   # 1 column
47 |   expect_identical(qDF(dcast(wldDT, iso3c ~ year, value.var = "PCGDP")),
48 |                    qDF(pivot(wldDT, "iso3c", "PCGDP", "year", how = "wider", sort = "ids")))  # both sides are converted with qDF() before comparing
49 |   expect_identical(qDF(dcast(wldDT, country ~ year, value.var = "PCGDP")),
50 |                    qDF(pivot(wldDT, "country", "PCGDP", "year", how = "wider")))
51 | 
52 |   # 2 columns
53 |   expect_identical(qDF(dcast(wldDT, iso3c ~ year, value.var = c("PCGDP", "LIFEEX"))),
54 |                    qDF(pivot(wldDT, "iso3c", c("PCGDP", "LIFEEX"), "year", how = "wider", sort = "ids")))
55 |   expect_identical(qDF(dcast(wldDT, country ~ year, value.var = c("PCGDP", "LIFEEX"))),
56 |                    qDF(pivot(wldDT, "country", c("PCGDP", "LIFEEX"), "year", how = "wider")))
57 | 
58 |   # pivot(wlddev, "iso3c", "PCGDP", "year", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names"))
59 |   # pivot(wlddev, "iso3c", "PCGDP", "year", "decade", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names"))
60 |   # pivot(wlddev, "iso3c", c("PCGDP", "LIFEEX"), "year", "decade", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names"))
61 |   # pivot(wlddev, "iso3c", c("PCGDP", "LIFEEX"), "year", "decade", how = "wider", check.dups = TRUE, na.rm = TRUE, sort = c("ids", "names"), transpose = c("cols", "names"))
62 | 
63 |   # 1 column: sum, mean, min, max
64 |   for (f in .c(sum, mean, min, max)) {  # FUN passed as a function name (character string)
65 |     expect_equal(dapply(dcast(wldDT[is.finite(PCGDP)], income ~ year, value.var = "PCGDP", fun = match.fun(f)), unattrib, return = "data.frame"),
66 |                  dapply(pivot(wldDT, "income", "PCGDP", "year", how = "wider", FUN = f, na.rm = TRUE, sort = TRUE), unattrib, return = "data.frame"))
67 |   }
68 |   for (f in .c(sum, mean, min, max)) {  # FUN passed as the base R function object
69 |     expect_equal(dapply(dcast(wldDT[is.finite(PCGDP)], income ~ year, value.var = "PCGDP", fun = match.fun(f)), unattrib, return = "data.frame"),
70 |                  dapply(pivot(wldDT, "income", "PCGDP", "year", how = "wider", FUN = match.fun(f), na.rm = TRUE, sort = TRUE), unattrib, return = "data.frame"))
71 |   }
72 |   for (f in .c(sum, mean, min, max)) {  # FUN passed as the function named paste0("f", f), i.e. fsum, fmean, fmin, fmax
73 |     expect_equal(dapply(dcast(wldDT[is.finite(PCGDP)], income ~ year, value.var = "PCGDP", fun = match.fun(f)), unattrib, return = "data.frame"),
74 |                  dapply(pivot(wldDT, "income", "PCGDP", "year", how = "wider", FUN = match.fun(paste0("f", f)), na.rm = TRUE, sort = TRUE), unattrib, return = "data.frame"))
75 |   }
76 | 
77 | })
78 | 


--------------------------------------------------------------------------------
/man/time-series-panel-series.Rd:
--------------------------------------------------------------------------------
 1 | \name{time-series-panel-series} % \name{Time Series and Panel Computations}
 2 | \alias{A7-time-series-panel-series}
 3 | \alias{time-series-panel-series}
 4 | % \alias{tscomp}
 5 | \title{Time Series and Panel Series} % \emph{collapse}
 6 | 
 7 | \description{
 8 | \emph{collapse} provides a flexible and powerful set of functions and classes to work with time-dependent data:
 9 | \itemize{
10 | \item \code{\link[=findex_by]{findex_by/iby}} creates an 'indexed_frame': a flexible structure that can be imposed upon any data-frame like object and facilitates \bold{indexed (time-aware) computations on time series and panel data}. Indexed frames are composed of 'indexed_series', which can also be created from vector and matrix-based objects using the \code{reindex} function. Further functions \code{findex/ix}, \code{unindex}, \code{is_irregular} and \code{to_plm} help operate these classes, check for irregularity, and ensure \emph{plm} compatibility. Methods are defined for various time series, data transformation and data manipulation functions in \emph{collapse}.
11 | 
12 | \item \code{\link{timeid}} efficiently converts numeric time sequences, such as 'Date' or 'POSIXct' vectors, to a \bold{time-factor / integer id}, where a unit-step represents the greatest common divisor of the underlying sequence.
13 | 
14 | \item \code{\link{flag}}, and the lag- and lead- operators \code{\link{L}} and \code{\link{F}} are S3 generics to efficiently compute sequences of \bold{lags and leads} on regular or irregular / unbalanced time series and panel data.
15 | \item Similarly, \code{\link{fdiff}}, \code{\link{fgrowth}}, and the operators \code{\link{D}}, \code{\link{Dlog}} and \code{\link{G}} are S3 generics to efficiently compute sequences of suitably lagged / leaded and iterated \bold{differences, log-differences and growth rates}. \code{\link[=fdiff]{fdiff/D/Dlog}} can also compute \bold{quasi-differences} of the form \eqn{x_t - \rho x_{t-1}}.
16 | \item \code{\link{fcumsum}} is an S3 generic to efficiently compute \bold{cumulative sums} on time series and panel data. In contrast to \code{\link{cumsum}}, it can handle missing values and supports both grouped and indexed / ordered computations.
17 | \item \code{\link{psmat}} is an S3 generic to efficiently convert panel-vectors / 'indexed_series' and data frames / 'indexed_frame's to \bold{panel series matrices and 3D arrays}, respectively (where time, individuals and variables receive different dimensions, allowing for fast indexation, visualization, and computations).
18 | \item \code{\link{psacf}}, \code{\link{pspacf}} and \code{\link{psccf}} are S3 generics to compute estimates of the \bold{auto-, partial auto- and cross- correlation or covariance functions} for panel-vectors / 'indexed_series', and multivariate versions for data frames / 'indexed_frame's.
19 | }
20 | }
21 | \section{Table of Functions}{
22 |   \tabular{lllll}{\emph{ S3 Generic }   \tab\tab \emph{ Methods }   \tab\tab \emph{ Description }  \cr % \Sexpr{"\u200B"} \Sexpr{"\u200B"} \Sexpr{"\u200B"} \Sexpr{"\u200B"}
23 |                  \code{\link[=findex_by]{findex_by/iby}}, \code{findex/ix}, \code{reindex}, \code{unindex}, \code{is_irregular}, \code{to_plm} \tab\tab For vectors, matrices and data frames / lists.   \tab\tab Fast and flexible time series and panel data classes 'indexed_series' and 'indexed_frame'. \cr
24 |                  \code{\link{timeid}} \tab\tab For time sequences represented by integer or double vectors / objects.  \tab\tab Generate integer time-id/factor \cr
25 | 
26 |                  \code{\link[=flag]{flag/L/F}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df}  \tab\tab Compute (sequences of) lags and leads \cr
27 |                  \code{\link[=fdiff]{fdiff/D/Dlog}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df}  \tab\tab Compute (sequences of lagged / leaded and iterated) (quasi-)differences or log-differences \cr
28 |                  \code{\link[=fgrowth]{fgrowth/G}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df}  \tab\tab Compute (sequences of lagged / leaded and iterated) growth rates (exact, via log-differencing, or compounded) \cr
29 |                  \code{\link{fcumsum}} \tab\tab \code{default, matrix, data.frame, pseries, pdata.frame, grouped_df}  \tab\tab Compute cumulative sums \cr
30 |                  \code{\link{psmat}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Convert panel data to matrix / array \cr
31 |                  \code{\link{psacf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute ACF on panel data \cr
32 |                  \code{\link{pspacf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute PACF on panel data \cr
33 |                  \code{\link{psccf}} \tab\tab \code{default, pseries, data.frame, pdata.frame} \tab\tab Compute CCF on panel data
34 | }
35 | }
36 | \seealso{
37 | \link[=collapse-documentation]{Collapse Overview}, \link[=data-transformations]{Data Transformations}
38 | }
39 | \keyword{ts}
40 | \keyword{manip}
41 | \keyword{documentation}
42 | 
43 | 
44 | 


--------------------------------------------------------------------------------
/misc/legacy/sorted out 1.5.3 - 1.6.0/fmax.c:
--------------------------------------------------------------------------------
  1 | #include "collapse_c.h"
  2 | 
  3 | void fmax_double_impl(double *pout, double *px, int ng, int *pg, int narm, int l) { // maximum of px[0..l-1]: ng == 0 writes the overall maximum to pout[0], otherwise pout[0..ng-1] receives one maximum per group id in pg (ids 1..ng)
  4 |   if(ng == 0) {
  5 |     double max;
  6 |     if(narm) { // skip missing values: step back from the end to the last non-NaN element, then compare the remaining ones
  7 |       int j = l-1;
  8 |       max = px[j];
  9 |       while(ISNAN(max) && j!=0) max = px[--j];
 10 |       if(j != 0) for(int i = j; i--; ) {
 11 |         if(max < px[i]) max = px[i]; // the comparison is false for NaN, so missing values never replace max
 12 |       }
 13 |     } else { // propagate missing values: the first NaN encountered becomes the result
 14 |       max = px[0];
 15 |       for(int i = 0; i != l; ++i) {
 16 |         if(ISNAN(px[i])) {
 17 |           max = px[i];
 18 |           break;
 19 |         } else {
 20 |           if(max < px[i]) max = px[i];
 21 |         }
 22 |       }
 23 |     }
 24 |     pout[0] = max;
 25 |   } else {
 26 |     if(narm) {
 27 |       for(int i = ng; i--; ) pout[i] = NA_REAL; // groups without any non-missing value keep NA
 28 |       --pout; // shift the base so that group ids 1..ng index pout directly
 29 |       for(int i = l; i--; ) if(pout[pg[i]] < px[i] || ISNAN(pout[pg[i]])) pout[pg[i]] = px[i];  // fastest
 30 |     } else {
 31 |       for(int i = ng; i--; ) pout[i] = R_NegInf; // start below every double; DBL_MIN is the smallest POSITIVE double and made groups with only values <= 0 return DBL_MIN
 32 |       --pout; // shift the base so that group ids 1..ng index pout directly
 33 |       for(int i = l; i--; ) if(pout[pg[i]] < px[i] || ISNAN(px[i])) pout[pg[i]] = px[i];  // a stored NaN is never replaced by a number because the comparison is then false
 34 |     }
 35 |   }
 36 | }
 37 | 
 38 | void fmax_int_impl(int *pout, int *px, int ng, int *pg, int narm, int l) { // integer maximum of px[0..l-1]: overall into pout[0] (ng == 0) or one value per group id in pg
 39 |   if(ng == 0) {
 40 |     int best;
 41 |     if(narm) {
 42 |       best = NA_INTEGER; // same as INT_MIN
 43 |       for(int k = l - 1; k >= 0; --k) best = px[k] > best ? px[k] : best;
 44 |     } else {
 45 |       best = px[0];
 46 |       for(int k = 0; k != l; ++k) {
 47 |         if(px[k] == NA_INTEGER) {
 48 |           best = NA_INTEGER; // a missing value decides the result
 49 |           break;
 50 |         }
 51 |         // no missing value at k: ordinary running maximum
 52 |         if(px[k] > best) best = px[k];
 53 |       }
 54 |     }
 55 |     pout[0] = best;
 56 |   } else {
 57 |     if(narm) {
 58 |       for(int k = 0; k != ng; ++k) pout[k] = NA_INTEGER;
 59 |       int *byg = pout - 1; // group ids start at 1
 60 |       for(int k = l - 1; k >= 0; --k) if(px[k] > byg[pg[k]]) byg[pg[k]] = px[k];
 61 |     } else {
 62 |       for(int k = 0; k != ng; ++k) pout[k] = INT_MIN + 1;
 63 |       int *byg = pout - 1; // group ids start at 1
 64 |       for(int k = l - 1; k >= 0; --k) {int v = px[k], *cur = byg + pg[k]; if(v == NA_INTEGER || (*cur != NA_INTEGER && *cur < v)) *cur = v;}
 65 |     }
 66 |   }
 67 | }
 68 | 
 69 | 
 70 | SEXP fmaxC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm) { // maximum of vector x: a length-1 result if ng == 0, otherwise one value per group
 71 |   int l = length(x), tx = TYPEOF(x), ng = asInteger(Rng), narm = asInteger(Rnarm);
 72 |   if (l < 1) return x; // Prevents segfault for numeric(0) #101
 73 |   if(ng && l != length(g)) error("length(g) must match length(x)");
 74 |   if(tx == LGLSXP) tx = INTSXP; // logical input goes through the integer implementation and yields an integer result
 75 |   SEXP out = PROTECT(allocVector(tx, ng == 0 ? 1 : ng));
 76 |   switch(tx) {
 77 |   case REALSXP: fmax_double_impl(REAL(out), REAL(x), ng, INTEGER(g), narm, l);
 78 |     break;
 79 |   case INTSXP: fmax_int_impl(INTEGER(out), INTEGER(x), ng, INTEGER(g), narm, l);
 80 |     break;
 81 |   default: error("Unsupported SEXP type");
 82 |   }
 83 |   if(ng && !isObject(x)) copyMostAttrib(x, out); // attributes are only carried over for grouped results of non-object x
 84 |   UNPROTECT(1);
 85 |   return out;
 86 | }
 87 | 
 88 | SEXP fmaxmC(SEXP x, SEXP Rng, SEXP g, SEXP Rnarm, SEXP Rdrop) { // column-wise maximum of matrix x, overall (ng == 0) or by group
 89 |   SEXP dim = getAttrib(x, R_DimSymbol);
 90 |   if(isNull(dim)) error("x is not a matrix");
 91 |   int tx = TYPEOF(x), l = INTEGER(dim)[0], col = INTEGER(dim)[1], *pg = INTEGER(g),
 92 |     ng = asInteger(Rng), ng1 = ng == 0 ? 1 : ng, narm = asInteger(Rnarm); // l = rows, col = columns, ng1 = number of results per column
 93 |   if (l < 1) return x; // Prevents segfault for numeric(0) #101
 94 |   if(ng && l != length(g)) error("length(g) must match nrow(x)");
 95 |   if(tx == LGLSXP) tx = INTSXP; // logical input goes through the integer implementation
 96 |   SEXP out = PROTECT(allocVector(tx, ng == 0 ? col : col * ng));
 97 |   switch(tx) {
 98 |   case REALSXP: {
 99 |     double *px = REAL(x), *pout = REAL(out);
100 |     for(int j = 0; j != col; ++j) fmax_double_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); // column j of x fills the j-th run of ng1 elements of out
101 |     break;
102 |   }
103 |   case INTSXP: {
104 |     int *px = INTEGER(x), *pout = INTEGER(out);
105 |     for(int j = 0; j != col; ++j) fmax_int_impl(pout + j*ng1, px + j*l, ng, pg, narm, l); // column j of x fills the j-th run of ng1 elements of out
106 |     break;
107 |   }
108 |   default: error("Unsupported SEXP type");
109 |   }
110 |   matCopyAttr(out, x, Rdrop, ng); // helper defined elsewhere; presumably sets dimensions / names on out depending on Rdrop and ng - TODO confirm
111 |   UNPROTECT(1);
112 |   return out;
113 | }
114 | 
115 | SEXP fmaxlC(SEXP x, SEXP Rng, SEXP g, SEXP w, SEXP Rnarm, SEXP Rdrop) { // applies fmaxC to every element of list x; the argument w is not used in this function
116 |   int l = length(x), ng = asInteger(Rng);
117 |   if(l < 1) return x; // needed ??
118 |   if(ng == 0 && asLogical(Rdrop)) { // ungrouped with drop: return a double vector with one maximum per list element
119 |     SEXP out = PROTECT(allocVector(REALSXP, l)), *px = SEXPPTR(x);
120 |     double *pout = REAL(out);
121 |     for(int j = 0; j != l; ++j) pout[j] = asReal(fmaxC(px[j], Rng, g, Rnarm)); // each result is converted to double immediately
122 |     setAttrib(out, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); // the names of x become the names of the result
123 |     UNPROTECT(1);
124 |     return out;
125 |   }
126 |   SEXP out = PROTECT(allocVector(VECSXP, l)), *pout = SEXPPTR(out), *px = SEXPPTR(x); // otherwise: a list with one fmaxC result per element of x
127 |   for(int j = 0; j != l; ++j) pout[j] = fmaxC(px[j], Rng, g, Rnarm); // NOTE(review): stored through the raw pointer from SEXPPTR (defined elsewhere), not via SET_VECTOR_ELT - confirm this is intended
128 |   if(ng == 0) for(int j = 0; j != l; ++j) copyMostAttrib(px[j], pout[j]); // fmaxC itself only copies attributes when ng != 0
129 |   DFcopyAttr(out, x, ng); // helper defined elsewhere; presumably restores the list / data frame attributes on out - TODO confirm
130 |   UNPROTECT(1);
131 |   return out;
132 | }
133 | 


--------------------------------------------------------------------------------
/R/fndistinct.R:
--------------------------------------------------------------------------------
  1 | 
  2 | fndistinct <- function(x, ...) UseMethod("fndistinct") # S3 generic: dispatches to the fndistinct.* methods defined below
  3 | 
  4 | fndistinct.default <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, nthreads = .op[["nthreads"]], ...) {
  5 |   # Default (vector) method: C_fndistinct is called on x, optionally grouped by g. The result is returned
  6 |   # directly (named by group if use.g.names = TRUE) or, if TRA is supplied, passed to TRAC() together with x.
  7 |   if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # groups are only returned when they will be used as names
  8 |   nd <- .Call(C_fndistinct,x,g,na.rm,nthreads)
  9 |   # Transformation requested: ... is forwarded to TRAC()
 10 |   if(!is.null(TRA)) return(TRAC(x,nd,g[[2L]],TRA, ...))
 11 |   # No transformation: any further arguments are handed to unused_arg_action()
 12 |   if(!missing(...)) unused_arg_action(match.call(), ...)
 13 |   if(!is.null(g) && use.g.names) names(nd) <- GRPnames(g, FALSE)
 14 |   nd
 15 | }
 16 | 
 17 | fndistinct.matrix <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], ...) {
 18 |   # Matrix method: C_fndistinctm is called on x, optionally grouped by g; with TRA the result is passed to TRAmC().
 19 |   if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # groups are only returned when they will be used as names
 20 |   nd <- .Call(C_fndistinctm,x,g,na.rm,drop,nthreads)
 21 |   # Transformation requested: ... is forwarded to TRAmC()
 22 |   if(!is.null(TRA)) return(TRAmC(x,nd,g[[2L]],TRA, ...))
 23 |   if(!missing(...)) unused_arg_action(match.call(), ...)
 24 |   # Grouped result: group names become the row names
 25 |   if(!is.null(g) && use.g.names) dimnames(nd)[[1L]] <- GRPnames(g)
 26 |   nd
 27 | }
 28 | 
 29 | fndistinct.zoo <- function(x, ...) if(is.matrix(x)) fndistinct.matrix(x, ...) else fndistinct.default(x, ...) # matrix objects go to the matrix method, everything else to the default method
 30 | fndistinct.units <- fndistinct.zoo # the 'units' method is the same function as the 'zoo' method
 31 | 
 32 | fndistinct.data.frame <- function(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], ...) {
 33 |   # Data frame / list method: C_fndistinctl is called on x, optionally grouped by g; with TRA the result is passed to TRAlC().
 34 |   if(!is.null(g)) g <- GRP(g, return.groups = use.g.names && is.null(TRA), call = FALSE) # groups are only returned when they will be used as names
 35 |   out <- .Call(C_fndistinctl,x,g,na.rm,drop,nthreads)
 36 |   if(!is.null(TRA)) return(TRAlC(x,out,g[[2L]],TRA, ...))
 37 |   if(!missing(...)) unused_arg_action(match.call(), ...)
 38 |   # Row names are only attached to grouped results, on request, and never for data.table input
 39 |   if(is.null(g) || !use.g.names || inherits(x, "data.table")) return(out)
 40 |   rn <- GRPnames(g)
 41 |   if(length(rn)) attr(out, "row.names") <- rn
 42 |   out
 43 | }
 44 | 
 45 | fndistinct.list <- function(x, ...) fndistinct.data.frame(x, ...) # lists are processed by the data frame method
 46 | 
 47 | fndistinct.grouped_df <- function(x, TRA = NULL, na.rm = .op[["na.rm"]], use.g.names = FALSE, keep.group_vars = TRUE, nthreads = .op[["nthreads"]], ...) { # grouped data frame method: the grouping is taken from x itself
 48 |   g <- GRP.grouped_df(x, call = FALSE)
 49 |   if(is.null(g[[4L]])) keep.group_vars <- FALSE # g[[4L]] supplies the group columns prepended below; without it there is nothing to keep
 50 |   nam <- attr(x, "names")
 51 |   gn <- which(nam %in% g[[5L]]) # positions of the grouping columns in x (g[[5L]] holds their names)
 52 |   nTRAl <- is.null(TRA) # TRUE: aggregation, FALSE: transformation via TRA
 53 |   gl <- length(gn) > 0L # TRUE if x contains grouping columns
 54 |   if(gl || nTRAl) {
 55 |     ax <- attributes(x)
 56 |     attributes(x) <- NULL # x becomes a plain list; the (modified) attributes in ax are applied to the result
 57 |     if(nTRAl) { # aggregation: one row per group
 58 |       if(!missing(...)) unused_arg_action(match.call(), ...)
 59 |       ax[["groups"]] <- NULL
 60 |       ax[["class"]] <- fsetdiff(ax[["class"]], c("GRP_df", "grouped_df")) # the result is no longer a grouped data frame
 61 |       ax[["row.names"]] <- if(use.g.names) GRPnames(g) else .set_row_names(g[[1L]]) # g[[1L]] is used as the number of rows
 62 |       if(gl) {
 63 |         if(keep.group_vars) { # group columns first, then the results for the remaining columns
 64 |           ax[["names"]] <- c(g[[5L]], nam[-gn])
 65 |           return(setAttributes(c(g[[4L]],.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads)), ax))
 66 |         }
 67 |         ax[["names"]] <- nam[-gn]
 68 |         return(setAttributes(.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads), ax))
 69 |       } else if(keep.group_vars) { # grouping columns are not part of x: prepend them from g
 70 |         ax[["names"]] <- c(g[[5L]], nam)
 71 |         return(setAttributes(c(g[[4L]],.Call(C_fndistinctl,x,g,na.rm,FALSE,nthreads)), ax))
 72 |       } else return(setAttributes(.Call(C_fndistinctl,x,g,na.rm,FALSE,nthreads), ax))
 73 |     } else if(keep.group_vars) { # transformation, x contains grouping columns: keep them untransformed in front
 74 |       ax[["names"]] <- c(nam[gn], nam[-gn])
 75 |       return(setAttributes(c(x[gn],TRAlC(x[-gn],.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads),g[[2L]],TRA, ...)), ax))
 76 |     }
 77 |     ax[["names"]] <- nam[-gn] # transformation with the grouping columns dropped
 78 |     return(setAttributes(TRAlC(x[-gn],.Call(C_fndistinctl,x[-gn],g,na.rm,FALSE,nthreads),g[[2L]],TRA, ...), ax))
 79 |   } else return(TRAlC(x,.Call(C_fndistinctl,x,g,na.rm,FALSE,nthreads),g[[2L]],TRA, ...)) # transformation and no grouping columns in x: all columns are transformed
 80 | }
 81 | 
 82 | 
 83 | fNdistinct <- function(x, ...) { # old name of the generic: emits a renaming note, then dispatches to the fndistinct methods
 84 |   message("Note that 'fNdistinct' was renamed to 'fndistinct'. The S3 generic will not be removed anytime soon, but please use updated function names in new code, see help('collapse-renamed')")
 85 |   UseMethod("fndistinct")
 86 | }
 87 | fNdistinct.default <- function(x, ...) { # Default method under the old name: forwards x and ... to the fndistinct methods
 88 |   if(is.matrix(x) && !inherits(x, "matrix")) return(fndistinct.matrix(x, ...)) # matrices that do not inherit from class "matrix" are sent to the matrix method
 89 |   # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')")
 90 |   fndistinct.default(x, ...)
 91 | }
 92 | fNdistinct.matrix <- function(x, ...) { # Matrix method under the old name: forwards x and ... to fndistinct.matrix
 93 |   # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')")
 94 |   fndistinct.matrix(x, ...)
 95 | }
 96 | fNdistinct.data.frame <- function(x, ...) { # Data frame method under the old name: forwards x and ... to fndistinct.data.frame
 97 |   # .Deprecated(msg = "This method belongs to a renamed function and will be removed end of 2022, see help('collapse-renamed')")
 98 |   fndistinct.data.frame(x, ...)
 99 | }
100 | 
101 | 


--------------------------------------------------------------------------------
/man/list-processing.Rd:
--------------------------------------------------------------------------------
 1 | \name{list-processing}
 2 | \alias{A8-list-processing}
 3 | \alias{list-processing}
 4 | \title{List Processing} % \emph{collapse}
 5 | 
 6 | \description{
 7 | \emph{collapse} provides the following set of functions to efficiently work with lists of R objects:
 8 | \itemize{
 9 | \item \bold{Search and Identification}\itemize{
10 | \item \code{\link{is_unlistable}} checks whether a (nested) list is composed of atomic objects in all final nodes, and thus unlistable to an atomic vector using \code{\link{unlist}}.
11 | \item \code{\link{ldepth}} determines the level of nesting of the list (i.e. the maximum number of nodes of the list-tree).
12 | \item \code{\link{has_elem}} searches elements in a list using element names, regular expressions applied to element names, or a function applied to the elements, and returns \code{TRUE} if any matches were found.
13 | }
14 | 
15 | \item \bold{Subsetting} \itemize{
16 | \item \code{\link{atomic_elem}} examines the top-level of a list and returns a sublist with the atomic elements. Conversely \code{\link{list_elem}} returns the sublist of elements which are themselves lists or list-like objects.
17 |  \item \code{\link{reg_elem}} and \code{\link{irreg_elem}} are recursive versions of the former. \code{\link{reg_elem}} extracts the 'regular' part of the list-tree leading to atomic elements in the final nodes, while \code{\link{irreg_elem}} extracts the 'irregular' part of the list tree leading to non-atomic elements in the final nodes. (\emph{Tip}: try calling both on an \code{lm} object). Naturally for all lists \code{l}, \code{is_unlistable(reg_elem(l))} evaluates to \code{TRUE}.
18 | \item \code{\link{get_elem}} extracts elements from a list using element names, regular expressions applied to element names, a function applied to the elements, or element-indices used to subset the lowest-level sub-lists. By default the result is presented as a simplified list containing all matching elements. With the \code{keep.tree} option, however, \code{\link{get_elem}} can also be used to subset lists, i.e. maintain the full tree but cut off non-matching branches.
19 | }
20 | 
21 | \item \bold{Splitting and Transposition} \itemize{
22 | \item \code{\link{rsplit}} recursively splits a vector or data frame into subsets according to combinations of (multiple) vectors / factors - by default returning a (nested) list. If \code{flatten = TRUE}, the list is flattened yielding the same result as \code{\link{split}}. \code{rsplit} is also faster than \code{\link{split}}, particularly for data frames.
23 | 
24 | \item \code{\link{t_list}} efficiently transposes nested lists of lists, such as those obtained from splitting a data frame by multiple variables using \code{\link{rsplit}}.
25 | }
26 | 
27 | \item \bold{Apply Functions} \itemize{
28 | \item \code{\link{rapply2d}} is a recursive version of \code{\link{lapply}}, with two key differences to \code{\link{rapply}}, used to apply a function to nested lists of data frames or other list-based objects.
29 | }
30 | 
31 | \item \bold{Unlisting / Row-Binding} \itemize{
32 | \item \code{\link{unlist2d}} efficiently unlists unlistable lists in 2-dimensions and creates a data frame (or \emph{data.table}) representation of the list. This is done by recursively flattening and row-binding R objects in the list while creating identifier columns for each level of the list-tree and (optionally) saving the row-names of the objects in a separate column. \code{\link{unlist2d}} can thus also be understood as a recursive generalization of \code{do.call(rbind, l)}, for lists of vectors, data frames, arrays or heterogeneous objects. A simpler function for non-recursive row-binding of lists of lists / data.frames is also available: \code{\link{rowbind}}.
33 | }
34 | 
35 | }
36 | }
37 | \section{Table of Functions}{
38 |   \tabular{lll}{\emph{ Function }  \tab\tab \emph{ Description }  \cr
39 |   % \code{\link{is.regular}} \tab\tab \code{function(x) is.atomic(x) || is.list(x)} \cr
40 |   \code{\link{is_unlistable}} \tab\tab Checks if list is unlistable \cr
41 |   \code{\link{ldepth}} \tab\tab Level of nesting / maximum depth of list-tree \cr
42 |   \code{\link{has_elem}} \tab\tab Checks if list contains a certain element \cr
43 |   \code{\link{get_elem}} \tab\tab Subset list / extract certain elements \cr
44 |   \code{\link{atomic_elem}} \tab\tab Top-level subset atomic elements \cr
45 |   \code{\link{list_elem}} \tab\tab Top-level subset list/list-like elements \cr
46 |   \code{\link{reg_elem}} \tab\tab Recursive version of \code{atomic_elem}: Subset / extract 'regular' part of list \cr
47 |   \code{\link{irreg_elem}} \tab\tab Subset / extract non-regular part of list \cr
48 |   \code{\link{rsplit}} \tab\tab Recursively split vectors or data frames / lists \cr
49 |   \code{\link{t_list}} \tab\tab Transpose lists of lists \cr
50 |   \code{\link{rapply2d}} \tab\tab Recursively apply functions to lists of data objects \cr
51 |   \code{\link{unlist2d}} \tab\tab Recursively unlist/row-bind lists of data objects in 2D, to data frame or \emph{data.table} \cr
52 |   \code{\link{rowbind}} \tab\tab Non-recursive binding of lists of lists / data.frames. \cr
53 | }
54 | }
55 | \seealso{
56 | \link[=collapse-documentation]{Collapse Overview}
57 | }
58 | \keyword{list}
59 | \keyword{manip}
60 | \keyword{documentation}
61 | 
62 | 
63 | 


--------------------------------------------------------------------------------
/R/rsplit.R:
--------------------------------------------------------------------------------
  1 | 
  2 | # fsplit <- function(x, f, drop, ...) if(drop && is.factor(f))
  3 | #   split(x, .Call(Cpp_fdroplevels, f, !inherits(f, "na.included")), drop = FALSE, ...) else
  4 | #     split(x, qF(f), drop = FALSE, ...)
  5 | 
  6 | t_list2 <- function(x) .Call(Cpp_mctl, do.call(rbind, x), TRUE, 0L) # Internal helper: row-binds the elements of x and passes the result to Cpp_mctl; the rsplit methods below apply it to the split grouping vectors before recursing
  7 | 
  8 | # This is for export
  9 | t_list <- function(l) { # Exported list transpose: row-binds the elements of l, converts the result with Cpp_mctl and restores names
 10 |   lmat <- do.call(rbind, l) # one row per element of l
 11 |   dn <- dimnames(lmat)
 12 |   res <- .Call(Cpp_mctl, lmat, !is.null(dn[[2L]]), 0L) # second argument is TRUE only if lmat has column names
 13 |   if(length(rn <- dn[[1L]])) res <- lapply(res, `names<-`, rn) # if lmat has row names, set them as the names of every element of res
 14 |   .Call(C_copyMostAttrib, res, l) # presumably carries attributes of l over to res - TODO confirm against the C source
 15 | }
 16 | 
 17 | 
 18 | rsplit <- function(x, ...) UseMethod("rsplit") # S3 generic; default, matrix, zoo, units and data.frame methods are defined below
 19 | 
 20 | rsplit.default <- function(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, ...) { # , check = TRUE  -- Default method: splits x by fl, recursively (nested list) if fl is a list and flatten = FALSE
 21 |   # if(is.matrix(x) && !inherits(x, "matrix")) return(rsplit.matrix(x, fl, drop, flatten, use.names, ...))
 22 |   if(is.atomic(fl) || flatten || is_GRP(fl)) return(gsplit(x, fl, use.names, drop = drop, ...)) # atomic fl, a GRP object, or flatten = TRUE: a single call to gsplit, no nesting
 23 |   attributes(fl) <- NULL # strip all attributes from fl; from here on it is indexed as a list of grouping vectors
 24 |   # if(check) fl <- lapply(fl, qF) # necessary ? -> split.default is actually faster on non-factor variables !
 25 |   rspl <- function(y, fly) { # recursive helper: y is the data to split, fly the remaining grouping vectors
 26 |     if(length(fly) == 1L) return(gsplit(y, fly[[1L]], use.names, drop = drop, ...)) # last grouping vector: split and stop recursing
 27 |     mapply(rspl, y = gsplit(y, fly[[1L]], use.names, drop = drop, ...), # split y by the first grouping vector ...
 28 |            fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) # ... split the remaining grouping vectors by it as well, regroup them with t_list2, and recurse pairwise. Possibility to avoid transpose ? C_subsetDT ??
 29 |   }
 30 |   rspl(x, fl)
 31 | }
 32 | 
 33 | # Matrix method: requested in https://github.com/ycroissant/plm/issues/33
 34 | split_mat <- function(x, fl, dd, ...) { # Internal: splits the rows of matrix x according to fl; dd decides whether dimensions are dropped when subsetting
 35 |   ssfun <- if(dd) function(i) x[i, , drop = TRUE] else function(i) x[i, , drop = FALSE] # row-subsetting function, chosen once up front
 36 |   lapply(gsplit(NULL, fl, ...), ssfun) # each element returned by gsplit(NULL, fl, ...) is used as a row index into x
 37 | }
 38 | 
 39 | rsplit.matrix <- function(x, fl, drop = TRUE, flatten = FALSE, use.names = TRUE, drop.dim = FALSE, ...) { # Matrix method: same logic as rsplit.default, but rows are split with split_mat; drop.dim is passed on as its dd argument
 40 |   if(is.atomic(fl) || flatten || is_GRP(fl)) return(split_mat(x, fl, drop.dim, use.names, drop = drop, ...)) # atomic fl, a GRP object, or flatten = TRUE: a single call to split_mat, no nesting
 41 |   attributes(fl) <- NULL # strip all attributes from fl; from here on it is indexed as a list of grouping vectors
 42 |   rspl <- function(y, fly) { # recursive helper: y is the matrix to split, fly the remaining grouping vectors
 43 |     if(length(fly) == 1L) return(split_mat(y, fly[[1L]], drop.dim, use.names, drop = drop, ...)) # last grouping vector: split and stop recursing
 44 |     mapply(rspl, y = split_mat(y, fly[[1L]], drop.dim, use.names, drop = drop, ...), # split the rows of y by the first grouping vector ...
 45 |            fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) # ... split the remaining grouping vectors by it as well, regroup them with t_list2, and recurse pairwise
 46 |   }
 47 |   rspl(x, fl)
 48 | }
 49 | 
 50 | rsplit.zoo <- function(x, ...) if(is.matrix(x)) rsplit.matrix(x, ...) else rsplit.default(x, ...) # zoo method: calls the matrix method if x is a matrix, otherwise the default method
 51 | rsplit.units <- rsplit.zoo # the same function serves as the method for class "units"
 52 | 
 53 | # From stackoverflow package:
 54 | # rsplit <- function (x, by, drop = FALSE)
 55 | # {
 56 | #   if (is.atomic(by))
 57 | #     return(split(x, by, drop = drop))
 58 | #   attributes(by) <- NULL
 59 | #   if (length(by) == 1L)
 60 | #     return(split(x, by[[1L]], drop = drop))
 61 | #   mapply(rsplit, x = split(x, by[[1L]], drop = drop), by = t(lapply(by[-1L], split, by[[1L]], drop = drop)), drop = drop,
 62 | #          SIMPLIFY = FALSE)
 63 | # }
 64 | 
 65 | rsplit.data.frame <- function(x, by, drop = TRUE, flatten = FALSE, # check = TRUE,  -- Data frame method: splits the rows of x by 'by' (a call such as a formula, an atomic vector, a GRP object or a list of vectors)
 66 |                               cols = NULL, keep.by = FALSE, simplify = TRUE,
 67 |                               use.names = TRUE, ...) {
 68 | 
 69 |   if(is.call(by)) { # by given as a call (e.g. a formula): resolve the variables it mentions to column positions of x
 70 |     nam <- attr(x, "names")
 71 |     if(length(by) == 3L) { # two-sided: right-hand side gives the variables to split by, left-hand side the columns to split (overrides cols)
 72 |       byn <- ckmatch(all.vars(by[[3L]]), nam)
 73 |       cols <- ckmatch(all.vars(by[[2L]]), nam)
 74 |     } else { # keep.by always added: Same behavior as L or W !!
 75 |       byn <- ckmatch(all.vars(by), nam)
 76 |       if(!(is.null(cols) && keep.by)) # cols = NULL with keep.by = TRUE leaves cols NULL, so x is not subset below
 77 |         cols <- if(is.null(cols)) -byn else cols2int(cols, x, nam, FALSE) # default: all columns except the by variables
 78 |     }
 79 |     by <- .subset(x, byn) # from here on by is the list of by columns taken from x
 80 |     if(length(cols)) x <- fcolsubset(x, if(keep.by) c(byn, cols) else cols, TRUE) # with keep.by the by columns are placed in front of the selected columns
 81 |   } else if(length(cols)) # by is not a call: only apply the column selection
 82 |     x <- fcolsubset(x, cols2int(cols, x, attr(x, "names"), FALSE), TRUE)
 83 | 
 84 |   if(simplify && length(unclass(x)) == 1L) # a single remaining column is split as a vector via the default method
 85 |     return(rsplit.default(.subset2(x, 1L), by, drop, flatten, use.names, ...))  # , check
 86 |   # Note there is a data.table method: split.data.table, which can also do recursive splitting..
 87 | 
 88 |   j <- seq_along(unclass(x)) # all column positions of x, passed to C_subsetDT as the column index
 89 |   rn <- attr(x, "row.names")
 90 |   if(is.numeric(rn) || is.null(rn) || rn[1L] == "1") { # numeric or missing row names, or character row names whose first element is "1": row names are not set on the pieces
 91 |     gsplit_DF <- function(x, f, ...)
 92 |       lapply(gsplit(NULL, f, use.names, drop = drop, ...), # each element returned by gsplit(NULL, f, ...) is used as the row index i
 93 |              function(i) .Call(C_subsetDT, x, i, j, FALSE)) # .Call, .NAME = C_subsetDT, j, FALSE) -> doesn't work!
 94 |   } else { # other character row names: each piece gets the matching subset of the row names
 95 |     gsplit_DF <- function(x, f, ...) {
 96 |       rown <- attr(x, "row.names") # Need to do this, handing down from the function body doesn't work
 97 |       lapply(gsplit(NULL, f, use.names, drop = drop, ...),
 98 |              function(i) `attr<-`(.Call(C_subsetDT, x, i, j, FALSE), "row.names", rown[i]))
 99 |     }
100 |   }
101 | 
102 |   if(is.atomic(by) || flatten || is_GRP(by)) return(gsplit_DF(x, by, ...)) # atomic by, a GRP object, or flatten = TRUE: a single split, no nesting
103 | 
104 |   attributes(by) <- NULL # strip all attributes from by; from here on it is indexed as a list of grouping vectors
105 |   # if(check) by <- lapply(by, qF) # necessary ?
106 |   rspl_DF <- function(y, fly) { # recursive helper: y is the data frame to split, fly the remaining grouping vectors
107 |     if(length(fly) == 1L) return(gsplit_DF(y, fly[[1L]], ...)) # last grouping vector: split and stop recursing
108 |     mapply(rspl_DF, y = gsplit_DF(y, fly[[1L]], ...), # split y by the first grouping vector ...
109 |            fly = t_list2(lapply(fly[-1L], gsplit, fly[[1L]], use.names, drop = drop, ...)), SIMPLIFY = FALSE) # ... split the remaining grouping vectors by it as well, regroup them with t_list2, and recurse pairwise. Possibility to avoid transpose ?
110 |   }                # use C_subsetDT here as well ??? what is faster ???
111 |   rspl_DF(x, by)
112 | }
113 | 
114 | 


--------------------------------------------------------------------------------
/man/roworder.Rd:
--------------------------------------------------------------------------------
 1 | \name{roworder}
 2 | \alias{roworder}
 3 | \alias{roworderv}
 4 | %- Also NEED an '\alias' for EACH other topic documented here.
 5 | \title{
 6 | Fast Reordering of Data Frame Rows
 7 | }
 8 | \description{
 9 | A fast substitute for \code{dplyr::arrange}, based on \code{\link[=radixorder]{radixorder(v)}} and inspired by \code{data.table::setorder(v)}. It returns a sorted copy of the data frame, unless the data is already sorted in which case no copy is made. In addition, rows can be manually re-ordered. \code{roworderv} is a programmer's version that takes vectors/variables as input.
10 | 
11 | Use \code{data.table::setorder(v)} to sort a data frame without creating a copy. %\code{roworder} also does not support grouped tibbles or pdata.frame's, i.e. every data frame is treated the same.
12 | }
13 | \usage{
14 | roworder(X, \dots, na.last = TRUE, verbose = .op[["verbose"]])
15 | 
16 | roworderv(X, cols = NULL, neworder = NULL, decreasing = FALSE,
17 |           na.last = TRUE, pos = "front", verbose = .op[["verbose"]])
18 | }
19 | %- maybe also 'usage' for other objects documented here.
20 | \arguments{
21 |   \item{X}{a data frame or list of equal-length columns. }
22 |   \item{\dots}{comma-separated columns of \code{X} to sort by e.g. \code{var1, var2}. Negatives i.e. \code{-var1, var2} can be used to sort in decreasing order of \code{var1}. Internally all expressions are turned into strings and \code{startsWith(expr, "-")} is used to detect this, thus it does not negate the actual values (which may as well be strings), and you cannot apply any other functions to columns inside \code{roworder()} to induce different sorting behavior.}
23 |   \item{cols}{select columns to sort by using a function, column names, indices or a logical vector. The default \code{NULL} sorts by all columns in order of occurrence (from left to right). }
24 |   \item{na.last}{logical. If \code{TRUE}, missing values in the sorting columns are placed last; if \code{FALSE}, they are placed first; if \code{NA} they are removed (argument passed to \code{\link{radixorder}}).}
25 |   \item{decreasing}{logical. Should the sort order be increasing or decreasing? Can also be a vector of length equal to the number of arguments in \code{cols} (argument passed to \code{\link{radixorder}}).}
26 |   \item{neworder}{an ordering vector, whose length can be \code{< nrow(X)}. If \code{pos = "front"} or \code{pos = "end"}, a logical vector can also be supplied. This argument overwrites \code{cols}.}
27 |     \item{pos}{integer or character. Different arrangement options if \code{!is.null(neworder) && length(neworder) < nrow(X)}.
28 |         \tabular{lllll}{\emph{ Int. }   \tab\tab \emph{ String }   \tab\tab \emph{ Description }  \cr
29 |                  1 \tab\tab "front"   \tab\tab move rows in \code{neworder} to the front (top) of \code{X} (the default). \cr
30 |                  2 \tab\tab "end" \tab\tab move rows in \code{neworder} to the end (bottom) of \code{X}. \cr
31 |                  3 \tab\tab "exchange"   \tab\tab just exchange the order of rows in \code{neworder}, other rows remain in the same position. \cr
32 |                  4 \tab\tab "after"  \tab\tab place all further selected rows behind the first selected row. \cr
33 | 
34 |   }
35 |   }
36 |   \item{verbose}{integer. \code{1L} (default) prints a message when ordering a grouped or indexed frame, indicating that this is not efficient and encouraging reordering the data prior to the grouping/indexing step. Users can also set \code{verbose = 2L} to additionally print a message if \code{X} is already sorted, implying that no copy was made and the call to \code{roworder(v)} is redundant.}
37 | 
38 | }
39 | 
40 | \value{
41 | A copy of \code{X} with rows reordered. If \code{X} is already sorted, \code{X} is simply returned.
42 | }
43 | \note{
44 | If you don't require a copy of the data, use \code{data.table::setorder} (you can also use it in a piped call as it invisibly returns the data).
45 | 
46 | \code{roworder(v)} has internal facilities to deal with \link[=GRP]{grouped} and \link[=indexing]{indexed} data. This is however inefficient (since in most cases data could be reordered before grouping/indexing), and therefore issues a message if \code{verbose > 0L}.
47 | }
48 | 
49 | %% ~Make other sections like Warning with \section{Warning }{\dots.} ~
50 | 
51 | \seealso{
52 | \code{\link{colorder}}, \link[=fast-data-manipulation]{Data Frame Manipulation}, \link[=fast-grouping-ordering]{Fast Grouping and Ordering}, \link[=collapse-documentation]{Collapse Overview}
53 | }
54 | \examples{
55 | head(roworder(airquality, Month, -Ozone))
56 | head(roworder(airquality, Month, -Ozone, na.last = NA))  # Removes the missing values in Ozone
57 | 
58 | ## Same in standard evaluation
59 | head(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE)))
60 | head(roworderv(airquality, c("Month", "Ozone"), decreasing = c(FALSE, TRUE), na.last = NA))
61 | 
62 | ## Custom reordering
63 | head(roworderv(mtcars, neworder = 3:4))               # Bring rows 3 and 4 to the front
64 | head(roworderv(mtcars, neworder = 3:4, pos = "end"))  # Bring them to the end
65 | head(roworderv(mtcars, neworder = mtcars$vs == 1))    # Bring rows with vs == 1 to the top
66 | }
67 | % Add one or more standard keywords, see file 'KEYWORDS' in the
68 | % R documentation directory (show via RShowDoc("KEYWORDS")):
69 | \keyword{ manip }
70 | % \keyword{ ~kwd2 }
71 | % Use only one keyword per line.
72 | % For non-standard keywords, use \concept instead of \keyword:
73 | % \concept{ ~cpt1 }
74 | % \concept{ ~cpt2 }
75 | % Use only one concept per line.
76 | 


--------------------------------------------------------------------------------
/man/fndistinct.Rd:
--------------------------------------------------------------------------------
 1 | \name{fndistinct}
 2 | \alias{fndistinct}
 3 | \alias{fndistinct.default}
 4 | \alias{fndistinct.matrix}
 5 | \alias{fndistinct.data.frame}
 6 | \alias{fndistinct.grouped_df}
 7 | \title{Fast (Grouped) Distinct Value Count for Matrix-Like Objects}  % Vectors, Matrix and Data Frame Columns}
 8 | \description{
 9 | \code{fndistinct} is a generic function that (column-wise) computes the number of distinct values in \code{x}, (optionally) grouped by \code{g}. It is significantly faster than \code{length(unique(x))}. The \code{\link{TRA}} argument can further be used to transform \code{x} using its (grouped) distinct value count.
10 | }
11 | 
12 | \usage{
13 | fndistinct(x, \dots)
14 | 
15 | \method{fndistinct}{default}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
16 |            use.g.names = TRUE, nthreads = .op[["nthreads"]], \dots)
17 | 
18 | \method{fndistinct}{matrix}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
19 |            use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], \dots)
20 | 
21 | \method{fndistinct}{data.frame}(x, g = NULL, TRA = NULL, na.rm = .op[["na.rm"]],
22 |            use.g.names = TRUE, drop = TRUE, nthreads = .op[["nthreads"]], \dots)
23 | 
24 | \method{fndistinct}{grouped_df}(x, TRA = NULL, na.rm = .op[["na.rm"]],
25 |            use.g.names = FALSE, keep.group_vars = TRUE, nthreads = .op[["nthreads"]], \dots)
26 | }
27 | \arguments{
28 | \item{x}{a vector, matrix, data frame or grouped data frame (class 'grouped_df').}
29 | 
30 | \item{g}{a factor, \code{\link{GRP}} object, atomic vector (internally converted to factor) or a list of vectors / factors (internally converted to a \code{\link{GRP}} object) used to group \code{x}.}
31 | 
32 | \item{TRA}{an integer or quoted operator indicating the transformation to perform:
33 | 0 - "na"     |     1 - "fill"     |     2 - "replace"     |     3 - "-"     |     4 - "-+"     |     5 - "/"     |     6 - "\%"     |     7 - "+"     |     8 - "*"     |     9 - "\%\%"     |     10 - "-\%\%". See \code{\link{TRA}}.}
34 | 
35 | 
36 | \item{na.rm}{logical. \code{TRUE}: Skip missing values in \code{x} (faster computation). \code{FALSE}: Also consider 'NA' as one distinct value.}
37 | 
38 | \item{use.g.names}{logical. Make group-names and add to the result as names (default method) or row-names (matrix and data frame methods). No row-names are generated for \emph{data.table}'s.}
39 | 
40 | \item{nthreads}{integer. The number of threads to utilize. Parallelism is across groups for grouped computations and at the column-level otherwise. }
41 | 
42 | \item{drop}{\emph{matrix and data.frame method:} Logical. \code{TRUE} drops dimensions and returns an atomic vector if \code{g = NULL} and \code{TRA = NULL}.}
43 | 
44 | \item{keep.group_vars}{\emph{grouped_df method:} Logical. \code{FALSE} removes grouping variables after computation.}
45 | 
46 | \item{\dots}{arguments to be passed to or from other methods. If \code{TRA} is used, passing \code{set = TRUE} will transform data by reference and return the result invisibly.}
47 | 
48 | }
49 | \details{
50 | \code{fndistinct} implements a pretty fast C-level hashing algorithm inspired by the \emph{kit} package to find the number of distinct values.
51 | %\code{fndistinct} implements a fast algorithm to find the number of distinct values utilizing index- hashing implemented in the \code{Rcpp::sugar::IndexHash} class.
52 | 
53 | If \code{na.rm = TRUE} (the default), missing values will be skipped yielding substantial performance gains in data with many missing values. If \code{na.rm = FALSE}, missing values will simply be treated as any other value and read into the hash-map. Thus with the former, a numeric vector \code{c(1.25,NaN,3.56,NA)} will have a distinct value count of 2, whereas the latter will return a distinct value count of 4.
54 | 
55 | % Grouped computations are performed by mapping the data to a sparse-array and then hash-mapping each group. This is often not much slower than using a larger hash-map for the entire data when \code{g = NULL}.
56 | 
57 | \code{fndistinct} preserves all attributes of non-classed vectors / columns, and only the 'label' attribute (if available) of classed vectors / columns (i.e. dates or factors). When applied to data frames and matrices, the row-names are adjusted as necessary.
58 | 
59 | }
60 | \value{
61 | Integer. The number of distinct values in \code{x}, grouped by \code{g}, or (if \code{\link{TRA}} is used) \code{x} transformed by its distinct value count, grouped by \code{g}.
62 | }
63 | \seealso{
64 | \code{\link{fnunique}}, \code{\link{fnobs}}, \link[=fast-statistical-functions]{Fast Statistical Functions}, \link[=collapse-documentation]{Collapse Overview}
65 | }
66 | \examples{
67 | ## default vector method
68 | fndistinct(airquality$Solar.R)                   # Simple distinct value count
69 | fndistinct(airquality$Solar.R, airquality$Month) # Grouped distinct value count
70 | 
71 | ## data.frame method
72 | fndistinct(airquality)
73 | fndistinct(airquality, airquality$Month)
74 | fndistinct(wlddev)                               # Works with data of all types!
75 | head(fndistinct(wlddev, wlddev$iso3c))
76 | 
77 | ## matrix method
78 | aqm <- qM(airquality)
79 | fndistinct(aqm)                                  # Also works for character or logical matrices
80 | fndistinct(aqm, airquality$Month)
81 | 
82 | ## method for grouped data frames - created with dplyr::group_by or fgroup_by
83 | airquality |> fgroup_by(Month) |> fndistinct()
84 | wlddev |> fgroup_by(country) |>
85 |              fselect(PCGDP,LIFEEX,GINI,ODA) |> fndistinct()
86 | }
87 | \keyword{univar}
88 | \keyword{manip}
89 | 


--------------------------------------------------------------------------------